From eccb57d4b8e6d8ed197791748e678b6b5ac0387a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 21 Apr 2018 18:02:17 +0300 Subject: [PATCH] vk: AMD primitive restart bug workaround - Emulate primitive restart with degenerate triangles --- rpcs3/Emu/RSX/VK/VKFormats.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 28 +++++--------- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 54 ++++++++++----------------- rpcs3/Emu/RSX/VK/VKHelpers.h | 3 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 56 +++++++++------------------- rpcs3/Emu/RSX/rsx_utils.h | 51 ++++++++++++++++--------- 6 files changed, 81 insertions(+), 113 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKFormats.h b/rpcs3/Emu/RSX/VK/VKFormats.h index 18f729ad97..146664afd3 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.h +++ b/rpcs3/Emu/RSX/VK/VKFormats.h @@ -17,5 +17,5 @@ namespace vk VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap); float max_aniso(rsx::texture_max_anisotropy gcm_aniso); std::array get_component_mapping(u32 format); - VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification); + VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type mode, bool &requires_modification); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 6ffad39d19..65172d76c9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1444,8 +1444,9 @@ void VKGSRender::end() bool primitive_emulated = false; vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); - const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed); - const bool single_draw = (!is_emulated_restart && (!supports_multidraw || rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive)); + const bool single_draw = (!supports_multidraw || + rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || + rsx::method_registers.current_draw_clause.is_disjoint_primitive); if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) { @@ -1485,23 +1486,12 @@ void VKGSRender::end() } else { - if (!is_emulated_restart) + u32 first_vertex = 0; + for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) { - u32 first_vertex = 0; - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) - { - const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); - vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0); - first_vertex += verts; - } - } - else - { - for (const auto &range : rsx::method_registers.current_draw_clause.alternate_first_count_commands) - { - //Primitive restart splitting happens after the primitive type expansion step - vkCmdDrawIndexed(*m_current_command_buffer, range.second, 1, range.first, 0, 0); - } + const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); + vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0); + first_vertex += verts; } } } @@ -2236,7 +2226,7 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) properties.state.set_primitive_type(vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, emulated_primitive_type)); const bool restarts_valid = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed && !emulated_primitive_type && !rsx::method_registers.current_draw_clause.is_disjoint_primitive; - if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart() && restarts_valid) + if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart(rsx::method_registers.current_draw_clause.primitive) && restarts_valid) properties.state.enable_primitive_restart(); // Rasterizer state diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 07a46e106f..9925cab6bb 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -17,7 +17,6 @@ namespace vk //Driver compatibility workarounds bool g_drv_no_primitive_restart_flag = false; - bool g_drv_force_32bit_indices = false; bool g_drv_sanitize_fp_values = false; bool g_drv_disable_fence_reset = false; @@ -209,35 +208,13 @@ namespace vk g_current_renderer = device; const auto gpu_name = g_current_renderer.gpu().name(); - bool gcn4_proprietary = false; - const std::array black_listed = + //Radeon fails to properly handle degenerate primitives if primitive restart is enabled + //One has to choose between using degenerate primitives or primitive restart to break up lists but not both + //Polaris and newer will crash with ERROR_DEVICE_LOST + //Older GCN will work okay most of the time but also occasionally draws garbage without reason + if (gpu_name.find("Radeon") != std::string::npos) { - // Black list all polaris unless its proven they dont have a problem with primitive restart - "RX 580", - "RX 570", - "RX 560", - "RX 550", - "RX 480", - "RX 470", - "RX 460", - "RX Vega", - }; - - for (const auto& test : black_listed) - { - if (gpu_name.find(test) != std::string::npos) - { - g_drv_no_primitive_restart_flag = !g_cfg.video.vk.force_primitive_restart; - gcn4_proprietary = true; - break; - } - } - - //Older cards back to GCN1 break primitive restart on 16-bit indices - if (!gcn4_proprietary && gpu_name.find("Radeon") != std::string::npos) - { - //gcn1 - gcn3 workarounds - g_drv_force_32bit_indices = true; + g_drv_no_primitive_restart_flag = !g_cfg.video.vk.force_primitive_restart; g_drv_disable_fence_reset = true; } @@ -248,14 +225,21 @@ namespace vk } } - bool emulate_primitive_restart() + bool emulate_primitive_restart(rsx::primitive_type type) { - return g_drv_no_primitive_restart_flag; - } + if (g_drv_no_primitive_restart_flag) + { + switch (type) + { + case rsx::primitive_type::triangle_strip: + case rsx::primitive_type::quad_strip: + return true; + default: + break; + } + } - bool force_32bit_index_buffer() - { - return g_drv_force_32bit_indices; + return false; } bool sanitize_fp_values() diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 4c988dcb67..1281595df7 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -71,8 +71,7 @@ namespace vk void set_current_renderer(const vk::render_device &device); //Compatibility workarounds - bool emulate_primitive_restart(); - bool force_32bit_index_buffer(); + bool emulate_primitive_restart(rsx::primitive_type type); bool sanitize_fp_values(); bool fence_reset_disabled(); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index a9be0b4b00..651e98732d 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -7,7 +7,7 @@ namespace vk { - VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification) + VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type mode, bool &requires_modification) { requires_modification = false; @@ -123,32 +123,30 @@ namespace vertex_input_state operator()(const rsx::draw_indexed_array_command& command) { - const bool primitive_restart_enabled = rsx::method_registers.restart_index_enabled(); - const bool emulate_primitive_restart = primitive_restart_enabled && vk::emulate_primitive_restart(); - const bool expand_indices_to_32bit = primitive_restart_enabled && !emulate_primitive_restart && vk::force_32bit_index_buffer(); - bool primitives_emulated = false; - VkPrimitiveTopology prims = vk::get_appropriate_topology( - rsx::method_registers.current_draw_clause.primitive, primitives_emulated); + auto primitive = rsx::method_registers.current_draw_clause.primitive; + const VkPrimitiveTopology prims = vk::get_appropriate_topology(primitive, primitives_emulated); + const bool emulate_restart = rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart(primitive); rsx::index_array_type index_type = rsx::method_registers.current_draw_clause.is_immediate_draw ? rsx::index_array_type::u32 : rsx::method_registers.index_type(); - rsx::index_array_type upload_type = expand_indices_to_32bit ? rsx::index_array_type::u32 : index_type; - u32 type_size = gsl::narrow(get_index_type_size(upload_type)); + u32 type_size = gsl::narrow(get_index_type_size(index_type)); u32 index_count = rsx::method_registers.current_draw_clause.get_elements_count(); if (primitives_emulated) index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, index_count); u32 upload_size = index_count * type_size; - VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + if (emulate_restart) upload_size *= 2; + + VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<4>(upload_size); void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size); gsl::span dst; std::vector tmp; - if (emulate_primitive_restart || (expand_indices_to_32bit && index_type == rsx::index_array_type::u16)) + if (emulate_restart) { tmp.resize(upload_size); dst = tmp; @@ -158,9 +156,6 @@ namespace dst = gsl::span(static_cast(buf), upload_size); } - std::optional> index_info = - std::make_tuple(offset_in_index_buffer, vk::get_index_type(upload_type)); - /** * Upload index (and expands it if primitive type is not natively supported). */ @@ -177,43 +172,26 @@ namespace { //empty set, do not draw m_index_buffer_ring_info.unmap(); - return{ prims, 0, 0, 0, 0, index_info }; + return{ prims, 0, 0, 0, 0, {} }; } - if (tmp.size() > 0) + if (emulate_restart) { - if (emulate_primitive_restart) + if (index_type == rsx::index_array_type::u16) { - //Emulate primitive restart by breaking up the draw calls - rsx::method_registers.current_draw_clause.alternate_first_count_commands.resize(0); - rsx::method_registers.current_draw_clause.alternate_first_count_commands.reserve(index_count / 3); - - if (index_type == rsx::index_array_type::u16) - rsx::split_index_list(reinterpret_cast(tmp.data()), index_count, (u16)UINT16_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands); - else - rsx::split_index_list(reinterpret_cast(tmp.data()), index_count, (u32)UINT32_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands); - - memcpy(buf, tmp.data(), tmp.size()); + index_count = rsx::remove_restart_index((u16*)buf, (u16*)tmp.data(), index_count, (u16)UINT16_MAX); } else { - //Force 32-bit indices - verify(HERE), index_type == rsx::index_array_type::u16; - u32* dst = reinterpret_cast(buf); - u16* src = reinterpret_cast(tmp.data()); - for (u32 n = 0; n < index_count; ++n) - { - const auto index = src[n]; - if (index == UINT16_MAX) - dst[n] = UINT32_MAX; - else - dst[n] = index; - } + index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX); } } m_index_buffer_ring_info.unmap(); + std::optional> index_info = + std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)); + //check for vertex arrays with frquency modifiers for (auto &block : m_vertex_layout.interleaved_blocks) { diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 37064e9da9..7af460b4b3 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -411,33 +411,50 @@ namespace rsx return result; } + /** + * Remove restart index and emulate using degenerate triangles + * Can be used as a workaround when restart_index doesnt work too well + * dst should be able to hold at least 2xcount entries + */ template - void split_index_list(T* indices, int index_count, T restart_index, std::vector>& out) + u32 remove_restart_index(T* dst, T* src, int count, T restart_index) { - int last_valid_index = -1; - int last_start = -1; - - for (int i = 0; i < index_count; ++i) + // Converts a stream e.g [1, 2, 3, -1, 4, 5, 6] to a stream with degenerate splits + // Output is e.g [1, 2, 3, 3, 3, 4, 4, 5, 6] (5 bogus triangles) + T last_index, index; + u32 dst_index = 0; + for (int n = 0; n < count;) { - if (indices[i] == restart_index) + index = src[n]; + if (index == restart_index) { - if (last_start >= 0) + for (; n < count; ++n) { - out.push_back(std::make_pair(last_start, i - last_start)); - last_start = -1; + if (src[n] != restart_index) + break; } - continue; + if (n == count) + return dst_index; + + dst[dst_index++] = last_index; //Duplicate last + + if ((dst_index & 1) == 0) + //Duplicate last again to fix face winding + dst[dst_index++] = last_index; + + last_index = src[n]; + dst[dst_index++] = last_index; //Duplicate next + } + else + { + dst[dst_index++] = index; + last_index = index; + ++n; } - - if (last_start < 0) - last_start = i; - - last_valid_index = i; } - if (last_start >= 0) - out.push_back(std::make_pair(last_start, last_valid_index - last_start + 1)); + return dst_index; } // The rsx internally adds the 'data_base_offset' and the 'vert_offset' and masks it