mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
vk: AMD primitive restart bug workaround
- Emulate primitive restart with degenerate triangles
This commit is contained in:
parent
b7979d3f57
commit
eccb57d4b8
6 changed files with 81 additions and 113 deletions
|
@ -17,5 +17,5 @@ namespace vk
|
|||
VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap);
|
||||
float max_aniso(rsx::texture_max_anisotropy gcm_aniso);
|
||||
std::array<VkComponentSwizzle, 4> get_component_mapping(u32 format);
|
||||
VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification);
|
||||
VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type mode, bool &requires_modification);
|
||||
}
|
||||
|
|
|
@ -1444,8 +1444,9 @@ void VKGSRender::end()
|
|||
bool primitive_emulated = false;
|
||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||
|
||||
const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
|
||||
const bool single_draw = (!is_emulated_restart && (!supports_multidraw || rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive));
|
||||
const bool single_draw = (!supports_multidraw ||
|
||||
rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 ||
|
||||
rsx::method_registers.current_draw_clause.is_disjoint_primitive);
|
||||
|
||||
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
||||
{
|
||||
|
@ -1485,23 +1486,12 @@ void VKGSRender::end()
|
|||
}
|
||||
else
|
||||
{
|
||||
if (!is_emulated_restart)
|
||||
u32 first_vertex = 0;
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
{
|
||||
u32 first_vertex = 0;
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
{
|
||||
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0);
|
||||
first_vertex += verts;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.alternate_first_count_commands)
|
||||
{
|
||||
//Primitive restart splitting happens after the primitive type expansion step
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, range.second, 1, range.first, 0, 0);
|
||||
}
|
||||
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0);
|
||||
first_vertex += verts;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2236,7 +2226,7 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
|
|||
properties.state.set_primitive_type(vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, emulated_primitive_type));
|
||||
|
||||
const bool restarts_valid = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed && !emulated_primitive_type && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
||||
if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart() && restarts_valid)
|
||||
if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart(rsx::method_registers.current_draw_clause.primitive) && restarts_valid)
|
||||
properties.state.enable_primitive_restart();
|
||||
|
||||
// Rasterizer state
|
||||
|
|
|
@ -17,7 +17,6 @@ namespace vk
|
|||
|
||||
//Driver compatibility workarounds
|
||||
bool g_drv_no_primitive_restart_flag = false;
|
||||
bool g_drv_force_32bit_indices = false;
|
||||
bool g_drv_sanitize_fp_values = false;
|
||||
bool g_drv_disable_fence_reset = false;
|
||||
|
||||
|
@ -209,35 +208,13 @@ namespace vk
|
|||
g_current_renderer = device;
|
||||
const auto gpu_name = g_current_renderer.gpu().name();
|
||||
|
||||
bool gcn4_proprietary = false;
|
||||
const std::array<std::string, 8> black_listed =
|
||||
//Radeon fails to properly handle degenerate primitives if primitive restart is enabled
|
||||
//One has to choose between using degenerate primitives or primitive restart to break up lists but not both
|
||||
//Polaris and newer will crash with ERROR_DEVICE_LOST
|
||||
//Older GCN will work okay most of the time but also occasionally draws garbage without reason
|
||||
if (gpu_name.find("Radeon") != std::string::npos)
|
||||
{
|
||||
// Black list all polaris unless its proven they dont have a problem with primitive restart
|
||||
"RX 580",
|
||||
"RX 570",
|
||||
"RX 560",
|
||||
"RX 550",
|
||||
"RX 480",
|
||||
"RX 470",
|
||||
"RX 460",
|
||||
"RX Vega",
|
||||
};
|
||||
|
||||
for (const auto& test : black_listed)
|
||||
{
|
||||
if (gpu_name.find(test) != std::string::npos)
|
||||
{
|
||||
g_drv_no_primitive_restart_flag = !g_cfg.video.vk.force_primitive_restart;
|
||||
gcn4_proprietary = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//Older cards back to GCN1 break primitive restart on 16-bit indices
|
||||
if (!gcn4_proprietary && gpu_name.find("Radeon") != std::string::npos)
|
||||
{
|
||||
//gcn1 - gcn3 workarounds
|
||||
g_drv_force_32bit_indices = true;
|
||||
g_drv_no_primitive_restart_flag = !g_cfg.video.vk.force_primitive_restart;
|
||||
g_drv_disable_fence_reset = true;
|
||||
}
|
||||
|
||||
|
@ -248,14 +225,21 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
bool emulate_primitive_restart()
|
||||
bool emulate_primitive_restart(rsx::primitive_type type)
|
||||
{
|
||||
return g_drv_no_primitive_restart_flag;
|
||||
}
|
||||
if (g_drv_no_primitive_restart_flag)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case rsx::primitive_type::triangle_strip:
|
||||
case rsx::primitive_type::quad_strip:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool force_32bit_index_buffer()
|
||||
{
|
||||
return g_drv_force_32bit_indices;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool sanitize_fp_values()
|
||||
|
|
|
@ -71,8 +71,7 @@ namespace vk
|
|||
void set_current_renderer(const vk::render_device &device);
|
||||
|
||||
//Compatibility workarounds
|
||||
bool emulate_primitive_restart();
|
||||
bool force_32bit_index_buffer();
|
||||
bool emulate_primitive_restart(rsx::primitive_type type);
|
||||
bool sanitize_fp_values();
|
||||
bool fence_reset_disabled();
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
namespace vk
|
||||
{
|
||||
VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification)
|
||||
VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type mode, bool &requires_modification)
|
||||
{
|
||||
requires_modification = false;
|
||||
|
||||
|
@ -123,32 +123,30 @@ namespace
|
|||
|
||||
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
|
||||
{
|
||||
const bool primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
|
||||
const bool emulate_primitive_restart = primitive_restart_enabled && vk::emulate_primitive_restart();
|
||||
const bool expand_indices_to_32bit = primitive_restart_enabled && !emulate_primitive_restart && vk::force_32bit_index_buffer();
|
||||
|
||||
bool primitives_emulated = false;
|
||||
VkPrimitiveTopology prims = vk::get_appropriate_topology(
|
||||
rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
|
||||
auto primitive = rsx::method_registers.current_draw_clause.primitive;
|
||||
const VkPrimitiveTopology prims = vk::get_appropriate_topology(primitive, primitives_emulated);
|
||||
const bool emulate_restart = rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart(primitive);
|
||||
|
||||
rsx::index_array_type index_type = rsx::method_registers.current_draw_clause.is_immediate_draw ?
|
||||
rsx::index_array_type::u32 :
|
||||
rsx::method_registers.index_type();
|
||||
|
||||
rsx::index_array_type upload_type = expand_indices_to_32bit ? rsx::index_array_type::u32 : index_type;
|
||||
u32 type_size = gsl::narrow<u32>(get_index_type_size(upload_type));
|
||||
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
|
||||
|
||||
u32 index_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
if (primitives_emulated)
|
||||
index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, index_count);
|
||||
u32 upload_size = index_count * type_size;
|
||||
|
||||
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
|
||||
if (emulate_restart) upload_size *= 2;
|
||||
|
||||
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<4>(upload_size);
|
||||
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
|
||||
|
||||
gsl::span<gsl::byte> dst;
|
||||
std::vector<gsl::byte> tmp;
|
||||
if (emulate_primitive_restart || (expand_indices_to_32bit && index_type == rsx::index_array_type::u16))
|
||||
if (emulate_restart)
|
||||
{
|
||||
tmp.resize(upload_size);
|
||||
dst = tmp;
|
||||
|
@ -158,9 +156,6 @@ namespace
|
|||
dst = gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), upload_size);
|
||||
}
|
||||
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
|
||||
std::make_tuple(offset_in_index_buffer, vk::get_index_type(upload_type));
|
||||
|
||||
/**
|
||||
* Upload index (and expands it if primitive type is not natively supported).
|
||||
*/
|
||||
|
@ -177,43 +172,26 @@ namespace
|
|||
{
|
||||
//empty set, do not draw
|
||||
m_index_buffer_ring_info.unmap();
|
||||
return{ prims, 0, 0, 0, 0, index_info };
|
||||
return{ prims, 0, 0, 0, 0, {} };
|
||||
}
|
||||
|
||||
if (tmp.size() > 0)
|
||||
if (emulate_restart)
|
||||
{
|
||||
if (emulate_primitive_restart)
|
||||
if (index_type == rsx::index_array_type::u16)
|
||||
{
|
||||
//Emulate primitive restart by breaking up the draw calls
|
||||
rsx::method_registers.current_draw_clause.alternate_first_count_commands.resize(0);
|
||||
rsx::method_registers.current_draw_clause.alternate_first_count_commands.reserve(index_count / 3);
|
||||
|
||||
if (index_type == rsx::index_array_type::u16)
|
||||
rsx::split_index_list(reinterpret_cast<u16*>(tmp.data()), index_count, (u16)UINT16_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
|
||||
else
|
||||
rsx::split_index_list(reinterpret_cast<u32*>(tmp.data()), index_count, (u32)UINT32_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
|
||||
|
||||
memcpy(buf, tmp.data(), tmp.size());
|
||||
index_count = rsx::remove_restart_index((u16*)buf, (u16*)tmp.data(), index_count, (u16)UINT16_MAX);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Force 32-bit indices
|
||||
verify(HERE), index_type == rsx::index_array_type::u16;
|
||||
u32* dst = reinterpret_cast<u32*>(buf);
|
||||
u16* src = reinterpret_cast<u16*>(tmp.data());
|
||||
for (u32 n = 0; n < index_count; ++n)
|
||||
{
|
||||
const auto index = src[n];
|
||||
if (index == UINT16_MAX)
|
||||
dst[n] = UINT32_MAX;
|
||||
else
|
||||
dst[n] = index;
|
||||
}
|
||||
index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX);
|
||||
}
|
||||
}
|
||||
|
||||
m_index_buffer_ring_info.unmap();
|
||||
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
|
||||
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
|
||||
|
||||
//check for vertex arrays with frquency modifiers
|
||||
for (auto &block : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
|
|
|
@ -411,33 +411,50 @@ namespace rsx
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove restart index and emulate using degenerate triangles
|
||||
* Can be used as a workaround when restart_index doesnt work too well
|
||||
* dst should be able to hold at least 2xcount entries
|
||||
*/
|
||||
template <typename T>
|
||||
void split_index_list(T* indices, int index_count, T restart_index, std::vector<std::pair<u32, u32>>& out)
|
||||
u32 remove_restart_index(T* dst, T* src, int count, T restart_index)
|
||||
{
|
||||
int last_valid_index = -1;
|
||||
int last_start = -1;
|
||||
|
||||
for (int i = 0; i < index_count; ++i)
|
||||
// Converts a stream e.g [1, 2, 3, -1, 4, 5, 6] to a stream with degenerate splits
|
||||
// Output is e.g [1, 2, 3, 3, 3, 4, 4, 5, 6] (5 bogus triangles)
|
||||
T last_index, index;
|
||||
u32 dst_index = 0;
|
||||
for (int n = 0; n < count;)
|
||||
{
|
||||
if (indices[i] == restart_index)
|
||||
index = src[n];
|
||||
if (index == restart_index)
|
||||
{
|
||||
if (last_start >= 0)
|
||||
for (; n < count; ++n)
|
||||
{
|
||||
out.push_back(std::make_pair(last_start, i - last_start));
|
||||
last_start = -1;
|
||||
if (src[n] != restart_index)
|
||||
break;
|
||||
}
|
||||
|
||||
continue;
|
||||
if (n == count)
|
||||
return dst_index;
|
||||
|
||||
dst[dst_index++] = last_index; //Duplicate last
|
||||
|
||||
if ((dst_index & 1) == 0)
|
||||
//Duplicate last again to fix face winding
|
||||
dst[dst_index++] = last_index;
|
||||
|
||||
last_index = src[n];
|
||||
dst[dst_index++] = last_index; //Duplicate next
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[dst_index++] = index;
|
||||
last_index = index;
|
||||
++n;
|
||||
}
|
||||
|
||||
if (last_start < 0)
|
||||
last_start = i;
|
||||
|
||||
last_valid_index = i;
|
||||
}
|
||||
|
||||
if (last_start >= 0)
|
||||
out.push_back(std::make_pair(last_start, last_valid_index - last_start + 1));
|
||||
return dst_index;
|
||||
}
|
||||
|
||||
// The rsx internally adds the 'data_base_offset' and the 'vert_offset' and masks it
|
||||
|
|
Loading…
Add table
Reference in a new issue