vulkan: Improve primitive restart workaround for vega/polaris

- For some reason the hardware forgets that primitive restart is enabled and tries to actually read vertex index 65535
- Works correctly if uint32 vertex indices are used instead of uint16 for cases where primitive restart is active
This commit is contained in:
kd-11 2018-02-05 23:36:45 +03:00
parent a8ab408f64
commit 32f0e91893
4 changed files with 53 additions and 15 deletions

View file

@ -25,11 +25,12 @@ namespace vk
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>;
}
//Heap allocation sizes in MB - each 'frame' owns a private heap, one of each kind
//Heap allocation sizes in MB
//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128
#define VK_MAX_ASYNC_CB_COUNT 64
#define VK_MAX_ASYNC_FRAMES 2

View file

@ -15,6 +15,7 @@ namespace vk
atomic_t<bool> g_cb_no_interrupt_flag { false };
atomic_t<bool> g_drv_no_primitive_restart_flag { false };
atomic_t<bool> g_drv_force_32bit_indices{ false };
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;
@ -280,8 +281,9 @@ namespace vk
void set_current_renderer(const vk::render_device &device)
{
g_current_renderer = device;
const auto gpu_name = g_current_renderer.gpu().name();
const std::array<std::string, 8> black_listed =
/* const std::array<std::string, 8> black_listed =
{
// Blacklist all Polaris GPUs unless it's proven they don't have a problem with primitive restart
"RX 580",
@ -294,7 +296,6 @@ namespace vk
"RX Vega",
};
const auto gpu_name = g_current_renderer.gpu().name();
for (const auto& test : black_listed)
{
if (gpu_name.find(test) != std::string::npos)
@ -302,6 +303,11 @@ namespace vk
g_drv_no_primitive_restart_flag = !g_cfg.video.vk.force_primitive_restart;
break;
}
}*/
if (gpu_name.find("AMD") != std::string::npos)
{
g_drv_force_32bit_indices = true;
}
}
@ -310,6 +316,11 @@ namespace vk
return g_drv_no_primitive_restart_flag;
}
//Compatibility query: reports the current value of the g_drv_force_32bit_indices driver flag.
//The flag starts out false and is raised by set_current_renderer() when the GPU name contains "AMD".
bool force_32bit_index_buffer()
{
	const bool widen_indices = g_drv_force_32bit_indices;
	return widen_indices;
}
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range)
{
//Prepare an image to match the new layout..

View file

@ -65,7 +65,9 @@ namespace vk
//Access to the render device shared by the vk helpers.
//set_current_renderer() stores the device in g_current_renderer and sets driver workaround flags based on the GPU name.
vk::render_device *get_current_renderer();
void set_current_renderer(const vk::render_device &device);
//Compatibility workarounds
//When this returns true and primitive restart is enabled, the indexed draw path splits the index list
//at the restart index into separate draw ranges instead of relying on the hardware.
bool emulate_primitive_restart();
//Returns the g_drv_force_32bit_indices flag. When it is set, primitive restart is enabled and restart is not
//being emulated, the indexed draw path uploads u16 indices widened to u32 (UINT16_MAX becomes UINT32_MAX).
bool force_32bit_index_buffer();
//NOTE(review): the two helpers below are only declared in this view; presumably they return default-initialized
//Vulkan structs - confirm against their definitions.
VkComponentMapping default_component_map();
VkImageSubresource default_image_subresource();

View file

@ -123,6 +123,10 @@ namespace
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
{
const bool primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
const bool emulate_primitive_restart = primitive_restart_enabled && vk::emulate_primitive_restart();
const bool expand_indices_to_32bit = primitive_restart_enabled && !emulate_primitive_restart && vk::force_32bit_index_buffer();
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(
rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
@ -131,7 +135,8 @@ namespace
rsx::index_array_type::u32 :
rsx::method_registers.index_type();
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
rsx::index_array_type upload_type = expand_indices_to_32bit ? rsx::index_array_type::u32 : index_type;
u32 type_size = gsl::narrow<u32>(get_index_type_size(upload_type));
u32 index_count = rsx::method_registers.current_draw_clause.get_elements_count();
if (primitives_emulated)
@ -143,7 +148,7 @@ namespace
gsl::span<gsl::byte> dst;
std::vector<gsl::byte> tmp;
if (rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart())
if (emulate_primitive_restart || (expand_indices_to_32bit && index_type == rsx::index_array_type::u16))
{
tmp.resize(upload_size);
dst = tmp;
@ -154,7 +159,7 @@ namespace
}
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
std::make_tuple(offset_in_index_buffer, vk::get_index_type(upload_type));
/**
* Upload index (and expands it if primitive type is not natively supported).
@ -175,17 +180,36 @@ namespace
return{ prims, 0, 0, 0, 0, index_info };
}
if (rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart())
if (tmp.size() > 0)
{
//Emulate primitive restart by breaking up the draw calls
rsx::method_registers.current_draw_clause.alternate_first_count_commands.resize(0);
if (emulate_primitive_restart)
{
//Emulate primitive restart by breaking up the draw calls
rsx::method_registers.current_draw_clause.alternate_first_count_commands.resize(0);
rsx::method_registers.current_draw_clause.alternate_first_count_commands.reserve(index_count / 3);
if (index_type == rsx::index_array_type::u16)
rsx::split_index_list(reinterpret_cast<u16*>(tmp.data()), index_count, (u16)UINT16_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
if (index_type == rsx::index_array_type::u16)
rsx::split_index_list(reinterpret_cast<u16*>(tmp.data()), index_count, (u16)UINT16_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
else
rsx::split_index_list(reinterpret_cast<u32*>(tmp.data()), index_count, (u32)UINT32_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
memcpy(buf, tmp.data(), tmp.size());
}
else
rsx::split_index_list(reinterpret_cast<u32*>(tmp.data()), index_count, (u32)UINT32_MAX, rsx::method_registers.current_draw_clause.alternate_first_count_commands);
memcpy(buf, tmp.data(), tmp.size());
{
//Force 32-bit indices
verify(HERE), index_type == rsx::index_array_type::u16;
u32* dst = reinterpret_cast<u32*>(buf);
u16* src = reinterpret_cast<u16*>(tmp.data());
for (u32 n = 0; n < index_count; ++n)
{
const auto index = src[n];
if (index == UINT16_MAX)
dst[n] = UINT32_MAX;
else
dst[n] = index;
}
}
}
m_index_buffer_ring_info.unmap();