vk: Implement double-buffered heaps for platforms without universal support for host visibility (APPLE)

This commit is contained in:
kd-11 2018-08-23 21:56:06 +03:00 committed by kd-11
commit f0a91484a0
4 changed files with 163 additions and 21 deletions

View file

@ -565,7 +565,7 @@ VKGSRender::VKGSRender() : GSRender()
//Create secondary command_buffer for parallel operations //Create secondary command_buffer for parallel operations
m_secondary_command_buffer_pool.create((*m_device)); m_secondary_command_buffer_pool.create((*m_device));
m_secondary_command_buffer.create(m_secondary_command_buffer_pool); m_secondary_command_buffer.create(m_secondary_command_buffer_pool, true);
m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all; m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all;
//Precalculated stuff //Precalculated stuff
@ -601,7 +601,7 @@ VKGSRender::VKGSRender() : GSRender()
} }
const auto& memory_map = m_device->get_memory_mapping(); const auto& memory_map = m_device->get_memory_mapping();
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32); null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);
vk::initialize_compiler_context(); vk::initialize_compiler_context();
@ -2504,6 +2504,25 @@ void VKGSRender::write_buffers()
void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
{ {
if (m_attrib_ring_info.dirty() ||
m_uniform_buffer_ring_info.dirty() ||
m_index_buffer_ring_info.dirty() ||
m_transform_constants_ring_info.dirty() ||
m_texture_upload_buffer_ring_info.dirty())
{
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
m_secondary_command_buffer.begin();
m_attrib_ring_info.sync(m_secondary_command_buffer);
m_uniform_buffer_ring_info.sync(m_secondary_command_buffer);
m_index_buffer_ring_info.sync(m_secondary_command_buffer);
m_transform_constants_ring_info.sync(m_secondary_command_buffer);
m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);
m_secondary_command_buffer.end();
m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}
m_current_command_buffer->end(); m_current_command_buffer->end();
m_current_command_buffer->tag(); m_current_command_buffer->tag();
m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags); m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags);

View file

@ -19,6 +19,7 @@ namespace vk
atomic_t<bool> g_cb_no_interrupt_flag { false }; atomic_t<bool> g_cb_no_interrupt_flag { false };
//Driver compatibility workarounds //Driver compatibility workarounds
VkFlags g_heap_compatible_buffer_types = 0;
driver_vendor g_driver_vendor = driver_vendor::unknown; driver_vendor g_driver_vendor = driver_vendor::unknown;
bool g_drv_no_primitive_restart_flag = false; bool g_drv_no_primitive_restart_flag = false;
bool g_drv_sanitize_fp_values = false; bool g_drv_sanitize_fp_values = false;
@ -273,6 +274,7 @@ namespace vk
g_num_processed_frames = 0; g_num_processed_frames = 0;
g_num_total_frames = 0; g_num_total_frames = 0;
g_driver_vendor = driver_vendor::unknown; g_driver_vendor = driver_vendor::unknown;
g_heap_compatible_buffer_types = 0;
const auto gpu_name = g_current_renderer->gpu().name(); const auto gpu_name = g_current_renderer->gpu().name();
@ -313,6 +315,49 @@ namespace vk
LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name); LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name);
} }
} }
{
// Buffer memory tests, only useful for portability on macOS
VkBufferUsageFlags types[] =
{
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
};
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
VkBuffer tmp;
VkMemoryRequirements memory_reqs;
VkBufferCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
info.size = 4096;
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
info.flags = 0;
for (const auto &usage : types)
{
info.usage = usage;
CHECK_RESULT(vkCreateBuffer(*g_current_renderer, &info, nullptr, &tmp));
vkGetBufferMemoryRequirements(*g_current_renderer, tmp, &memory_reqs);
if (g_current_renderer->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr))
{
g_heap_compatible_buffer_types |= usage;
}
vkDestroyBuffer(*g_current_renderer, tmp, nullptr);
}
}
}
// Returns the bitmask of VkBufferUsageFlags for which a HOST_VISIBLE|HOST_COHERENT
// memory type exists on this device (populated by the buffer memory probe run at
// renderer initialization; reset to 0 on teardown). A missing bit means heaps of
// that usage must go through an explicit staging (shadow) buffer instead —
// primarily relevant for macOS/MoltenVK portability.
VkFlags get_heap_compatible_buffer_types()
{
return g_heap_compatible_buffer_types;
} }
driver_vendor get_driver_vendor() driver_vendor get_driver_vendor()

View file

@ -96,6 +96,7 @@ namespace vk
bool emulate_primitive_restart(rsx::primitive_type type); bool emulate_primitive_restart(rsx::primitive_type type);
bool sanitize_fp_values(); bool sanitize_fp_values();
bool fence_reset_disabled(); bool fence_reset_disabled();
VkFlags get_heap_compatible_buffer_types();
driver_vendor get_driver_vendor(); driver_vendor get_driver_vendor();
VkComponentMapping default_component_map(); VkComponentMapping default_component_map();
@ -535,7 +536,11 @@ namespace vk
{ {
if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask) if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask)
{ {
*type_index = i; if (type_index)
{
*type_index = i;
}
return true; return true;
} }
} }
@ -1078,6 +1083,8 @@ namespace vk
{ {
private: private:
bool is_open = false; bool is_open = false;
bool is_pending = false;
VkFence m_submit_fence = VK_NULL_HANDLE;
protected: protected:
vk::command_pool *pool = nullptr; vk::command_pool *pool = nullptr;
@ -1095,21 +1102,33 @@ namespace vk
command_buffer() {} command_buffer() {}
~command_buffer() {} ~command_buffer() {}
void create(vk::command_pool &cmd_pool) void create(vk::command_pool &cmd_pool, bool auto_reset = false)
{ {
VkCommandBufferAllocateInfo infos = {}; VkCommandBufferAllocateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
infos.commandBufferCount = 1; infos.commandBufferCount = 1;
infos.commandPool = (VkCommandPool)cmd_pool; infos.commandPool = (VkCommandPool)cmd_pool;
infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands));
if (auto_reset)
{
VkFenceCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence));
}
pool = &cmd_pool; pool = &cmd_pool;
} }
void destroy() void destroy()
{ {
vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);
if (m_submit_fence)
{
vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
}
} }
vk::command_pool& get_command_pool() const vk::command_pool& get_command_pool() const
@ -1124,6 +1143,15 @@ namespace vk
void begin() void begin()
{ {
if (m_submit_fence && is_pending)
{
while (vkGetFenceStatus(pool->get_owner(), m_submit_fence) != VK_SUCCESS);
is_pending = false;
CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
}
if (is_open) if (is_open)
return; return;
@ -1158,6 +1186,11 @@ namespace vk
return; return;
} }
if (fence == VK_NULL_HANDLE)
{
fence = m_submit_fence;
}
VkSubmitInfo infos = {}; VkSubmitInfo infos = {};
infos.commandBufferCount = 1; infos.commandBufferCount = 1;
infos.pCommandBuffers = &commands; infos.pCommandBuffers = &commands;
@ -1169,6 +1202,8 @@ namespace vk
acquire_global_submit_lock(); acquire_global_submit_lock();
CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
release_global_submit_lock(); release_global_submit_lock();
is_pending = true;
} }
}; };
@ -2695,50 +2730,98 @@ public:
bool mapped = false; bool mapped = false;
void *_ptr = nullptr; void *_ptr = nullptr;
std::unique_ptr<buffer> shadow;
std::vector<VkBufferCopy> dirty_ranges;
// NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow // NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow
// Avoid mapping/unmapping to keep these drivers from stalling // Avoid mapping/unmapping to keep these drivers from stalling
// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either // NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either
void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000) void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000)
{ {
data_heap::init(size, name, guard);
const auto device = get_current_renderer(); const auto device = get_current_renderer();
const auto memory_map = device->get_memory_mapping(); const auto memory_map = device->get_memory_mapping();
const VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
data_heap::init(size, name, guard); VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
heap.reset(new buffer(*device, size, memory_map.host_visible_coherent, memory_flags, usage, 0)); auto memory_index = memory_map.host_visible_coherent;
if (!(get_heap_compatible_buffer_types() & usage))
{
LOG_WARNING(RSX, "Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", (u32)usage);
shadow.reset(new buffer(*device, size, memory_index, memory_flags, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
memory_index = memory_map.device_local;
}
heap.reset(new buffer(*device, size, memory_index, memory_flags, usage, 0));
} }
void destroy() void destroy()
{ {
if (mapped) if (mapped)
{ {
heap->unmap(); unmap(true);
mapped = false;
} }
heap.reset(); heap.reset();
shadow.reset();
} }
void* map(size_t offset, size_t size) void* map(size_t offset, size_t size)
{ {
if (!_ptr) if (!_ptr)
{ {
_ptr = heap->map(0, heap->size()); if (shadow)
_ptr = shadow->map(0, shadow->size());
else
_ptr = heap->map(0, heap->size());
mapped = true; mapped = true;
} }
if (shadow)
{
dirty_ranges.push_back({offset, offset, size});
}
return (u8*)_ptr + offset; return (u8*)_ptr + offset;
} }
void unmap() void unmap(bool force = false)
{ {
if (g_cfg.video.disable_vulkan_mem_allocator) if (force || g_cfg.video.disable_vulkan_mem_allocator)
{ {
heap->unmap(); if (shadow)
shadow->unmap();
else
heap->unmap();
mapped = false; mapped = false;
_ptr = nullptr; _ptr = nullptr;
} }
} }
bool dirty()
{
return !dirty_ranges.empty();
}
// Flushes all staged shadow-buffer writes into the device-local heap.
// Records one buffer-to-buffer copy region per dirty range on 'cmd', then
// inserts a memory barrier so subsequent shader reads observe the transfer.
// No-op when nothing is dirty. Both shadow and heap must exist when dirty.
void sync(const vk::command_buffer& cmd)
{
	if (dirty_ranges.empty())
	{
		return;
	}

	verify (HERE), shadow, heap;
	vkCmdCopyBuffer(cmd, shadow->value, heap->value, static_cast<u32>(dirty_ranges.size()), dirty_ranges.data());
	dirty_ranges.clear();   // clear() keeps capacity, avoiding reallocation on the next frame

	// NOTE(review): the barrier only names the VERTEX_SHADER stage; uniform data
	// read by later stages presumably relies on this same barrier — confirm.
	insert_buffer_memory_barrier(cmd, heap->value, 0, heap->size(),
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
}; };
} }

View file

@ -58,13 +58,8 @@ namespace vk
{ {
if (!m_vao.heap) if (!m_vao.heap)
{ {
auto memory_types = vk::get_memory_mapping(m_device->gpu()); m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128);
m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128);
m_vao.init(1 * 0x100000, "overlays VAO", 128);
m_vao.heap = std::make_unique<vk::buffer>(*m_device, 1 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0);
m_ubo.init(8 * 0x100000, "overlays UBO", 128);
m_ubo.heap = std::make_unique<vk::buffer>(*m_device, 8 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0);
} }
} }