diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 1e28eb6d6e..82cf605186 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -565,7 +565,7 @@ VKGSRender::VKGSRender() : GSRender() //Create secondary command_buffer for parallel operations m_secondary_command_buffer_pool.create((*m_device)); - m_secondary_command_buffer.create(m_secondary_command_buffer_pool); + m_secondary_command_buffer.create(m_secondary_command_buffer_pool, true); m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all; //Precalculated stuff @@ -601,7 +601,7 @@ VKGSRender::VKGSRender() : GSRender() } const auto& memory_map = m_device->get_memory_mapping(); - null_buffer = std::make_unique(*m_device, 32, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); + null_buffer = std::make_unique(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32); vk::initialize_compiler_context(); @@ -2504,6 +2504,25 @@ void VKGSRender::write_buffers() void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) { + if (m_attrib_ring_info.dirty() || + m_uniform_buffer_ring_info.dirty() || + m_index_buffer_ring_info.dirty() || + m_transform_constants_ring_info.dirty() || + m_texture_upload_buffer_ring_info.dirty()) + { + std::lock_guard lock(m_secondary_cb_guard); + m_secondary_command_buffer.begin(); + + m_attrib_ring_info.sync(m_secondary_command_buffer); + m_uniform_buffer_ring_info.sync(m_secondary_command_buffer); + m_index_buffer_ring_info.sync(m_secondary_command_buffer); + m_transform_constants_ring_info.sync(m_secondary_command_buffer); + m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer); + + m_secondary_command_buffer.end(); + m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + } + m_current_command_buffer->end(); m_current_command_buffer->tag(); m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 8fb8726be8..3b21142766 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -19,6 +19,7 @@ namespace vk atomic_t g_cb_no_interrupt_flag { false }; //Driver compatibility workarounds + VkFlags g_heap_compatible_buffer_types = 0; driver_vendor g_driver_vendor = driver_vendor::unknown; bool g_drv_no_primitive_restart_flag = false; bool g_drv_sanitize_fp_values = false; @@ -273,6 +274,7 @@ namespace vk g_num_processed_frames = 0; g_num_total_frames = 0; g_driver_vendor = driver_vendor::unknown; + g_heap_compatible_buffer_types = 0; const auto gpu_name = g_current_renderer->gpu().name(); @@ -313,6 +315,49 @@ namespace vk LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name); } } + + { + // Buffer memory tests, only useful for portability on macOS + VkBufferUsageFlags types[] = + { + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + }; + + VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + VkBuffer tmp; + VkMemoryRequirements memory_reqs; + + VkBufferCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + info.size = 4096; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.flags = 0; + + for (const auto &usage : types) + { + info.usage = usage; + CHECK_RESULT(vkCreateBuffer(*g_current_renderer, &info, nullptr, &tmp)); + + vkGetBufferMemoryRequirements(*g_current_renderer, tmp, &memory_reqs); + if (g_current_renderer->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr)) + { + g_heap_compatible_buffer_types |= usage; + } + + vkDestroyBuffer(*g_current_renderer, tmp, nullptr); + } + } + } + + VkFlags get_heap_compatible_buffer_types() + { + return g_heap_compatible_buffer_types; } driver_vendor get_driver_vendor() diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index a949266831..e9f8fdfc35 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -96,6 +96,7 @@ namespace vk bool emulate_primitive_restart(rsx::primitive_type type); bool sanitize_fp_values(); bool fence_reset_disabled(); + VkFlags get_heap_compatible_buffer_types(); driver_vendor get_driver_vendor(); VkComponentMapping default_component_map(); @@ -535,7 +536,11 @@ namespace vk { if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask) { - *type_index = i; + if (type_index) + { + *type_index = i; + } + return true; } } @@ -1078,6 +1083,8 @@ namespace vk { private: bool is_open = false; + bool is_pending = false; + VkFence m_submit_fence = VK_NULL_HANDLE; protected: vk::command_pool *pool = nullptr; @@ -1095,21 +1102,33 @@ namespace vk command_buffer() {} ~command_buffer() {} - void create(vk::command_pool &cmd_pool) + void create(vk::command_pool &cmd_pool, bool auto_reset = false) { VkCommandBufferAllocateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; infos.commandBufferCount = 1; infos.commandPool = (VkCommandPool)cmd_pool; infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); + + if (auto_reset) + { + VkFenceCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence)); + } + pool = &cmd_pool; } void destroy() { vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); + + if (m_submit_fence) + { + vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr); + } } vk::command_pool& get_command_pool() const @@ -1124,6 +1143,15 @@ namespace vk void begin() { + if (m_submit_fence && is_pending) + { + while (vkGetFenceStatus(pool->get_owner(), m_submit_fence) != VK_SUCCESS); + is_pending = false; + + CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence)); + CHECK_RESULT(vkResetCommandBuffer(commands, 0)); + } + if (is_open) return; @@ -1158,6 +1186,11 @@ namespace vk return; } + if (fence == VK_NULL_HANDLE) + { + fence = m_submit_fence; + } + VkSubmitInfo infos = {}; infos.commandBufferCount = 1; infos.pCommandBuffers = &commands; @@ -1169,6 +1202,8 @@ namespace vk acquire_global_submit_lock(); CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); release_global_submit_lock(); + + is_pending = true; } }; @@ -2695,50 +2730,98 @@ public: bool mapped = false; void *_ptr = nullptr; + std::unique_ptr shadow; + std::vector dirty_ranges; + // NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow // Avoid mapping/unmapping to keep these drivers from stalling // NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000) { + data_heap::init(size, name, guard); + const auto device = get_current_renderer(); const auto memory_map = device->get_memory_mapping(); - const VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - data_heap::init(size, name, guard); - heap.reset(new buffer(*device, size, memory_map.host_visible_coherent, memory_flags, usage, 0)); + VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + auto memory_index = memory_map.host_visible_coherent; + + if (!(get_heap_compatible_buffer_types() & usage)) + { + LOG_WARNING(RSX, "Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", (u32)usage); + + shadow.reset(new buffer(*device, size, memory_index, memory_flags, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); + usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + memory_index = memory_map.device_local; + } + + heap.reset(new buffer(*device, size, memory_index, memory_flags, usage, 0)); } void destroy() { if (mapped) { - heap->unmap(); - mapped = false; + unmap(true); } heap.reset(); + shadow.reset(); } void* map(size_t offset, size_t size) { if (!_ptr) { - _ptr = heap->map(0, heap->size()); + if (shadow) + _ptr = shadow->map(0, shadow->size()); + else + _ptr = heap->map(0, heap->size()); + mapped = true; } + if (shadow) + { + dirty_ranges.push_back({offset, offset, size}); + } + return (u8*)_ptr + offset; } - void unmap() + void unmap(bool force = false) { - if (g_cfg.video.disable_vulkan_mem_allocator) + if (force || g_cfg.video.disable_vulkan_mem_allocator) { - heap->unmap(); + if (shadow) + shadow->unmap(); + else + heap->unmap(); + mapped = false; _ptr = nullptr; } } + + bool dirty() + { + return !dirty_ranges.empty(); + } + + void sync(const vk::command_buffer& cmd) + { + if (!dirty_ranges.empty()) + { + verify (HERE), shadow, heap; + vkCmdCopyBuffer(cmd, shadow->value, heap->value, (u32)dirty_ranges.size(), dirty_ranges.data()); + dirty_ranges.resize(0); + + insert_buffer_memory_barrier(cmd, heap->value, 0, heap->size(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + } + } }; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 27acc00814..b061294c47 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -58,13 +58,8 @@ namespace vk { if (!m_vao.heap) { - auto memory_types = vk::get_memory_mapping(m_device->gpu()); - - m_vao.init(1 * 0x100000, "overlays VAO", 128); - m_vao.heap = std::make_unique(*m_device, 1 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0); - - m_ubo.init(8 * 0x100000, "overlays UBO", 128); - m_ubo.heap = std::make_unique(*m_device, 8 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0); + m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128); + m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128); } }