diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 568d72f869..892328fa80 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -386,20 +386,13 @@ void VKGSRender::load_texture_env() // We have to do this here, because we have to assume the CB will be dumped auto& async_task_scheduler = g_fxo->get(); - if (async_task_scheduler.is_recording()) + if (async_task_scheduler.is_recording() && + !async_task_scheduler.is_host_mode()) { - if (async_task_scheduler.is_host_mode()) + // Sync any async scheduler tasks + if (auto ev = async_task_scheduler.get_primary_sync_label()) { - flush_command_queue(); - ensure(!async_task_scheduler.is_recording()); - } - else - { - // Sync any async scheduler tasks - if (auto ev = async_task_scheduler.get_primary_sync_label()) - { - ev->gpu_wait(*m_current_command_buffer); - } + ev->gpu_wait(*m_current_command_buffer); } } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index c503ff21d0..5876a9b573 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2252,11 +2252,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_current_command_buffer->end(); m_current_command_buffer->tag(); - // Flush any asynchronously scheduled jobs - // So this is a bit trippy, but, in this case, the primary CB contains the 'release' operations, not the acquire ones. - // The CB that comes in after this submit will acquire the yielded resources automatically. - // This means the primary CB is the precursor to the async CB not the other way around. - // Async CB should wait for the primary CB to signal. + // Supporting concurrent access vastly simplifies this logic. 
+ // Instead of doing CB slice injection, we can just chain these together logically with the async stream going first vk::queue_submit_t primary_submit_info{ m_device->get_graphics_queue(), pFence }; vk::queue_submit_t secondary_submit_info{}; @@ -2265,28 +2262,20 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore primary_submit_info.wait_on(wait_semaphore, pipeline_stage_flags); } - if (const auto wait_sema = std::exchange(m_dangling_semaphore_signal, nullptr)) - { - // TODO: Sync on VS stage - primary_submit_info.wait_on(wait_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - } - auto& async_scheduler = g_fxo->get(); - const bool require_secondary_flush = async_scheduler.is_recording(); - if (async_scheduler.is_recording()) { if (async_scheduler.is_host_mode()) { - // Inject dependency chain using semaphores. - // HEAD = externally synchronized. - // TAIL = insert dangling wait, from the async CB to the next CB down. - m_dangling_semaphore_signal = *async_scheduler.get_sema(); - secondary_submit_info.queue_signal(m_dangling_semaphore_signal); + const VkSemaphore async_sema = *async_scheduler.get_sema(); + secondary_submit_info.queue_signal(async_sema); + primary_submit_info.wait_on(async_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); // Delay object destruction by one cycle vk::get_resource_manager()->push_down_current_scope(); } + + async_scheduler.flush(secondary_submit_info, force_flush); } if (signal_semaphore) @@ -2296,11 +2285,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_current_command_buffer->submit(primary_submit_info, force_flush); - if (require_secondary_flush) - { - async_scheduler.flush(secondary_submit_info, force_flush); - } - m_queue_status.clear(flush_queue_state::flushing); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index b8138f15e8..8a3fc3e893 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -115,7 +115,6 @@ 
private: vk::command_pool m_command_buffer_pool; vk::command_buffer_chain m_primary_cb_list; vk::command_buffer_chunk* m_current_command_buffer = nullptr; - VkSemaphore m_dangling_semaphore_signal = VK_NULL_HANDLE; volatile vk::host_data_t* m_host_data_ptr = nullptr; std::unique_ptr m_host_object_data; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp index 934c599744..6f7721b2ac 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp @@ -7,7 +7,7 @@ namespace vk { - u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags) + u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkSharingMode sharing_mode) { /** * Key layout: @@ -17,7 +17,8 @@ namespace vk * 40-48: Depth (Max 255) * 48-54: Mipmaps (Max 63) <- We have some room here, it is not possible to have more than 12 mip levels on PS3 and 16 on PC is pushing it. * 54-56: Type (Max 3) - * 56-64: Flags (Max 255) <- We have some room here, we only care about a small subset of create flags. + * 56-57: Sharing (Max 1) <- Boolean. Exclusive = 0, shared = 1 + * 57-64: Flags (Max 127) <- We have some room here, we only care about a small subset of create flags. 
*/ ensure(static_cast(format) < 0xFF); return (static_cast(format) & 0xFF) | @@ -26,7 +27,8 @@ namespace vk (static_cast(d) << 40) | (static_cast(mipmaps) << 48) | (static_cast(type) << 54) | - (static_cast(create_flags) << 56); + (static_cast(sharing_mode) << 56) | + (static_cast(create_flags) << 57); } texture_cache::cached_image_reference_t::cached_image_reference_t(texture_cache* parent, std::unique_ptr& previous) @@ -44,7 +46,7 @@ namespace vk data->current_queue_family = VK_QUEUE_FAMILY_IGNORED; // Move this object to the cached image pool - const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags); + const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags, data->info.sharingMode); std::lock_guard lock(parent->m_cached_pool_lock); if (!parent->m_cache_is_exiting) @@ -506,13 +508,13 @@ namespace vk return result; } - std::unique_ptr texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage) + std::unique_ptr texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing) { reader_lock lock(m_cached_pool_lock); if (!m_cached_images.empty()) { - const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags); + const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags, sharing); lock.upgrade(); for (auto it = m_cached_images.begin(); it != m_cached_images.end(); ++it) @@ -538,7 +540,7 @@ namespace vk const VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); const u16 layers = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 
6 : 1; - auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags); + auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags, VK_SHARING_MODE_EXCLUSIVE); if (!image) { @@ -546,7 +548,7 @@ namespace vk image_type, dst_format, w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); if (!image->value) @@ -823,7 +825,18 @@ namespace vk if (region.exists()) { image = dynamic_cast(region.get_raw_texture()); - if ((flags & texture_create_flags::do_not_reuse) || !image || region.get_image_type() != type || image->depth() != depth) // TODO + bool reusable = true; + + if (flags & texture_create_flags::do_not_reuse) + { + reusable = false; + } + else if (flags & texture_create_flags::shareable) + { + reusable = (image && image->sharing_mode() == VK_SHARING_MODE_CONCURRENT); + } + + if (!reusable || !image || region.get_image_type() != type || image->depth() != depth) // TODO { // Incompatible view/type region.destroy(); @@ -860,14 +873,20 @@ namespace vk { const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format); - const VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; + VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; + VkSharingMode sharing_mode = (flags & texture_create_flags::shareable) ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; - if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags)) + if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags, sharing_mode)) { image = found.release(); } else { + if (sharing_mode == VK_SHARING_MODE_CONCURRENT) + { + create_flags |= VK_IMAGE_CREATE_SHAREABLE_RPCS3; + } + image = new vk::viewable_image(*m_device, m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, vk_format, @@ -946,7 +965,9 @@ namespace vk } } - const rsx::flags32_t create_flags = g_fxo->get().is_host_mode() ? texture_create_flags::do_not_reuse : 0; + const rsx::flags32_t create_flags = g_fxo->get().is_host_mode() + ? (texture_create_flags::shareable | texture_create_flags::do_not_reuse) + : 0; auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled, rsx::component_order::default_, create_flags); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index f63806ec9c..507225bccd 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -390,7 +390,8 @@ namespace vk enum texture_create_flags : u32 { initialize_image_contents = 1, - do_not_reuse = 2 + do_not_reuse = 2, + shareable = 4 }; void on_section_destroyed(cached_texture_section& tex) override; @@ -421,7 +422,7 @@ namespace vk vk::image* get_template_from_collection_impl(const std::vector& sections_to_transfer) const; - std::unique_ptr find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage); + std::unique_ptr find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing); protected: vk::image_view* 
create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index 6b51a98c7e..1dcee7d934 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -210,7 +210,7 @@ namespace vk VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, usage, - VK_IMAGE_CREATE_ALLOW_NULL, // Allow creation to fail if there is no memory + VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, // Allow creation to fail if there is no memory VMM_ALLOCATION_POOL_SWAPCHAIN, RSX_FORMAT_CLASS_COLOR); }; diff --git a/rpcs3/Emu/RSX/VK/vkutils/image.cpp b/rpcs3/Emu/RSX/VK/vkutils/image.cpp index 265e68c153..3f03185017 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/image.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/image.cpp @@ -71,6 +71,18 @@ namespace vk info.initialLayout = initial_layout; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + // Declared outside the branch: info.pQueueFamilyIndices must stay valid until create_impl() has passed info to vkCreateImage + u32 queue_families[] = { + dev.get_graphics_queue_family(), + dev.get_transfer_queue_family() + }; + if (image_flags & VK_IMAGE_CREATE_SHAREABLE_RPCS3) + { + info.sharingMode = VK_SHARING_MODE_CONCURRENT; + info.queueFamilyIndexCount = 2; + info.pQueueFamilyIndices = queue_families; + } + create_impl(dev, access_flags, memory_type, allocation_pool); m_storage_aspect = get_aspect_flags(format); @@ -101,8 +113,8 @@ namespace vk ensure(!value && !memory); validate(dev, info); - const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL); - info.flags &= ~VK_IMAGE_CREATE_ALLOW_NULL; + const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL_RPCS3); + info.flags &= ~VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3; CHECK_RESULT(vkCreateImage(m_device, &info, nullptr, &value)); @@ -170,6 +182,11 @@ namespace vk return info.imageType; } + VkSharingMode image::sharing_mode() const + { + return info.sharingMode; + } + VkImageAspectFlags image::aspect() const { return
m_storage_aspect; @@ -210,8 +227,14 @@ namespace vk { ensure(m_layout_stack.empty()); ensure(current_queue_family != cmd.get_queue_family()); - VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() }; - change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, cmd.get_queue_family(), 0u, ~0u); + + if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout) + { + VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() }; + const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED; + const u32 dst_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? cmd.get_queue_family() : VK_QUEUE_FAMILY_IGNORED; + change_image_layout(cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family, 0u, ~0u); + } current_layout = new_layout; current_queue_family = cmd.get_queue_family(); @@ -221,8 +244,17 @@ namespace vk { ensure(current_queue_family == src_queue_cmd.get_queue_family()); ensure(m_layout_stack.empty()); - VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() }; - change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family, ~0u, 0u); + + if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout) + { + VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() }; + const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED; + const u32 dst_queue_family2 = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? 
dst_queue_family : VK_QUEUE_FAMILY_IGNORED; + change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family2, ~0u, 0u); // Sharing-aware indices: both are VK_QUEUE_FAMILY_IGNORED for concurrent images + } + + current_layout = new_layout; + current_queue_family = dst_queue_family; } void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout) diff --git a/rpcs3/Emu/RSX/VK/vkutils/image.h b/rpcs3/Emu/RSX/VK/vkutils/image.h index 8a570b7ddf..4a0372d226 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/image.h +++ b/rpcs3/Emu/RSX/VK/vkutils/image.h @@ -18,9 +18,12 @@ namespace vk { enum : u32// special remap_encoding enums { - VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view - VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view - VK_IMAGE_CREATE_ALLOW_NULL = 0x80000000, // Special flag that allows null images to be created if there is no memory + VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view + VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view + VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, // Special flag that allows null images to be created if there is no memory + VK_IMAGE_CREATE_SHAREABLE_RPCS3 = 0x40000000, // Special flag to create a shareable image + + VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 | VK_IMAGE_CREATE_SHAREABLE_RPCS3) }; class image @@ -73,6 +76,7 @@ namespace vk u8 samples() const; VkFormat format() const; VkImageType type() const; + VkSharingMode sharing_mode() const; VkImageAspectFlags aspect() const; rsx::format_class format_class() const; diff --git a/rpcs3/Emu/RSX/VK/vkutils/scratch.cpp b/rpcs3/Emu/RSX/VK/vkutils/scratch.cpp index a07a50d59c..7b47108a3d 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/scratch.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/scratch.cpp @@ -97,7 +97,7 @@ namespace vk auto& tex = g_null_image_views[type]; tex =
std::make_unique(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, VK_FORMAT_B8G8R8A8_UNORM, size, size, 1, 1, num_layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL, VMM_ALLOCATION_POOL_SCRATCH); + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, VMM_ALLOCATION_POOL_SCRATCH); if (!tex->value) {