vk: Reimplement compliant async texture streaming

- Use CONCURRENT queue access instead of fighting with queue acquire/release via submit chains.
  The minor benefits of forcing EXCLUSIVE mode are outweighed by the huge penalty of issuing multiple vkQueueSubmit calls.
  Batching submits does not alleviate this situation; we simply must avoid interrupting execution.
This commit is contained in:
kd-11 2022-07-24 20:28:57 +03:00 committed by kd-11
parent 0ba0f9d2b9
commit d846142f0c
9 changed files with 95 additions and 61 deletions

View file

@ -386,20 +386,13 @@ void VKGSRender::load_texture_env()
// We have to do this here, because we have to assume the CB will be dumped
auto& async_task_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
if (async_task_scheduler.is_recording())
if (async_task_scheduler.is_recording() &&
!async_task_scheduler.is_host_mode())
{
if (async_task_scheduler.is_host_mode())
// Sync any async scheduler tasks
if (auto ev = async_task_scheduler.get_primary_sync_label())
{
flush_command_queue();
ensure(!async_task_scheduler.is_recording());
}
else
{
// Sync any async scheduler tasks
if (auto ev = async_task_scheduler.get_primary_sync_label())
{
ev->gpu_wait(*m_current_command_buffer);
}
ev->gpu_wait(*m_current_command_buffer);
}
}
}

View file

@ -2252,11 +2252,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_current_command_buffer->end();
m_current_command_buffer->tag();
// Flush any asynchronously scheduled jobs
// So this is a bit trippy, but, in this case, the primary CB contains the 'release' operations, not the acquire ones.
// The CB that comes in after this submit will acquire the yielded resources automatically.
// This means the primary CB is the precursor to the async CB not the other way around.
// Async CB should wait for the primary CB to signal.
// Supporting concurrent access vastly simplifies this logic.
// Instead of doing CB slice injection, we can just chain these together logically with the async stream going first
vk::queue_submit_t primary_submit_info{ m_device->get_graphics_queue(), pFence };
vk::queue_submit_t secondary_submit_info{};
@ -2265,28 +2262,20 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
primary_submit_info.wait_on(wait_semaphore, pipeline_stage_flags);
}
if (const auto wait_sema = std::exchange(m_dangling_semaphore_signal, nullptr))
{
// TODO: Sync on VS stage
primary_submit_info.wait_on(wait_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
}
auto& async_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
const bool require_secondary_flush = async_scheduler.is_recording();
if (async_scheduler.is_recording())
{
if (async_scheduler.is_host_mode())
{
// Inject dependency chain using semaphores.
// HEAD = externally synchronized.
// TAIL = insert dangling wait, from the async CB to the next CB down.
m_dangling_semaphore_signal = *async_scheduler.get_sema();
secondary_submit_info.queue_signal(m_dangling_semaphore_signal);
const VkSemaphore async_sema = *async_scheduler.get_sema();
secondary_submit_info.queue_signal(async_sema);
primary_submit_info.wait_on(async_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
// Delay object destruction by one cycle
vk::get_resource_manager()->push_down_current_scope();
}
async_scheduler.flush(secondary_submit_info, force_flush);
}
if (signal_semaphore)
@ -2296,11 +2285,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_current_command_buffer->submit(primary_submit_info, force_flush);
if (require_secondary_flush)
{
async_scheduler.flush(secondary_submit_info, force_flush);
}
m_queue_status.clear(flush_queue_state::flushing);
}

View file

@ -115,7 +115,6 @@ private:
vk::command_pool m_command_buffer_pool;
vk::command_buffer_chain<VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
vk::command_buffer_chunk* m_current_command_buffer = nullptr;
VkSemaphore m_dangling_semaphore_signal = VK_NULL_HANDLE;
volatile vk::host_data_t* m_host_data_ptr = nullptr;
std::unique_ptr<vk::buffer> m_host_object_data;

View file

@ -7,7 +7,7 @@
namespace vk
{
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags)
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkSharingMode sharing_mode)
{
/**
* Key layout:
@ -17,7 +17,8 @@ namespace vk
* 40-48: Depth (Max 255)
* 48-54: Mipmaps (Max 63) <- We have some room here, it is not possible to have more than 12 mip levels on PS3 and 16 on PC is pushing it.
* 54-56: Type (Max 3)
* 56-64: Flags (Max 255) <- We have some room here, we only care about a small subset of create flags.
* 56-57: Sharing (Max 1) <- Boolean. Exclusive = 0, shared = 1
* 57-64: Flags (Max 127) <- We have some room here, we only care about a small subset of create flags.
*/
ensure(static_cast<u32>(format) < 0xFF);
return (static_cast<u64>(format) & 0xFF) |
@ -26,7 +27,8 @@ namespace vk
(static_cast<u64>(d) << 40) |
(static_cast<u64>(mipmaps) << 48) |
(static_cast<u64>(type) << 54) |
(static_cast<u64>(create_flags) << 56);
(static_cast<u64>(sharing_mode) << 56) |
(static_cast<u64>(create_flags) << 57);
}
texture_cache::cached_image_reference_t::cached_image_reference_t(texture_cache* parent, std::unique_ptr<vk::viewable_image>& previous)
@ -44,7 +46,7 @@ namespace vk
data->current_queue_family = VK_QUEUE_FAMILY_IGNORED;
// Move this object to the cached image pool
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags);
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags, data->info.sharingMode);
std::lock_guard lock(parent->m_cached_pool_lock);
if (!parent->m_cache_is_exiting)
@ -506,13 +508,13 @@ namespace vk
return result;
}
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage)
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing)
{
reader_lock lock(m_cached_pool_lock);
if (!m_cached_images.empty())
{
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags);
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags, sharing);
lock.upgrade();
for (auto it = m_cached_images.begin(); it != m_cached_images.end(); ++it)
@ -538,7 +540,7 @@ namespace vk
const VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
const u16 layers = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 6 : 1;
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags);
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags, VK_SHARING_MODE_EXCLUSIVE);
if (!image)
{
@ -546,7 +548,7 @@ namespace vk
image_type,
dst_format,
w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3,
VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format));
if (!image->value)
@ -823,7 +825,18 @@ namespace vk
if (region.exists())
{
image = dynamic_cast<vk::viewable_image*>(region.get_raw_texture());
if ((flags & texture_create_flags::do_not_reuse) || !image || region.get_image_type() != type || image->depth() != depth) // TODO
bool reusable = true;
if (flags & texture_create_flags::do_not_reuse)
{
reusable = false;
}
else if (flags & texture_create_flags::shareable)
{
reusable = (image && image->sharing_mode() == VK_SHARING_MODE_CONCURRENT);
}
if (!reusable || !image || region.get_image_type() != type || image->depth() != depth) // TODO
{
// Incompatible view/type
region.destroy();
@ -860,14 +873,20 @@ namespace vk
{
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format);
const VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkSharingMode sharing_mode = (flags & texture_create_flags::shareable) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags))
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags, sharing_mode))
{
image = found.release();
}
else
{
if (sharing_mode == VK_SHARING_MODE_CONCURRENT)
{
create_flags |= VK_IMAGE_CREATE_SHAREABLE_RPCS3;
}
image = new vk::viewable_image(*m_device,
m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type, vk_format,
@ -946,7 +965,9 @@ namespace vk
}
}
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode() ? texture_create_flags::do_not_reuse : 0;
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode()
? (texture_create_flags::shareable | texture_create_flags::do_not_reuse)
: 0;
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled,
rsx::component_order::default_, create_flags);

View file

@ -390,7 +390,8 @@ namespace vk
enum texture_create_flags : u32
{
initialize_image_contents = 1,
do_not_reuse = 2
do_not_reuse = 2,
shareable = 4
};
void on_section_destroyed(cached_texture_section& tex) override;
@ -421,7 +422,7 @@ namespace vk
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const;
std::unique_ptr<vk::viewable_image> find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage);
std::unique_ptr<vk::viewable_image> find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing);
protected:
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,

View file

@ -210,7 +210,7 @@ namespace vk
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL,
usage,
VK_IMAGE_CREATE_ALLOW_NULL, // Allow creation to fail if there is no memory
VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, // Allow creation to fail if there is no memory
VMM_ALLOCATION_POOL_SWAPCHAIN,
RSX_FORMAT_CLASS_COLOR);
};

View file

@ -71,6 +71,18 @@ namespace vk
info.initialLayout = initial_layout;
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
if (image_flags & VK_IMAGE_CREATE_SHAREABLE_RPCS3)
{
u32 queue_families[] = {
dev.get_graphics_queue_family(),
dev.get_transfer_queue_family()
};
info.sharingMode = VK_SHARING_MODE_CONCURRENT;
info.queueFamilyIndexCount = 2;
info.pQueueFamilyIndices = queue_families;
}
create_impl(dev, access_flags, memory_type, allocation_pool);
m_storage_aspect = get_aspect_flags(format);
@ -101,8 +113,8 @@ namespace vk
ensure(!value && !memory);
validate(dev, info);
const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL);
info.flags &= ~VK_IMAGE_CREATE_ALLOW_NULL;
const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL_RPCS3);
info.flags &= ~VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3;
CHECK_RESULT(vkCreateImage(m_device, &info, nullptr, &value));
@ -170,6 +182,11 @@ namespace vk
return info.imageType;
}
// Reports the sharing mode stored in this image's create info (info.sharingMode).
VkSharingMode image::sharing_mode() const
{
	const VkSharingMode mode = info.sharingMode;
	return mode;
}
VkImageAspectFlags image::aspect() const
{
return m_storage_aspect;
@ -210,8 +227,14 @@ namespace vk
{
ensure(m_layout_stack.empty());
ensure(current_queue_family != cmd.get_queue_family());
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, cmd.get_queue_family(), 0u, ~0u);
// Exclusive images always record the barrier here; concurrent images only
// record one when the layout actually changes.
if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout)
{
// The barrier covers every mip level and layer of the image.
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
// For concurrent images both indices are VK_QUEUE_FAMILY_IGNORED, so the barrier
// names no source or destination queue family and only carries the layout change.
const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED;
const u32 dst_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? cmd.get_queue_family() : VK_QUEUE_FAMILY_IGNORED;
change_image_layout(cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family, 0u, ~0u);
}
current_layout = new_layout;
current_queue_family = cmd.get_queue_family();
@ -221,8 +244,17 @@ namespace vk
{
ensure(current_queue_family == src_queue_cmd.get_queue_family());
ensure(m_layout_stack.empty());
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family, ~0u, 0u);
// Exclusive images always record the barrier here; concurrent images only
// record one when the layout actually changes.
if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout)
{
	// The barrier covers every mip level and layer of the image.
	VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };

	// For concurrent images both indices are VK_QUEUE_FAMILY_IGNORED, so the
	// barrier names no source or destination queue family.
	const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED;
	const u32 dst_queue_family2 = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? dst_queue_family : VK_QUEUE_FAMILY_IGNORED;

	// Pass the computed source index, not current_queue_family directly, so a
	// concurrent image never pairs a real source family with an IGNORED destination.
	change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family2, ~0u, 0u);
}
current_layout = new_layout;
current_queue_family = dst_queue_family;
}
void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout)

View file

@ -18,9 +18,12 @@ namespace vk
{
enum : u32// special remap_encoding enums
{
VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view
VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view
VK_IMAGE_CREATE_ALLOW_NULL = 0x80000000, // Special flag that allows null images to be created if there is no memory
VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view
VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view
VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, // Special flag that allows null images to be created if there is no memory
VK_IMAGE_CREATE_SHAREABLE_RPCS3 = 0x40000000, // Special flag to create a shareable image
VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 | VK_IMAGE_CREATE_SHAREABLE_RPCS3)
};
class image
@ -73,6 +76,7 @@ namespace vk
u8 samples() const;
VkFormat format() const;
VkImageType type() const;
VkSharingMode sharing_mode() const;
VkImageAspectFlags aspect() const;
rsx::format_class format_class() const;

View file

@ -97,7 +97,7 @@ namespace vk
auto& tex = g_null_image_views[type];
tex = std::make_unique<viewable_image>(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type, VK_FORMAT_B8G8R8A8_UNORM, size, size, 1, 1, num_layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL, VMM_ALLOCATION_POOL_SCRATCH);
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, VMM_ALLOCATION_POOL_SCRATCH);
if (!tex->value)
{