mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 03:25:16 +00:00
vk: Reimplement compliant async texture streaming
- Use CONCURRENT queue access instead of fighting with queue acquire/release via submit chains. The minor benefits of forcing EXCLUSIVE mode are buried under the huge penalty of multiple vkQueueSubmit calls. Batching submits does not help alleviate this situation. We simply must avoid interrupting execution.
This commit is contained in:
parent
0ba0f9d2b9
commit
d846142f0c
9 changed files with 95 additions and 61 deletions
|
@ -386,20 +386,13 @@ void VKGSRender::load_texture_env()
|
|||
// We have to do this here, because we have to assume the CB will be dumped
|
||||
auto& async_task_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
|
||||
|
||||
if (async_task_scheduler.is_recording())
|
||||
if (async_task_scheduler.is_recording() &&
|
||||
!async_task_scheduler.is_host_mode())
|
||||
{
|
||||
if (async_task_scheduler.is_host_mode())
|
||||
// Sync any async scheduler tasks
|
||||
if (auto ev = async_task_scheduler.get_primary_sync_label())
|
||||
{
|
||||
flush_command_queue();
|
||||
ensure(!async_task_scheduler.is_recording());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Sync any async scheduler tasks
|
||||
if (auto ev = async_task_scheduler.get_primary_sync_label())
|
||||
{
|
||||
ev->gpu_wait(*m_current_command_buffer);
|
||||
}
|
||||
ev->gpu_wait(*m_current_command_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2252,11 +2252,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
|||
m_current_command_buffer->end();
|
||||
m_current_command_buffer->tag();
|
||||
|
||||
// Flush any asynchronously scheduled jobs
|
||||
// So this is a bit trippy, but, in this case, the primary CB contains the 'release' operations, not the acquire ones.
|
||||
// The CB that comes in after this submit will acquire the yielded resources automatically.
|
||||
// This means the primary CB is the precursor to the async CB not the other way around.
|
||||
// Async CB should wait for the primary CB to signal.
|
||||
// Supporting concurrent access vastly simplifies this logic.
|
||||
// Instead of doing CB slice injection, we can just chain these together logically with the async stream going first
|
||||
vk::queue_submit_t primary_submit_info{ m_device->get_graphics_queue(), pFence };
|
||||
vk::queue_submit_t secondary_submit_info{};
|
||||
|
||||
|
@ -2265,28 +2262,20 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
|||
primary_submit_info.wait_on(wait_semaphore, pipeline_stage_flags);
|
||||
}
|
||||
|
||||
if (const auto wait_sema = std::exchange(m_dangling_semaphore_signal, nullptr))
|
||||
{
|
||||
// TODO: Sync on VS stage
|
||||
primary_submit_info.wait_on(wait_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
|
||||
}
|
||||
|
||||
auto& async_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
|
||||
const bool require_secondary_flush = async_scheduler.is_recording();
|
||||
|
||||
if (async_scheduler.is_recording())
|
||||
{
|
||||
if (async_scheduler.is_host_mode())
|
||||
{
|
||||
// Inject dependency chain using semaphores.
|
||||
// HEAD = externally synchronized.
|
||||
// TAIL = insert dangling wait, from the async CB to the next CB down.
|
||||
m_dangling_semaphore_signal = *async_scheduler.get_sema();
|
||||
secondary_submit_info.queue_signal(m_dangling_semaphore_signal);
|
||||
const VkSemaphore async_sema = *async_scheduler.get_sema();
|
||||
secondary_submit_info.queue_signal(async_sema);
|
||||
primary_submit_info.wait_on(async_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
|
||||
|
||||
// Delay object destruction by one cycle
|
||||
vk::get_resource_manager()->push_down_current_scope();
|
||||
}
|
||||
|
||||
async_scheduler.flush(secondary_submit_info, force_flush);
|
||||
}
|
||||
|
||||
if (signal_semaphore)
|
||||
|
@ -2296,11 +2285,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
|||
|
||||
m_current_command_buffer->submit(primary_submit_info, force_flush);
|
||||
|
||||
if (require_secondary_flush)
|
||||
{
|
||||
async_scheduler.flush(secondary_submit_info, force_flush);
|
||||
}
|
||||
|
||||
m_queue_status.clear(flush_queue_state::flushing);
|
||||
}
|
||||
|
||||
|
|
|
@ -115,7 +115,6 @@ private:
|
|||
vk::command_pool m_command_buffer_pool;
|
||||
vk::command_buffer_chain<VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
|
||||
vk::command_buffer_chunk* m_current_command_buffer = nullptr;
|
||||
VkSemaphore m_dangling_semaphore_signal = VK_NULL_HANDLE;
|
||||
|
||||
volatile vk::host_data_t* m_host_data_ptr = nullptr;
|
||||
std::unique_ptr<vk::buffer> m_host_object_data;
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
namespace vk
|
||||
{
|
||||
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags)
|
||||
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkSharingMode sharing_mode)
|
||||
{
|
||||
/**
|
||||
* Key layout:
|
||||
|
@ -17,7 +17,8 @@ namespace vk
|
|||
* 40-48: Depth (Max 255)
|
||||
* 48-54: Mipmaps (Max 63) <- We have some room here, it is not possible to have more than 12 mip levels on PS3 and 16 on PC is pushing it.
|
||||
* 54-56: Type (Max 3)
|
||||
* 56-64: Flags (Max 255) <- We have some room here, we only care about a small subset of create flags.
|
||||
* 56-57: Sharing (Max 1) <- Boolean. Exclusive = 0, shared = 1
|
||||
* 57-64: Flags (Max 127) <- We have some room here, we only care about a small subset of create flags.
|
||||
*/
|
||||
ensure(static_cast<u32>(format) < 0xFF);
|
||||
return (static_cast<u64>(format) & 0xFF) |
|
||||
|
@ -26,7 +27,8 @@ namespace vk
|
|||
(static_cast<u64>(d) << 40) |
|
||||
(static_cast<u64>(mipmaps) << 48) |
|
||||
(static_cast<u64>(type) << 54) |
|
||||
(static_cast<u64>(create_flags) << 56);
|
||||
(static_cast<u64>(sharing_mode) << 56) |
|
||||
(static_cast<u64>(create_flags) << 57);
|
||||
}
|
||||
|
||||
texture_cache::cached_image_reference_t::cached_image_reference_t(texture_cache* parent, std::unique_ptr<vk::viewable_image>& previous)
|
||||
|
@ -44,7 +46,7 @@ namespace vk
|
|||
data->current_queue_family = VK_QUEUE_FAMILY_IGNORED;
|
||||
|
||||
// Move this object to the cached image pool
|
||||
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags);
|
||||
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags, data->info.sharingMode);
|
||||
std::lock_guard lock(parent->m_cached_pool_lock);
|
||||
|
||||
if (!parent->m_cache_is_exiting)
|
||||
|
@ -506,13 +508,13 @@ namespace vk
|
|||
return result;
|
||||
}
|
||||
|
||||
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage)
|
||||
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing)
|
||||
{
|
||||
reader_lock lock(m_cached_pool_lock);
|
||||
|
||||
if (!m_cached_images.empty())
|
||||
{
|
||||
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags);
|
||||
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags, sharing);
|
||||
lock.upgrade();
|
||||
|
||||
for (auto it = m_cached_images.begin(); it != m_cached_images.end(); ++it)
|
||||
|
@ -538,7 +540,7 @@ namespace vk
|
|||
const VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
|
||||
const u16 layers = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 6 : 1;
|
||||
|
||||
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags);
|
||||
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags, VK_SHARING_MODE_EXCLUSIVE);
|
||||
|
||||
if (!image)
|
||||
{
|
||||
|
@ -546,7 +548,7 @@ namespace vk
|
|||
image_type,
|
||||
dst_format,
|
||||
w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL,
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3,
|
||||
VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format));
|
||||
|
||||
if (!image->value)
|
||||
|
@ -823,7 +825,18 @@ namespace vk
|
|||
if (region.exists())
|
||||
{
|
||||
image = dynamic_cast<vk::viewable_image*>(region.get_raw_texture());
|
||||
if ((flags & texture_create_flags::do_not_reuse) || !image || region.get_image_type() != type || image->depth() != depth) // TODO
|
||||
bool reusable = true;
|
||||
|
||||
if (flags & texture_create_flags::do_not_reuse)
|
||||
{
|
||||
reusable = false;
|
||||
}
|
||||
else if (flags & texture_create_flags::shareable)
|
||||
{
|
||||
reusable = (image && image->sharing_mode() == VK_SHARING_MODE_CONCURRENT);
|
||||
}
|
||||
|
||||
if (!reusable || !image || region.get_image_type() != type || image->depth() != depth) // TODO
|
||||
{
|
||||
// Incompatible view/type
|
||||
region.destroy();
|
||||
|
@ -860,14 +873,20 @@ namespace vk
|
|||
{
|
||||
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
|
||||
const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format);
|
||||
const VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
|
||||
VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
|
||||
VkSharingMode sharing_mode = (flags & texture_create_flags::shareable) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
||||
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags))
|
||||
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags, sharing_mode))
|
||||
{
|
||||
image = found.release();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (sharing_mode == VK_SHARING_MODE_CONCURRENT)
|
||||
{
|
||||
create_flags |= VK_IMAGE_CREATE_SHAREABLE_RPCS3;
|
||||
}
|
||||
|
||||
image = new vk::viewable_image(*m_device,
|
||||
m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
image_type, vk_format,
|
||||
|
@ -946,7 +965,9 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode() ? texture_create_flags::do_not_reuse : 0;
|
||||
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode()
|
||||
? (texture_create_flags::shareable | texture_create_flags::do_not_reuse)
|
||||
: 0;
|
||||
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled,
|
||||
rsx::component_order::default_, create_flags);
|
||||
|
||||
|
|
|
@ -390,7 +390,8 @@ namespace vk
|
|||
enum texture_create_flags : u32
|
||||
{
|
||||
initialize_image_contents = 1,
|
||||
do_not_reuse = 2
|
||||
do_not_reuse = 2,
|
||||
shareable = 4
|
||||
};
|
||||
|
||||
void on_section_destroyed(cached_texture_section& tex) override;
|
||||
|
@ -421,7 +422,7 @@ namespace vk
|
|||
|
||||
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const;
|
||||
|
||||
std::unique_ptr<vk::viewable_image> find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage);
|
||||
std::unique_ptr<vk::viewable_image> find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing);
|
||||
|
||||
protected:
|
||||
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
|
||||
|
|
|
@ -210,7 +210,7 @@ namespace vk
|
|||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_TILING_OPTIMAL,
|
||||
usage,
|
||||
VK_IMAGE_CREATE_ALLOW_NULL, // Allow creation to fail if there is no memory
|
||||
VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, // Allow creation to fail if there is no memory
|
||||
VMM_ALLOCATION_POOL_SWAPCHAIN,
|
||||
RSX_FORMAT_CLASS_COLOR);
|
||||
};
|
||||
|
|
|
@ -71,6 +71,18 @@ namespace vk
|
|||
info.initialLayout = initial_layout;
|
||||
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
||||
if (image_flags & VK_IMAGE_CREATE_SHAREABLE_RPCS3)
|
||||
{
|
||||
u32 queue_families[] = {
|
||||
dev.get_graphics_queue_family(),
|
||||
dev.get_transfer_queue_family()
|
||||
};
|
||||
|
||||
info.sharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
info.queueFamilyIndexCount = 2;
|
||||
info.pQueueFamilyIndices = queue_families;
|
||||
}
|
||||
|
||||
create_impl(dev, access_flags, memory_type, allocation_pool);
|
||||
m_storage_aspect = get_aspect_flags(format);
|
||||
|
||||
|
@ -101,8 +113,8 @@ namespace vk
|
|||
ensure(!value && !memory);
|
||||
validate(dev, info);
|
||||
|
||||
const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL);
|
||||
info.flags &= ~VK_IMAGE_CREATE_ALLOW_NULL;
|
||||
const bool nullable = !!(info.flags & VK_IMAGE_CREATE_ALLOW_NULL_RPCS3);
|
||||
info.flags &= ~VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3;
|
||||
|
||||
CHECK_RESULT(vkCreateImage(m_device, &info, nullptr, &value));
|
||||
|
||||
|
@ -170,6 +182,11 @@ namespace vk
|
|||
return info.imageType;
|
||||
}
|
||||
|
||||
VkSharingMode image::sharing_mode() const
|
||||
{
|
||||
return info.sharingMode;
|
||||
}
|
||||
|
||||
VkImageAspectFlags image::aspect() const
|
||||
{
|
||||
return m_storage_aspect;
|
||||
|
@ -210,8 +227,14 @@ namespace vk
|
|||
{
|
||||
ensure(m_layout_stack.empty());
|
||||
ensure(current_queue_family != cmd.get_queue_family());
|
||||
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
|
||||
change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, cmd.get_queue_family(), 0u, ~0u);
|
||||
|
||||
if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout)
|
||||
{
|
||||
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
|
||||
const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED;
|
||||
const u32 dst_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? cmd.get_queue_family() : VK_QUEUE_FAMILY_IGNORED;
|
||||
change_image_layout(cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family, 0u, ~0u);
|
||||
}
|
||||
|
||||
current_layout = new_layout;
|
||||
current_queue_family = cmd.get_queue_family();
|
||||
|
@ -221,8 +244,17 @@ namespace vk
|
|||
{
|
||||
ensure(current_queue_family == src_queue_cmd.get_queue_family());
|
||||
ensure(m_layout_stack.empty());
|
||||
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
|
||||
change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family, ~0u, 0u);
|
||||
|
||||
if (info.sharingMode == VK_SHARING_MODE_EXCLUSIVE || current_layout != new_layout)
|
||||
{
|
||||
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
|
||||
const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED;
|
||||
const u32 dst_queue_family2 = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? dst_queue_family : VK_QUEUE_FAMILY_IGNORED;
|
||||
change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family2, ~0u, 0u);
|
||||
}
|
||||
|
||||
current_layout = new_layout;
|
||||
current_queue_family = dst_queue_family;
|
||||
}
|
||||
|
||||
void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout)
|
||||
|
|
|
@ -18,9 +18,12 @@ namespace vk
|
|||
{
|
||||
enum : u32// special remap_encoding enums
|
||||
{
|
||||
VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view
|
||||
VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view
|
||||
VK_IMAGE_CREATE_ALLOW_NULL = 0x80000000, // Special flag that allows null images to be created if there is no memory
|
||||
VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view
|
||||
VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF, // Special encoding for multisampled images; returns a multisampled image view
|
||||
VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, // Special flag that allows null images to be created if there is no memory
|
||||
VK_IMAGE_CREATE_SHAREABLE_RPCS3 = 0x40000000, // Special flag to create a shareable image
|
||||
|
||||
VK_IMAGE_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_IMAGE_CREATE_ALLOW_NULL_RPCS3 | VK_IMAGE_CREATE_SHAREABLE_RPCS3)
|
||||
};
|
||||
|
||||
class image
|
||||
|
@ -73,6 +76,7 @@ namespace vk
|
|||
u8 samples() const;
|
||||
VkFormat format() const;
|
||||
VkImageType type() const;
|
||||
VkSharingMode sharing_mode() const;
|
||||
VkImageAspectFlags aspect() const;
|
||||
rsx::format_class format_class() const;
|
||||
|
||||
|
|
|
@ -97,7 +97,7 @@ namespace vk
|
|||
auto& tex = g_null_image_views[type];
|
||||
tex = std::make_unique<viewable_image>(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
image_type, VK_FORMAT_B8G8R8A8_UNORM, size, size, 1, 1, num_layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL, VMM_ALLOCATION_POOL_SCRATCH);
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3, VMM_ALLOCATION_POOL_SCRATCH);
|
||||
|
||||
if (!tex->value)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue