diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index d2071b8da8..1ff42dbd72 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "VKHelpers.h" #include "VKCompute.h" #include "Utilities/mutex.h" @@ -14,11 +14,14 @@ namespace vk std::unordered_map> g_typeless_textures; std::unordered_map> g_compute_tasks; + // Garbage collection + std::vector> g_deleted_typeless_textures; + VkSampler g_null_sampler = nullptr; atomic_t g_cb_no_interrupt_flag { false }; - //Driver compatibility workarounds + // Driver compatibility workarounds VkFlags g_heap_compatible_buffer_types = 0; driver_vendor g_driver_vendor = driver_vendor::unknown; bool g_drv_no_primitive_restart_flag = false; @@ -28,7 +31,7 @@ namespace vk u64 g_num_processed_frames = 0; u64 g_num_total_frames = 0; - //global submit guard to prevent race condition on queue submit + // global submit guard to prevent race condition on queue submit shared_mutex g_submit_mutex; VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) @@ -172,20 +175,33 @@ namespace vk return g_null_image_view->value; } - vk::image* get_typeless_helper(VkFormat format) + // Returns the shared transfer image for 'format', replacing it with a larger one when it cannot hold requested_width x requested_height + vk::image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height) { auto create_texture = [&]() { + // Size the image from the (1024-aligned) request; a fixed 4096x4096 image can never satisfy a larger request and would be recreated on every call + u32 new_width = align(requested_width, 1024u); + u32 new_height = align(requested_height, 1024u); + return new vk::image(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, format, 4096, 4096, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0); }; auto &ptr = g_typeless_textures[(u32)format]; - if (!ptr) + if (!ptr || ptr->width() < requested_width || ptr->height() < 
requested_height) { - auto _img = create_texture(); - ptr.reset(_img); + if (ptr) + { + // Never shrink: keep the larger of each dimension so alternating request shapes cannot force endless reallocation and grow the deleted pile + if (ptr->width() > requested_width) requested_width = ptr->width(); + if (ptr->height() > requested_height) requested_height = ptr->height(); + // Safely move to deleted pile + g_deleted_typeless_textures.emplace_back(std::move(ptr)); + } + + ptr.reset(create_texture()); } return ptr.get(); @@ -229,6 +245,7 @@ namespace vk g_scratch_buffer.reset(); g_typeless_textures.clear(); + g_deleted_typeless_textures.clear(); if (g_null_sampler) vkDestroySampler(*g_current_renderer, g_null_sampler, nullptr); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index ca779f03af..4a8f6f779b 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -113,7 +113,7 @@ namespace vk VkSampler null_sampler(); VkImageView null_image_view(vk::command_buffer&); - image* get_typeless_helper(VkFormat format); + image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height); buffer* get_scratch_buffer(); memory_type_mapping get_memory_mapping(const physical_device& dev); diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index e2bdf998d1..17138f5df6 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -289,7 +289,7 @@ namespace vk else if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { //Most depth/stencil formats cannot be scaled using hw blit - if (src_format == VK_FORMAT_UNDEFINED || dst_width > 4096 || (src_height + dst_height) > 4096) + if (src_format == VK_FORMAT_UNDEFINED) { LOG_ERROR(RSX, "Could not blit depth/stencil image. 
src_fmt=0x%x, src=%dx%d, dst=%dx%d", (u32)src_format, src_width, src_height, dst_width, dst_height); @@ -359,21 +359,23 @@ namespace vk vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst, preferred_dst_format, 1, &info); }; - areai src_rect = { (s32)src_x_offset, (s32)src_y_offset, s32(src_x_offset + src_width), s32(src_y_offset + src_height) }; - areai dst_rect = { (s32)dst_x_offset, (s32)dst_y_offset, s32(dst_x_offset + dst_width), s32(dst_y_offset + dst_height) }; + const areai src_rect = { (s32)src_x_offset, (s32)src_y_offset, s32(src_x_offset + src_width), s32(src_y_offset + src_height) }; + const areai dst_rect = { (s32)dst_x_offset, (s32)dst_y_offset, s32(dst_x_offset + dst_width), s32(dst_y_offset + dst_height) }; + const u32 typeless_w = (src_width > dst_width) ? src_width : dst_width; // NOTE(review): the helper presumably holds the src and dst areas stacked vertically (see typeless_h), so it must be as wide as the wider one - confirm against stretch_image_typeless_unsafe + const u32 typeless_h = src_height + dst_height; switch (src_format) { case VK_FORMAT_D16_UNORM: { - auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM); + auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM, typeless_w, typeless_h); change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); stretch_image_typeless_unsafe(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); break; } case VK_FORMAT_D24_UNORM_S8_UINT: { - auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM); + auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, typeless_w, typeless_h); change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); stretch_image_typeless_unsafe(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); break; @@ -384,8 +386,8 @@ namespace vk // Since the typeless transfer itself violates spec, the only way to make it work is to use a D32S8 intermediate // Copy from src->intermediate then intermediate->dst for each aspect separately - auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT); - auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UINT); + auto typeless_depth = 
vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT, typeless_w, typeless_h); + auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UINT, typeless_w, typeless_h); change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL); change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL); @@ -546,7 +548,7 @@ namespace vk vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format); // Transfer bits from src to typeless src - real_src = vk::get_typeless_helper(format); + real_src = vk::get_typeless_helper(format, (u32)internal_width, src->height()); src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint); src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint); @@ -562,7 +564,7 @@ namespace vk vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format); // Transfer bits from dst to typeless dst - real_dst = vk::get_typeless_helper(format); + real_dst = vk::get_typeless_helper(format, (u32)internal_width, dst->height()); dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint); dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint); @@ -603,7 +605,7 @@ namespace vk VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); - real_src = vk::get_typeless_helper(src->info.format); + real_src = vk::get_typeless_helper(src->info.format, src->width(), src->height()); vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 4638152315..9b98894e5d 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -213,7 +213,7 @@ namespace vk if (transfer_width != vram_texture->width() || 
transfer_height != vram_texture->height()) { - // TODO: Synchronize access to typeles textures - target = vk::get_typeless_helper(vram_texture->info.format); + // TODO: Synchronize access to typeless textures + target = vk::get_typeless_helper(vram_texture->info.format, transfer_width, transfer_height); change_image_layout(cmd, target, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); // Allow bilinear filtering on color textures where compatibility is likely