From 5f6d1644cf7f220ce768eafc0c8b809dfb613ba6 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 14 Jul 2021 01:25:32 +0300 Subject: [PATCH] vk: Refactor VkTextureCache by moving most of the code to cpp file - Makes adding next set of changes easier --- rpcs3/Emu/RSX/VK/VKTextureCache.cpp | 870 +++++++++++++++++++++++++++- rpcs3/Emu/RSX/VK/VKTextureCache.h | 863 ++------------------------- 2 files changed, 903 insertions(+), 830 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp index 9af8e6ce39..31678ecfde 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp @@ -58,7 +58,7 @@ namespace vk { const auto texel_layout = vk::get_format_element_size(src->format()); const auto elem_size = texel_layout.first; - vk::cs_shuffle_base *shuffle_kernel; + vk::cs_shuffle_base* shuffle_kernel; if (elem_size == 2) { @@ -162,12 +162,28 @@ namespace vk sync_timestamp = get_system_time(); } + void texture_cache::on_section_destroyed(cached_texture_section& tex) + { + if (tex.is_managed()) + { + vk::get_resource_manager()->dispose(tex.get_texture()); + } + } + + void texture_cache::clear() + { + baseclass::clear(); + + m_temporary_storage.clear(); + m_temporary_memory_size = 0; + } + void texture_cache::copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) const { const auto dst_aspect = dst->aspect(); const auto dst_bpp = vk::get_format_texel_width(dst->format()); - for (const auto &section : sections_to_transfer) + for (const auto& section : sections_to_transfer) { if (!section.src) continue; @@ -324,11 +340,11 @@ namespace vk dst_y = src_h; } - vk::copy_scaled_image(cmd, tmp, _dst, - areai{ 0, 0, src_w, static_cast(src_h) }, - coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } }, - 1, tmp->info.format == _dst->info.format, - VK_FILTER_NEAREST); + vk::copy_scaled_image(cmd, tmp, _dst, + areai{ 0, 0, src_w, static_cast(src_h) }, + coordi{ { 
dst_x, dst_y }, { section.dst_w, section.dst_h } }, + 1, tmp->info.format == _dst->info.format, + VK_FILTER_NEAREST); } else { @@ -358,4 +374,844 @@ namespace vk section.src->pop_layout(cmd); } } + + VkComponentMapping texture_cache::apply_component_mapping_flags(u32 gcm_format, rsx::component_order flags, const rsx::texture_channel_remap_t& remap_vector) const + { + switch (gcm_format) + { + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + //Dont bother letting this propagate + return{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; + default: + break; + } + + VkComponentMapping mapping = {}; + switch (flags) + { + case rsx::component_order::default_: + { + mapping = vk::apply_swizzle_remap(vk::get_component_mapping(gcm_format), remap_vector); + break; + } + case rsx::component_order::native: + { + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; + break; + } + case rsx::component_order::swapped_native: + { + mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; + break; + } + default: + break; + } + + return mapping; + } + + vk::image* texture_cache::get_template_from_collection_impl(const std::vector& sections_to_transfer) const + { + if (sections_to_transfer.size() == 1) [[likely]] + { + return sections_to_transfer.front().src; + } + + vk::image* result = nullptr; + for (const auto& section : sections_to_transfer) + { + if (!section.src) + continue; + + if (!result) + { + result = section.src; + } + else + { + if (section.src->native_component_map.a != result->native_component_map.a || + section.src->native_component_map.r != result->native_component_map.r || + section.src->native_component_map.g != result->native_component_map.g || + section.src->native_component_map.b != 
result->native_component_map.b) + { + // TODO + // This requires a far more complex setup as its not always possible to mix and match without compute assistance + return nullptr; + } + } + } + + return result; + } + + std::unique_ptr texture_cache::find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps) + { + //const auto current_frame = vk::get_current_frame_id(); + for (auto& e : m_temporary_storage) + { + if (e.can_reuse && e.matches(format, w, h, d, mipmaps, 0)) + { + m_temporary_memory_size -= e.block_size; + e.block_size = 0; + return std::move(e.combined_image); + } + } + + return {}; + } + + std::unique_ptr texture_cache::find_temporary_cubemap(VkFormat format, u16 size) + { + //const auto current_frame = vk::get_current_frame_id(); + for (auto& e : m_temporary_storage) + { + if (e.can_reuse && e.matches(format, size, size, 1, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) + { + m_temporary_memory_size -= e.block_size; + e.block_size = 0; + return std::move(e.combined_image); + } + } + + return {}; + } + + vk::image_view* texture_cache::create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, + u32 gcm_format, u16 x, u16 y, u16 w, u16 h, u16 d, u8 mips, const rsx::texture_channel_remap_t& remap_vector, bool copy) + { + std::unique_ptr image; + + VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; + VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); + u16 layers = 1; + + if (!image_flags) [[likely]] + { + image = find_temporary_image(dst_format, w, h, 1, mips); + } + else + { + image = find_temporary_cubemap(dst_format, w); + layers = 6; + } + + if (!image) + { + image = std::make_unique(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + image_type, + dst_format, + w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags, + VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); + } + + //This method is almost exclusively used to work on framebuffer resources + //Keep the original swizzle layout unless there is data format conversion + VkComponentMapping view_swizzle; + if (!source || dst_format != source->info.format) + { + // This is a data cast operation + // Use native mapping for the new type + // TODO: Also simulate the readback+reupload step (very tricky) + const auto remap = get_component_mapping(gcm_format); + view_swizzle = { remap[1], remap[2], remap[3], remap[0] }; + } + else + { + view_swizzle = source->native_component_map; + } + + image->set_native_component_layout(view_swizzle); + auto view = image->get_view(rsx::get_remap_encoding(remap_vector), remap_vector); + + if (copy) + { + std::vector region = + { { + source, + rsx::surface_transform::coordinate_transform, + 0, + x, y, 0, 0, 0, + w, h, w, h + } }; + + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy_transfer_regions_impl(cmd, image.get(), region); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + const u32 resource_memory = w * h * 4; //Rough approximate + m_temporary_storage.emplace_back(image); + m_temporary_storage.back().block_size = 
resource_memory; + m_temporary_memory_size += resource_memory; + + return view; + } + + vk::image_view* texture_cache::create_temporary_subresource_view(vk::command_buffer& cmd, vk::image* source, u32 gcm_format, + u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) + { + return create_temporary_subresource_view_impl(cmd, source, source->info.imageType, VK_IMAGE_VIEW_TYPE_2D, + gcm_format, x, y, w, h, 1, 1, remap_vector, true); + } + + vk::image_view* texture_cache::create_temporary_subresource_view(vk::command_buffer& cmd, vk::image** source, u32 gcm_format, + u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) + { + return create_temporary_subresource_view(cmd, *source, gcm_format, x, y, w, h, remap_vector); + } + + vk::image_view* texture_cache::generate_cubemap_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 size, + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) + { + auto _template = get_template_from_collection_impl(sections_to_copy); + auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, + VK_IMAGE_VIEW_TYPE_CUBE, gcm_format, 0, 0, size, size, 1, 1, remap_vector, false); + + const auto image = result->image(); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); + VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 6 }; + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + + if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) + { + VkClearColorValue clear = {}; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + else + { + VkClearDepthStencilValue clear = { 1.f, 0 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + + copy_transfer_regions_impl(cmd, image, sections_to_copy); + + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + 
return result; + } + + vk::image_view* texture_cache::generate_3d_from_2d_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, u16 depth, + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) + { + auto _template = get_template_from_collection_impl(sections_to_copy); + auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_3D, + VK_IMAGE_VIEW_TYPE_3D, gcm_format, 0, 0, width, height, depth, 1, remap_vector, false); + + const auto image = result->image(); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); + VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + + if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) + { + VkClearColorValue clear = {}; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + else + { + VkClearDepthStencilValue clear = { 1.f, 0 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + + copy_transfer_regions_impl(cmd, image, sections_to_copy); + + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + return result; + } + + vk::image_view* texture_cache::generate_atlas_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) + { + auto _template = get_template_from_collection_impl(sections_to_copy); + auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, + VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, 1, 1, remap_vector, false); + + const auto image = result->image(); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); + VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; + vk::change_image_layout(cmd, image, 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + + if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) + { + VkClearColorValue clear = {}; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + else + { + VkClearDepthStencilValue clear = { 1.f, 0 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + + copy_transfer_regions_impl(cmd, image, sections_to_copy); + + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + return result; + } + + vk::image_view* texture_cache::generate_2d_mipmaps_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) + { + const auto mipmaps = ::narrow(sections_to_copy.size()); + auto _template = get_template_from_collection_impl(sections_to_copy); + auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, + VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, 1, mipmaps, remap_vector, false); + + const auto image = result->image(); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); + VkImageSubresourceRange dst_range = { dst_aspect, 0, mipmaps, 0, 1 }; + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + + if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) + { + VkClearColorValue clear = {}; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + else + { + VkClearDepthStencilValue clear = { 1.f, 0 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + + copy_transfer_regions_impl(cmd, image, sections_to_copy); + + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + return result; + } + + void texture_cache::release_temporary_subresource(vk::image_view* view) + { + auto handle = 
dynamic_cast(view->image()); + for (auto& e : m_temporary_storage) + { + if (e.combined_image.get() == handle) + { + e.can_reuse = true; + return; + } + } + } + + void texture_cache::update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) + { + std::vector region = + { { + src, + rsx::surface_transform::identity, + 0, + 0, 0, 0, 0, 0, + width, height, width, height + } }; + + auto dst = dst_view->image(); + dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy_transfer_regions_impl(cmd, dst, region); + dst->pop_layout(cmd); + } + + cached_texture_section* texture_cache::create_new_texture(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, + u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, rsx::component_order swizzle_flags, rsx::flags32_t flags) + { + const auto section_depth = depth; + + // Define desirable attributes based on type + VkImageType image_type; + VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + u8 layer = 0; + + switch (type) + { + case rsx::texture_dimension_extended::texture_dimension_1d: + image_type = VK_IMAGE_TYPE_1D; + height = 1; + depth = 1; + layer = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_2d: + image_type = VK_IMAGE_TYPE_2D; + depth = 1; + layer = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_cubemap: + image_type = VK_IMAGE_TYPE_2D; + depth = 1; + layer = 6; + break; + case rsx::texture_dimension_extended::texture_dimension_3d: + image_type = VK_IMAGE_TYPE_3D; + layer = 1; + break; + default: + fmt::throw_exception("Unreachable"); + } + + // Check what actually exists at that address + const rsx::image_section_attributes_t search_desc = { .gcm_format = gcm_format, .width = width, .height = height, .depth = 
section_depth, .mipmaps = mipmaps }; + const bool allow_dirty = (context != rsx::texture_upload_context::framebuffer_storage); + cached_texture_section& region = *find_cached_texture(rsx_range, search_desc, true, true, allow_dirty); + ensure(!region.is_locked()); + + vk::viewable_image* image = nullptr; + if (region.exists()) + { + image = dynamic_cast(region.get_raw_texture()); + if (!image || region.get_image_type() != type || image->depth() != depth) // TODO + { + // Incompatible view/type + region.destroy(); + image = nullptr; + } + else + { + ensure(region.is_managed()); + + // Reuse + region.set_rsx_pitch(pitch); + + if (flags & texture_create_flags::initialize_image_contents) + { + // Wipe memory + image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkImageSubresourceRange range{ image->aspect(), 0, image->mipmaps(), 0, image->layers() }; + if (image->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + { + VkClearColorValue color = { {0.f, 0.f, 0.f, 1.f} }; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &color, 1, &range); + } + else + { + VkClearDepthStencilValue clear{ 1.f, 255 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &range); + } + } + } + } + + if (!image) + { + const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; + const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format); + + image = new vk::viewable_image(*m_device, + m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + image_type, vk_format, + width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? 
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0, + VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); + + // New section, we must prepare it + region.reset(rsx_range); + region.set_gcm_format(gcm_format); + region.set_image_type(type); + region.create(width, height, section_depth, mipmaps, image, pitch, true, gcm_format); + } + + region.set_view_flags(swizzle_flags); + region.set_context(context); + region.set_swizzled(swizzled); + region.set_dirty(false); + + image->native_component_map = apply_component_mapping_flags(gcm_format, swizzle_flags, rsx::default_remap_vector); + + // Its not necessary to lock blit dst textures as they are just reused as necessary + switch (context) + { + case rsx::texture_upload_context::shader_read: + case rsx::texture_upload_context::blit_engine_src: + region.protect(utils::protection::ro); + read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range); + break; + case rsx::texture_upload_context::blit_engine_dst: + region.set_unpack_swap_bytes(true); + no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range); + break; + case rsx::texture_upload_context::dma: + case rsx::texture_upload_context::framebuffer_storage: + // Should not initialized with this method + default: + fmt::throw_exception("Unexpected upload context 0x%x", u32(context)); + } + + update_cache_tag(); + return &region; + } + + cached_texture_section* texture_cache::create_nul_section(vk::command_buffer& /*cmd*/, const utils::address_range& rsx_range, bool memory_load) + { + auto& region = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false); + ensure(!region.is_locked()); + + // Prepare section + region.reset(rsx_range); + region.set_context(rsx::texture_upload_context::dma); + region.set_dirty(false); + region.set_unpack_swap_bytes(true); + + if (memory_load) + { + vk::map_dma(rsx_range.start, rsx_range.length()); + vk::load_dma(rsx_range.start, 
rsx_range.length()); + } + + no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range); + update_cache_tag(); + return &region; + } + + cached_texture_section* texture_cache::upload_image_from_cpu(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, + rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) + { + if (context != rsx::texture_upload_context::shader_read) + { + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + } + auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled, + rsx::component_order::default_, 0); + + auto image = section->get_raw_texture(); + image->set_debug_name(fmt::format("Raw Texture @0x%x", rsx_range.start)); + + vk::enter_uninterruptible(); + + bool input_swizzled = swizzled; + if (context == rsx::texture_upload_context::blit_engine_src) + { + // Swizzling is ignored for blit engine copy and emulated using remapping + input_swizzled = false; + } + + rsx::flags32_t upload_command_flags = initialize_image_layout | + (rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute ? upload_contents_async : upload_contents_inline); + + vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, image->aspect(), + *m_texture_upload_heap, upload_heap_align_default, upload_command_flags); + + vk::leave_uninterruptible(); + + if (context != rsx::texture_upload_context::shader_read) + { + // Insert appropriate barrier depending on use. Shader read resources should be lazy-initialized before consuming. 
+ // TODO: All texture resources should be initialized on use, this is wasteful + + VkImageLayout preferred_layout; + switch (context) + { + default: + preferred_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case rsx::texture_upload_context::blit_engine_dst: + preferred_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + break; + case rsx::texture_upload_context::blit_engine_src: + preferred_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + break; + } + + if (preferred_layout != image->current_layout) + { + image->change_layout(cmd, preferred_layout); + } + else + { + // Insert ordering barrier + ensure(preferred_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + insert_image_memory_barrier(cmd, image->value, image->current_layout, preferred_layout, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + { image->aspect(), 0, image->mipmaps(), 0, image->layers() }); + } + } + + section->last_write_tag = rsx::get_shared_tag(); + return section; + } + + void texture_cache::set_component_order(cached_texture_section& section, u32 gcm_format, rsx::component_order expected_flags) + { + if (expected_flags == section.get_view_flags()) + return; + + const VkComponentMapping mapping = apply_component_mapping_flags(gcm_format, expected_flags, rsx::default_remap_vector); + auto image = static_cast(section.get_raw_texture()); + + ensure(image); + image->set_native_component_layout(mapping); + + section.set_view_flags(expected_flags); + } + + void texture_cache::insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex, bool strong_ordering) + { + if (!strong_ordering && tex->current_layout == VK_IMAGE_LAYOUT_GENERAL) + { + // A previous barrier already exists, do nothing + return; + } + + vk::as_rtt(tex)->texture_barrier(cmd); + } + + bool texture_cache::render_target_format_is_compatible(vk::image* tex, u32 gcm_format) + { + auto vk_format = tex->info.format; + switch (gcm_format) + { + 
default: + //TODO + warn_once("Format incompatibility detected, reporting failure to force data copy (VK_FORMAT=0x%X, GCM_FORMAT=0x%X)", static_cast(vk_format), gcm_format); + return false; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return (vk_format == VK_FORMAT_R16G16B16A16_SFLOAT); + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return (vk_format == VK_FORMAT_R32G32B32A32_SFLOAT); + case CELL_GCM_TEXTURE_X32_FLOAT: + return (vk_format == VK_FORMAT_R32_SFLOAT); + case CELL_GCM_TEXTURE_R5G6B5: + return (vk_format == VK_FORMAT_R5G6B5_UNORM_PACK16); + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + return (vk_format == VK_FORMAT_B8G8R8A8_UNORM || vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); + case CELL_GCM_TEXTURE_B8: + return (vk_format == VK_FORMAT_R8_UNORM); + case CELL_GCM_TEXTURE_G8B8: + return (vk_format == VK_FORMAT_R8G8_UNORM); + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return (vk_format == VK_FORMAT_D16_UNORM || vk_format == VK_FORMAT_D32_SFLOAT); + } + } + + void texture_cache::prepare_for_dma_transfers(vk::command_buffer& cmd) + { + if (!cmd.is_recording()) + { + cmd.begin(); + } + } + + void texture_cache::cleanup_after_dma_transfers(vk::command_buffer& cmd) + { + bool occlusion_query_active = !!(cmd.flags & vk::command_buffer::cb_has_open_query); + if (occlusion_query_active) + { + // We really stepped in it + vk::do_query_cleanup(cmd); + } + + // End recording + cmd.end(); + + if (cmd.access_hint != vk::command_buffer::access_type_hint::all) + { + // Flush any pending async jobs in case of blockers + // TODO: Context-level manager should handle this logic + g_fxo->get().flush(VK_TRUE); + + // Primary access command queue, must restart it after + 
vk::fence submit_fence(*m_device); + cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE); + + vk::wait_for_fence(&submit_fence, GENERAL_WAIT_TIMEOUT); + + CHECK_RESULT(vkResetCommandBuffer(cmd, 0)); + cmd.begin(); + } + else + { + // Auxilliary command queue with auto-restart capability + cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE); + } + + ensure(cmd.flags == 0); + + if (occlusion_query_active) + { + ensure(cmd.is_recording()); + cmd.flags |= vk::command_buffer::cb_load_occluson_task; + } + } + + void texture_cache::initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap) + { + m_device = &device; + m_memory_types = device.get_memory_mapping(); + m_formats_support = device.get_formats_support(); + m_submit_queue = submit_queue; + m_texture_upload_heap = &upload_heap; + } + + void texture_cache::destroy() + { + clear(); + } + + bool texture_cache::is_depth_texture(u32 rsx_address, u32 rsx_size) + { + reader_lock lock(m_cache_mutex); + + auto& block = m_storage.block_for(rsx_address); + + if (block.get_locked_count() == 0) + return false; + + for (auto& tex : block) + { + if (tex.is_dirty()) + continue; + + if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range)) + continue; + + if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) + { + switch (tex.get_format()) + { + case VK_FORMAT_D16_UNORM: + case VK_FORMAT_D32_SFLOAT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return true; + default: + return false; + } + } + } + + //Unreachable; silence compiler warning anyway + return false; + } + + void texture_cache::on_frame_end() + { + trim_sections(); + + if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects || + m_temporary_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with 
memory resources + { + purge_unreleased_sections(); + } + + const u64 last_complete_frame = vk::get_last_completed_frame_id(); + m_temporary_storage.remove_if([&](const temporary_storage& o) + { + if (!o.block_size || o.test(last_complete_frame)) + { + m_temporary_memory_size -= o.block_size; + return true; + } + return false; + }); + + m_temporary_subresource_cache.clear(); + reset_frame_statistics(); + + baseclass::on_frame_end(); + } + + vk::image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch) + { + bool linear_format_supported = false; + + switch (format) + { + case VK_FORMAT_B8G8R8A8_UNORM: + linear_format_supported = m_formats_support.bgra8_linear; + break; + case VK_FORMAT_R8G8B8A8_UNORM: + linear_format_supported = m_formats_support.argb8_linear; + break; + default: + rsx_log.error("Unsupported VkFormat 0x%x", static_cast(format)); + return nullptr; + } + + if (!linear_format_supported) + { + return nullptr; + } + + // Uploads a linear memory range as a BGRA8 texture + auto image = std::make_unique(*m_device, m_memory_types.host_visible_coherent, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + VK_IMAGE_TYPE_2D, + format, + width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0, + VMM_ALLOCATION_POOL_UNDEFINED); + + VkImageSubresource subresource{}; + subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + + VkSubresourceLayout layout{}; + vkGetImageSubresourceLayout(*m_device, image->value, &subresource, &layout); + + void* mem = image->memory->map(0, layout.rowPitch * height); + + auto src = vm::_ptr(address); + auto dst = static_cast(mem); + + //TODO: SSE optimization + for (u32 row = 0; row < height; ++row) + { + auto casted_src = reinterpret_cast*>(src); + auto casted_dst = reinterpret_cast(dst); + + for (u32 col = 0; col < width; ++col) + casted_dst[col] = casted_src[col]; 
+ + src += pitch; + dst += layout.rowPitch; + } + + image->memory->unmap(); + + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + auto result = image.get(); + const u32 resource_memory = width * height * 4; //Rough approximate + m_temporary_storage.emplace_back(image); + m_temporary_storage.back().block_size = resource_memory; + m_temporary_memory_size += resource_memory; + + return result; + } + + bool texture_cache::blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, vk::surface_cache& m_rtts, vk::command_buffer& cmd) + { + blitter helper; + auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper); + + if (reply.succeeded) + { + if (reply.real_dst_size) + { + flush_if_cache_miss_likely(cmd, reply.to_address_range()); + } + + return true; + } + + return false; + } + + u32 texture_cache::get_unreleased_textures_count() const + { + return baseclass::get_unreleased_textures_count() + ::size32(m_temporary_storage); + } + + u32 texture_cache::get_temporary_memory_in_use() const + { + return m_temporary_memory_size; + } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index f33927c03b..ad965ed59b 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -40,12 +40,12 @@ namespace vk //DMA relevant data std::unique_ptr dma_fence; vk::render_device* m_device = nullptr; - vk::viewable_image *vram_texture = nullptr; + vk::viewable_image* vram_texture = nullptr; public: using baseclass::cached_texture_section; - void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false) + void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image* image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false) { auto new_texture = static_cast(image); ensure(!exists() || !is_managed() || vram_texture == new_texture); @@ -189,7 +189,7 @@ namespace vk 
m_device = &cmd.get_command_pool().get_owner(); } - vk::image *locked_resource = vram_texture; + vk::image* locked_resource = vram_texture; u32 transfer_width = width; u32 transfer_height = height; u32 transfer_x = 0, transfer_y = 0; @@ -265,7 +265,7 @@ namespace vk vk::wait_for_event(dma_fence.get(), GENERAL_WAIT_TIMEOUT); // Calculate smallest range to flush - for framebuffers, the raster region is enough - const auto range = (context == rsx::texture_upload_context::framebuffer_storage)? get_section_range() : get_confirmed_range(); + const auto range = (context == rsx::texture_upload_context::framebuffer_storage) ? get_section_range() : get_confirmed_range(); vk::flush_dma(range.start, range.length()); if (is_swizzled()) @@ -301,8 +301,10 @@ namespace vk } } - void *map_synchronized(u32, u32) - { return nullptr; } + void* map_synchronized(u32, u32) + { + return nullptr; + } void finish_flush() {} @@ -413,13 +415,7 @@ namespace vk initialize_image_contents = 1, }; - void on_section_destroyed(cached_texture_section& tex) override - { - if (tex.is_managed()) - { - vk::get_resource_manager()->dispose(tex.get_texture()); - } - } + void on_section_destroyed(cached_texture_section& tex) override; private: @@ -434,858 +430,79 @@ namespace vk std::list m_temporary_storage; atomic_t m_temporary_memory_size = { 0 }; - void clear() - { - baseclass::clear(); + void clear(); - m_temporary_storage.clear(); - m_temporary_memory_size = 0; - } - - VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::component_order flags, const rsx::texture_channel_remap_t& remap_vector) const - { - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - //Dont bother letting this propagate - return{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - default: - break; - } - - VkComponentMapping mapping = {}; - 
switch (flags) - { - case rsx::component_order::default_: - { - mapping = vk::apply_swizzle_remap(vk::get_component_mapping(gcm_format), remap_vector); - break; - } - case rsx::component_order::native: - { - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; - break; - } - case rsx::component_order::swapped_native: - { - mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; - break; - } - default: - break; - } - - return mapping; - } + VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::component_order flags, const rsx::texture_channel_remap_t& remap_vector) const; void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) const; - vk::image* get_template_from_collection_impl(const std::vector& sections_to_transfer) const - { - if (sections_to_transfer.size() == 1) [[likely]] - { - return sections_to_transfer.front().src; - } + vk::image* get_template_from_collection_impl(const std::vector& sections_to_transfer) const; - vk::image* result = nullptr; - for (const auto §ion : sections_to_transfer) - { - if (!section.src) - continue; + std::unique_ptr find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps); - if (!result) - { - result = section.src; - } - else - { - if (section.src->native_component_map.a != result->native_component_map.a || - section.src->native_component_map.r != result->native_component_map.r || - section.src->native_component_map.g != result->native_component_map.g || - section.src->native_component_map.b != result->native_component_map.b) - { - // TODO - // This requires a far more complex setup as its not always possible to mix and match without compute assistance - return nullptr; - } - } - } - - return result; - } - - std::unique_ptr find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps) - { - //const auto current_frame = 
vk::get_current_frame_id(); - for (auto &e : m_temporary_storage) - { - if (e.can_reuse && e.matches(format, w, h, d, mipmaps, 0)) - { - m_temporary_memory_size -= e.block_size; - e.block_size = 0; - return std::move(e.combined_image); - } - } - - return {}; - } - - std::unique_ptr find_temporary_cubemap(VkFormat format, u16 size) - { - //const auto current_frame = vk::get_current_frame_id(); - for (auto &e : m_temporary_storage) - { - if (e.can_reuse && e.matches(format, size, size, 1, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) - { - m_temporary_memory_size -= e.block_size; - e.block_size = 0; - return std::move(e.combined_image); - } - } - - return {}; - } + std::unique_ptr find_temporary_cubemap(VkFormat format, u16 size); protected: vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, - u32 gcm_format, u16 x, u16 y, u16 w, u16 h, u16 d, u8 mips, const rsx::texture_channel_remap_t& remap_vector, bool copy) - { - std::unique_ptr image; - - VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; - VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); - u16 layers = 1; - - if (!image_flags) [[likely]] - { - image = find_temporary_image(dst_format, w, h, 1, mips); - } - else - { - image = find_temporary_cubemap(dst_format, w); - layers = 6; - } - - if (!image) - { - image = std::make_unique(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - image_type, - dst_format, - w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags, - VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); - } - - //This method is almost exclusively used to work on framebuffer resources - //Keep the original swizzle layout unless there is data format conversion - VkComponentMapping view_swizzle; - if (!source || dst_format != source->info.format) - { - // This is a data cast operation - // Use native mapping for the new type - // TODO: Also simulate the readback+reupload step (very tricky) - const auto remap = get_component_mapping(gcm_format); - view_swizzle = { remap[1], remap[2], remap[3], remap[0] }; - } - else - { - view_swizzle = source->native_component_map; - } - - image->set_native_component_layout(view_swizzle); - auto view = image->get_view(rsx::get_remap_encoding(remap_vector), remap_vector); - - if (copy) - { - std::vector region = - {{ - source, - rsx::surface_transform::coordinate_transform, - 0, - x, y, 0, 0, 0, - w, h, w, h - }}; - - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - copy_transfer_regions_impl(cmd, image.get(), region); - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - const u32 resource_memory = w * h * 4; //Rough approximate - m_temporary_storage.emplace_back(image); - m_temporary_storage.back().block_size = resource_memory; 
- m_temporary_memory_size += resource_memory; - - return view; - } + u32 gcm_format, u16 x, u16 y, u16 w, u16 h, u16 d, u8 mips, const rsx::texture_channel_remap_t& remap_vector, bool copy); vk::image_view* create_temporary_subresource_view(vk::command_buffer& cmd, vk::image* source, u32 gcm_format, - u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) override - { - return create_temporary_subresource_view_impl(cmd, source, source->info.imageType, VK_IMAGE_VIEW_TYPE_2D, - gcm_format, x, y, w, h, 1, 1, remap_vector, true); - } + u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) override; vk::image_view* create_temporary_subresource_view(vk::command_buffer& cmd, vk::image** source, u32 gcm_format, - u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) override - { - return create_temporary_subresource_view(cmd, *source, gcm_format, x, y, w, h, remap_vector); - } + u16 x, u16 y, u16 w, u16 h, const rsx::texture_channel_remap_t& remap_vector) override; vk::image_view* generate_cubemap_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 size, - const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override - { - auto _template = get_template_from_collection_impl(sections_to_copy); - auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, - VK_IMAGE_VIEW_TYPE_CUBE, gcm_format, 0, 0, size, size, 1, 1, remap_vector, false); - - const auto image = result->image(); - VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); - VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 6 }; - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); - - if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) - { - VkClearColorValue clear = {}; - vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - else - { - VkClearDepthStencilValue clear = { 1.f, 0 }; - 
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - - copy_transfer_regions_impl(cmd, image, sections_to_copy); - - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); - return result; - } + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override; vk::image_view* generate_3d_from_2d_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, u16 depth, - const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override - { - auto _template = get_template_from_collection_impl(sections_to_copy); - auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_3D, - VK_IMAGE_VIEW_TYPE_3D, gcm_format, 0, 0, width, height, depth, 1, remap_vector, false); - - const auto image = result->image(); - VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); - VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); - - if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) - { - VkClearColorValue clear = {}; - vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - else - { - VkClearDepthStencilValue clear = { 1.f, 0 }; - vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - - copy_transfer_regions_impl(cmd, image, sections_to_copy); - - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); - return result; - } + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override; vk::image_view* generate_atlas_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, - const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override - { - auto _template = 
get_template_from_collection_impl(sections_to_copy); - auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, - VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, 1, 1, remap_vector, false); - - const auto image = result->image(); - VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); - VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); - - if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) - { - VkClearColorValue clear = {}; - vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - else - { - VkClearDepthStencilValue clear = { 1.f, 0 }; - vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - - copy_transfer_regions_impl(cmd, image, sections_to_copy); - - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); - return result; - } + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override; vk::image_view* generate_2d_mipmaps_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, - const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override - { - const auto mipmaps = ::narrow(sections_to_copy.size()); - auto _template = get_template_from_collection_impl(sections_to_copy); - auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D, - VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, 1, mipmaps, remap_vector, false); + const std::vector& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector) override; - const auto image = result->image(); - VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); - VkImageSubresourceRange dst_range = { dst_aspect, 0, mipmaps, 0, 1 }; - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 
dst_range); + void release_temporary_subresource(vk::image_view* view) override; - if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) - { - VkClearColorValue clear = {}; - vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } - else - { - VkClearDepthStencilValue clear = { 1.f, 0 }; - vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); - } + void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override; - copy_transfer_regions_impl(cmd, image, sections_to_copy); + cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, + u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, rsx::component_order swizzle_flags, rsx::flags32_t flags) override; - vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); - return result; - } - - void release_temporary_subresource(vk::image_view* view) override - { - auto handle = dynamic_cast(view->image()); - for (auto& e : m_temporary_storage) - { - if (e.combined_image.get() == handle) - { - e.can_reuse = true; - return; - } - } - } - - void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override - { - std::vector region = - { { - src, - rsx::surface_transform::identity, - 0, - 0, 0, 0, 0, 0, - width, height, width, height - }}; - - auto dst = dst_view->image(); - dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - copy_transfer_regions_impl(cmd, dst, region); - dst->pop_layout(cmd); - } - - cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, - u32 gcm_format, rsx::texture_upload_context context, 
rsx::texture_dimension_extended type, bool swizzled, rsx::component_order swizzle_flags, rsx::flags32_t flags) override - { - const auto section_depth = depth; - - // Define desirable attributes based on type - VkImageType image_type; - VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - u8 layer = 0; - - switch (type) - { - case rsx::texture_dimension_extended::texture_dimension_1d: - image_type = VK_IMAGE_TYPE_1D; - height = 1; - depth = 1; - layer = 1; - break; - case rsx::texture_dimension_extended::texture_dimension_2d: - image_type = VK_IMAGE_TYPE_2D; - depth = 1; - layer = 1; - break; - case rsx::texture_dimension_extended::texture_dimension_cubemap: - image_type = VK_IMAGE_TYPE_2D; - depth = 1; - layer = 6; - break; - case rsx::texture_dimension_extended::texture_dimension_3d: - image_type = VK_IMAGE_TYPE_3D; - layer = 1; - break; - default: - fmt::throw_exception("Unreachable"); - } - - // Check what actually exists at that address - const rsx::image_section_attributes_t search_desc = { .gcm_format = gcm_format, .width = width, .height = height, .depth = section_depth, .mipmaps = mipmaps }; - const bool allow_dirty = (context != rsx::texture_upload_context::framebuffer_storage); - cached_texture_section& region = *find_cached_texture(rsx_range, search_desc, true, true, allow_dirty); - ensure(!region.is_locked()); - - vk::viewable_image* image = nullptr; - if (region.exists()) - { - image = dynamic_cast(region.get_raw_texture()); - if (!image || region.get_image_type() != type || image->depth() != depth) // TODO - { - // Incompatible view/type - region.destroy(); - image = nullptr; - } - else - { - ensure(region.is_managed()); - - // Reuse - region.set_rsx_pitch(pitch); - - if (flags & texture_create_flags::initialize_image_contents) - { - // Wipe memory - image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - VkImageSubresourceRange range{ image->aspect(), 0, 
image->mipmaps(), 0, image->layers() }; - if (image->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) - { - VkClearColorValue color = { {0.f, 0.f, 0.f, 1.f} }; - vkCmdClearColorImage(cmd, image->value, image->current_layout, &color, 1, &range); - } - else - { - VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &range); - } - } - } - } - - if (!image) - { - const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; - const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format); - - image = new vk::viewable_image(*m_device, m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - image_type, - vk_format, - width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0, - VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); - - // New section, we must prepare it - region.reset(rsx_range); - region.set_gcm_format(gcm_format); - region.set_image_type(type); - region.create(width, height, section_depth, mipmaps, image, pitch, true, gcm_format); - } - - region.set_view_flags(swizzle_flags); - region.set_context(context); - region.set_swizzled(swizzled); - region.set_dirty(false); - - image->native_component_map = apply_component_mapping_flags(gcm_format, swizzle_flags, rsx::default_remap_vector); - - // Its not necessary to lock blit dst textures as they are just reused as necessary - switch (context) - { - case rsx::texture_upload_context::shader_read: - case rsx::texture_upload_context::blit_engine_src: - region.protect(utils::protection::ro); - read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range); - break; - case rsx::texture_upload_context::blit_engine_dst: - region.set_unpack_swap_bytes(true); - no_access_range = region.get_min_max(no_access_range, 
rsx::section_bounds::locked_range); - break; - case rsx::texture_upload_context::dma: - case rsx::texture_upload_context::framebuffer_storage: - // Should not initialized with this method - default: - fmt::throw_exception("Unexpected upload context 0x%x", u32(context)); - } - - update_cache_tag(); - return ®ion; - } - - cached_texture_section* create_nul_section(vk::command_buffer& /*cmd*/, const utils::address_range& rsx_range, bool memory_load) override - { - auto& region = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false); - ensure(!region.is_locked()); - - // Prepare section - region.reset(rsx_range); - region.set_context(rsx::texture_upload_context::dma); - region.set_dirty(false); - region.set_unpack_swap_bytes(true); - - if (memory_load) - { - vk::map_dma(rsx_range.start, rsx_range.length()); - vk::load_dma(rsx_range.start, rsx_range.length()); - } - - no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range); - update_cache_tag(); - return ®ion; - } + cached_texture_section* create_nul_section(vk::command_buffer& /*cmd*/, const utils::address_range& rsx_range, bool memory_load) override; cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, - rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override - { - if (context != rsx::texture_upload_context::shader_read) - { - if (vk::is_renderpass_open(cmd)) - { - vk::end_renderpass(cmd); - } - } - auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled, - rsx::component_order::default_, 0); + rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override; - auto image = 
section->get_raw_texture(); - image->set_debug_name(fmt::format("Raw Texture @0x%x", rsx_range.start)); + void set_component_order(cached_texture_section& section, u32 gcm_format, rsx::component_order expected_flags) override; - vk::enter_uninterruptible(); + void insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex, bool strong_ordering) override; - bool input_swizzled = swizzled; - if (context == rsx::texture_upload_context::blit_engine_src) - { - // Swizzling is ignored for blit engine copy and emulated using remapping - input_swizzled = false; - } + bool render_target_format_is_compatible(vk::image* tex, u32 gcm_format) override; - rsx::flags32_t upload_command_flags = initialize_image_layout | - (rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute ? upload_contents_async : upload_contents_inline); + void prepare_for_dma_transfers(vk::command_buffer& cmd) override; - vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, image->aspect(), - *m_texture_upload_heap, upload_heap_align_default, upload_command_flags); - - vk::leave_uninterruptible(); - - if (context != rsx::texture_upload_context::shader_read) - { - // Insert appropriate barrier depending on use. Shader read resources should be lazy-initialized before consuming. 
- // TODO: All texture resources should be initialized on use, this is wasteful - - VkImageLayout preferred_layout; - switch (context) - { - default: - preferred_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - break; - case rsx::texture_upload_context::blit_engine_dst: - preferred_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - break; - case rsx::texture_upload_context::blit_engine_src: - preferred_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - break; - } - - if (preferred_layout != image->current_layout) - { - image->change_layout(cmd, preferred_layout); - } - else - { - // Insert ordering barrier - ensure(preferred_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - insert_image_memory_barrier(cmd, image->value, image->current_layout, preferred_layout, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - { image->aspect(), 0, image->mipmaps(), 0, image->layers() }); - } - } - - section->last_write_tag = rsx::get_shared_tag(); - return section; - } - - void set_component_order(cached_texture_section& section, u32 gcm_format, rsx::component_order expected_flags) override - { - if (expected_flags == section.get_view_flags()) - return; - - const VkComponentMapping mapping = apply_component_mapping_flags(gcm_format, expected_flags, rsx::default_remap_vector); - auto image = static_cast(section.get_raw_texture()); - - ensure(image); - image->set_native_component_layout(mapping); - - section.set_view_flags(expected_flags); - } - - void insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex, bool strong_ordering) override - { - if (!strong_ordering && tex->current_layout == VK_IMAGE_LAYOUT_GENERAL) - { - // A previous barrier already exists, do nothing - return; - } - - vk::as_rtt(tex)->texture_barrier(cmd); - } - - bool render_target_format_is_compatible(vk::image* tex, u32 gcm_format) override - { - auto vk_format = tex->info.format; - switch (gcm_format) - { - default: - //TODO 
- warn_once("Format incompatibility detected, reporting failure to force data copy (VK_FORMAT=0x%X, GCM_FORMAT=0x%X)", static_cast(vk_format), gcm_format); - return false; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return (vk_format == VK_FORMAT_R16G16B16A16_SFLOAT); - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return (vk_format == VK_FORMAT_R32G32B32A32_SFLOAT); - case CELL_GCM_TEXTURE_X32_FLOAT: - return (vk_format == VK_FORMAT_R32_SFLOAT); - case CELL_GCM_TEXTURE_R5G6B5: - return (vk_format == VK_FORMAT_R5G6B5_UNORM_PACK16); - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_D8R8G8B8: - return (vk_format == VK_FORMAT_B8G8R8A8_UNORM || vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); - case CELL_GCM_TEXTURE_B8: - return (vk_format == VK_FORMAT_R8_UNORM); - case CELL_GCM_TEXTURE_G8B8: - return (vk_format == VK_FORMAT_R8G8_UNORM); - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - return (vk_format == VK_FORMAT_D16_UNORM || vk_format == VK_FORMAT_D32_SFLOAT); - } - } - - void prepare_for_dma_transfers(vk::command_buffer& cmd) override - { - if (!cmd.is_recording()) - { - cmd.begin(); - } - } - - void cleanup_after_dma_transfers(vk::command_buffer& cmd) override - { - bool occlusion_query_active = !!(cmd.flags & vk::command_buffer::cb_has_open_query); - if (occlusion_query_active) - { - // We really stepped in it - vk::do_query_cleanup(cmd); - } - - // End recording - cmd.end(); - - if (cmd.access_hint != vk::command_buffer::access_type_hint::all) - { - // Flush any pending async jobs in case of blockers - // TODO: Context-level manager should handle this logic - g_fxo->get().flush(VK_TRUE); - - // Primary access command queue, must restart it after - vk::fence submit_fence(*m_device); - 
cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE); - - vk::wait_for_fence(&submit_fence, GENERAL_WAIT_TIMEOUT); - - CHECK_RESULT(vkResetCommandBuffer(cmd, 0)); - cmd.begin(); - } - else - { - // Auxilliary command queue with auto-restart capability - cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE); - } - - ensure(cmd.flags == 0); - - if (occlusion_query_active) - { - ensure(cmd.is_recording()); - cmd.flags |= vk::command_buffer::cb_load_occluson_task; - } - } + void cleanup_after_dma_transfers(vk::command_buffer& cmd) override; public: using baseclass::texture_cache; - void initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap) - { - m_device = &device; - m_memory_types = device.get_memory_mapping(); - m_formats_support = device.get_formats_support(); - m_submit_queue = submit_queue; - m_texture_upload_heap = &upload_heap; - } + void initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap); - void destroy() override - { - clear(); - } + void destroy() override; - bool is_depth_texture(u32 rsx_address, u32 rsx_size) override - { - reader_lock lock(m_cache_mutex); + bool is_depth_texture(u32 rsx_address, u32 rsx_size) override; - auto &block = m_storage.block_for(rsx_address); + void on_frame_end() override; - if (block.get_locked_count() == 0) - return false; + vk::image* upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch); - for (auto& tex : block) - { - if (tex.is_dirty()) - continue; + bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, vk::surface_cache& m_rtts, vk::command_buffer& cmd); - if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range)) - continue; + u32 get_unreleased_textures_count() const override; - if ((rsx_address + rsx_size - tex.get_section_base()) <= 
tex.get_section_size()) - { - switch (tex.get_format()) - { - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - return true; - default: - return false; - } - } - } - - //Unreachable; silence compiler warning anyway - return false; - } - - void on_frame_end() override - { - trim_sections(); - - if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects || - m_temporary_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with memory resources - { - purge_unreleased_sections(); - } - - const u64 last_complete_frame = vk::get_last_completed_frame_id(); - m_temporary_storage.remove_if([&](const temporary_storage& o) - { - if (!o.block_size || o.test(last_complete_frame)) - { - m_temporary_memory_size -= o.block_size; - return true; - } - return false; - }); - - m_temporary_subresource_cache.clear(); - reset_frame_statistics(); - - baseclass::on_frame_end(); - } - - vk::image *upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch) - { - bool linear_format_supported = false; - - switch (format) - { - case VK_FORMAT_B8G8R8A8_UNORM: - linear_format_supported = m_formats_support.bgra8_linear; - break; - case VK_FORMAT_R8G8B8A8_UNORM: - linear_format_supported = m_formats_support.argb8_linear; - break; - default: - rsx_log.error("Unsupported VkFormat 0x%x", static_cast(format)); - return nullptr; - } - - if (!linear_format_supported) - { - return nullptr; - } - - // Uploads a linear memory range as a BGRA8 texture - auto image = std::make_unique(*m_device, m_memory_types.host_visible_coherent, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - VK_IMAGE_TYPE_2D, - format, - width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED, - VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - 0, VMM_ALLOCATION_POOL_TEXTURE_CACHE); - - VkImageSubresource 
subresource{}; - subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - - VkSubresourceLayout layout{}; - vkGetImageSubresourceLayout(*m_device, image->value, &subresource, &layout); - - void* mem = image->memory->map(0, layout.rowPitch * height); - - auto src = vm::_ptr(address); - auto dst = static_cast(mem); - - //TODO: SSE optimization - for (u32 row = 0; row < height; ++row) - { - auto casted_src = reinterpret_cast*>(src); - auto casted_dst = reinterpret_cast(dst); - - for (u32 col = 0; col < width; ++col) - casted_dst[col] = casted_src[col]; - - src += pitch; - dst += layout.rowPitch; - } - - image->memory->unmap(); - - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - auto result = image.get(); - const u32 resource_memory = width * height * 4; //Rough approximate - m_temporary_storage.emplace_back(image); - m_temporary_storage.back().block_size = resource_memory; - m_temporary_memory_size += resource_memory; - - return result; - } - - bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, vk::surface_cache& m_rtts, vk::command_buffer& cmd) - { - blitter helper; - auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper); - - if (reply.succeeded) - { - if (reply.real_dst_size) - { - flush_if_cache_miss_likely(cmd, reply.to_address_range()); - } - - return true; - } - - return false; - } - - u32 get_unreleased_textures_count() const override - { - return baseclass::get_unreleased_textures_count() + ::size32(m_temporary_storage); - } - - u32 get_temporary_memory_in_use() - { - return m_temporary_memory_size; - } + u32 get_temporary_memory_in_use() const; }; }