diff --git a/rpcs3/Emu/RSX/VK/VKDMA.cpp b/rpcs3/Emu/RSX/VK/VKDMA.cpp
index 14f31919fc..4295fbebe7 100644
--- a/rpcs3/Emu/RSX/VK/VKDMA.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDMA.cpp
@@ -34,10 +34,16 @@ namespace vk
 			return inheritance_info.parent->map_range(range);
 		}
 
+		if (memory_mapping == nullptr)
+		{
+			memory_mapping = static_cast<u8*>(allocated_memory->map(0, VK_WHOLE_SIZE));
+			ensure(memory_mapping);
+		}
+
 		ensure(range.start >= base_address);
 		u32 start = range.start;
 		start -= base_address;
-		return allocated_memory->map(start, range.length());
+		return memory_mapping + start;
 	}
 
 	void dma_block::unmap()
@@ -49,6 +55,7 @@ namespace vk
 		else
 		{
 			allocated_memory->unmap();
+			memory_mapping = nullptr;
 		}
 	}
 
@@ -73,6 +80,7 @@ namespace vk
 
 			auto gc = vk::get_resource_manager();
 			gc->dispose(allocated_memory);
+			memory_mapping = nullptr;
 		}
 	}
 
@@ -107,8 +115,7 @@ namespace vk
 		auto dst = vm::get_super_ptr(range.start);
 		std::memcpy(dst, src, range.length());
 
-		// TODO: Clear page bits
-		unmap();
+		// NOTE: Do not unmap. This can be extremely slow on some platforms.
 	}
 
 	void dma_block::load(const utils::address_range& range)
@@ -124,8 +131,7 @@ namespace vk
 		auto dst = map_range(range);
 		std::memcpy(dst, src, range.length());
 
-		// TODO: Clear page bits to sychronized
-		unmap();
+		// NOTE: Do not unmap. This can be extremely slow on some platforms.
 	}
 
 	std::pair<u32, buffer*> dma_block::get(const utils::address_range& range)
diff --git a/rpcs3/Emu/RSX/VK/VKDMA.h b/rpcs3/Emu/RSX/VK/VKDMA.h
index 55d016c9e6..35587c74c9 100644
--- a/rpcs3/Emu/RSX/VK/VKDMA.h
+++ b/rpcs3/Emu/RSX/VK/VKDMA.h
@@ -22,6 +22,7 @@ namespace vk
 		inheritance_info;
 
 		u32 base_address = 0;
+		u8* memory_mapping = nullptr;
 		std::unique_ptr<buffer> allocated_memory;
 
 		virtual void allocate(const render_device& dev, usz size);
diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp
index b5ebf2fec3..75980ede46 100644
--- a/rpcs3/Emu/RSX/VK/VKTexture.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp
@@ -880,7 +880,7 @@ namespace vk
 		VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
 	{
 		const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
-		rsx::texture_uploader_capabilities caps{ .supports_zero_copy = true, .alignment = heap_align };
+		rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
 		rsx::texture_memory_info opt{};
 		bool check_caps = true;
 
@@ -896,15 +896,6 @@ namespace vk
 		std::vector<std::pair<VkBuffer, u32>> upload_commands;
 		copy_regions.reserve(subresource_layout.size());
 
-#ifndef _WIN32
-		// RADV workaround. Buffer-to-buffer transfers are extremely slow and invoke memmove in vkCmdCopyBuffer.
-		if (const auto vendor = vk::get_driver_vendor();
-			vendor == driver_vendor::RADV)
-		{
-			caps.supports_zero_copy = false;
-		}
-#endif
-
		for (const rsx::subresource_layout &layout : subresource_layout)
 		{
 			const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
@@ -922,7 +913,7 @@ namespace vk
 		{
 			caps.supports_byteswap = (image_linear_size >= 1024);
 			caps.supports_hw_deswizzle = caps.supports_byteswap;
-			caps.supports_zero_copy = caps.supports_zero_copy && caps.supports_byteswap;
+			caps.supports_zero_copy = caps.supports_byteswap;
 			caps.supports_vtc_decoding = false;
 			check_caps = false;
 		}
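
Note on the VKDMA change: the patch replaces a vkMapMemory/vkUnmapMemory round-trip on every access with a single persistent mapping of the whole allocation, cached in memory_mapping until the block is unmapped or destroyed. A minimal standalone sketch of that pattern follows, using raw Vulkan calls rather than rpcs3's vk::buffer wrapper; the class and member names (persistent_block, m_mapped, map_offset) are hypothetical and only illustrate the technique. It assumes the memory was allocated from a host-visible heap.

#include <vulkan/vulkan.h>
#include <cstdint>

// Hypothetical illustration of the persistent-mapping pattern; not rpcs3 code.
class persistent_block
{
	VkDevice m_device = VK_NULL_HANDLE;
	VkDeviceMemory m_memory = VK_NULL_HANDLE;
	std::uint8_t* m_mapped = nullptr; // Cached base pointer, valid until unmap()

public:
	persistent_block(VkDevice device, VkDeviceMemory memory)
		: m_device(device), m_memory(memory) {}

	// Map the whole allocation once and hand out offsets into the cached
	// pointer. Repeated calls become simple pointer arithmetic, which is the
	// per-access map/unmap cost the patch eliminates.
	std::uint8_t* map_offset(VkDeviceSize offset)
	{
		if (!m_mapped)
		{
			void* ptr = nullptr;
			if (vkMapMemory(m_device, m_memory, 0, VK_WHOLE_SIZE, 0, &ptr) != VK_SUCCESS)
			{
				return nullptr;
			}
			m_mapped = static_cast<std::uint8_t*>(ptr);
		}
		return m_mapped + offset;
	}

	// Only called on teardown or recycling; invalidates the cached pointer,
	// mirroring the memory_mapping = nullptr assignments in the patch.
	void unmap()
	{
		if (m_mapped)
		{
			vkUnmapMemory(m_device, m_memory);
			m_mapped = nullptr;
		}
	}
};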