From dbcdb3523bc12a461256d66fb853b6ef7b50ed66 Mon Sep 17 00:00:00 2001 From: german77 Date: Fri, 19 May 2023 21:19:29 -0600 Subject: [PATCH 1/5] input_common: Map motion with relative values not absolute ones --- src/input_common/input_engine.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/input_common/input_engine.cpp b/src/input_common/input_engine.cpp index 91aa96aa73..e4c5b5b3c7 100644 --- a/src/input_common/input_engine.cpp +++ b/src/input_common/input_engine.cpp @@ -380,13 +380,16 @@ void InputEngine::TriggerOnMotionChange(const PadIdentifier& identifier, int mot if (!configuring || !mapping_callback.on_data) { return; } + const auto old_value = GetMotion(identifier, motion); bool is_active = false; - if (std::abs(value.accel_x) > 1.5f || std::abs(value.accel_y) > 1.5f || - std::abs(value.accel_z) > 1.5f) { + if (std::abs(value.accel_x - old_value.accel_x) > 1.5f || + std::abs(value.accel_y - old_value.accel_y) > 1.5f || + std::abs(value.accel_z - old_value.accel_z) > 1.5f) { is_active = true; } - if (std::abs(value.gyro_x) > 0.6f || std::abs(value.gyro_y) > 0.6f || - std::abs(value.gyro_z) > 0.6f) { + if (std::abs(value.gyro_x - old_value.gyro_x) > 0.6f || + std::abs(value.gyro_y - old_value.gyro_y) > 0.6f || + std::abs(value.gyro_z - old_value.gyro_z) > 0.6f) { is_active = true; } if (!is_active) { From 4e491ab59ba8b9c08253ca9cce5bb9fe909ac2ff Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 20 May 2023 14:08:05 +0300 Subject: [PATCH 2/5] vk_master_semaphore: Move fence wait on separate thread --- .../renderer_vulkan/vk_master_semaphore.cpp | 52 +++++++++++++++++-- .../renderer_vulkan/vk_master_semaphore.h | 15 +++++- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 47c74e4d87..8b65aeaebc 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ 
b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -10,11 +10,16 @@ namespace Vulkan { +constexpr u64 FENCE_RESERVE_SIZE = 8; + MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { if (!device.HasTimelineSemaphore()) { static constexpr VkFenceCreateInfo fence_ci{ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; - fence = device.GetLogical().CreateFence(fence_ci); + free_queue.resize(FENCE_RESERVE_SIZE); + std::ranges::generate(free_queue, + [&] { return device.GetLogical().CreateFence(fence_ci); }); + wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); return; } @@ -167,16 +172,53 @@ VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphor .pSignalSemaphores = &signal_semaphore, }; + auto fence = GetFreeFence(); auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); if (result == VK_SUCCESS) { - fence.Wait(); - fence.Reset(); - gpu_tick.store(host_tick); - gpu_tick.notify_all(); + std::scoped_lock lock{wait_mutex}; + wait_queue.emplace(host_tick, std::move(fence)); + wait_cv.notify_one(); } return result; } +void MasterSemaphore::WaitThread(std::stop_token token) { + while (!token.stop_requested()) { + u64 host_tick; + vk::Fence fence; + { + std::unique_lock lock{wait_mutex}; + Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); }); + if (token.stop_requested()) { + return; + } + std::tie(host_tick, fence) = std::move(wait_queue.front()); + wait_queue.pop(); + } + + fence.Wait(); + fence.Reset(); + gpu_tick.store(host_tick); + gpu_tick.notify_all(); + + std::scoped_lock lock{free_mutex}; + free_queue.push_front(std::move(fence)); + } +} + +vk::Fence MasterSemaphore::GetFreeFence() { + std::scoped_lock lock{free_mutex}; + if (free_queue.empty()) { + static constexpr VkFenceCreateInfo fence_ci{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; + return 
device.GetLogical().CreateFence(fence_ci); + } + + auto fence = std::move(free_queue.back()); + free_queue.pop_back(); + return fence; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index f2f61f781b..1e7c902157 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -5,8 +5,10 @@ #include #include +#include #include #include +#include #include "common/common_types.h" #include "common/polyfill_thread.h" @@ -17,6 +19,8 @@ namespace Vulkan { class Device; class MasterSemaphore { + using Waitable = std::pair; + public: explicit MasterSemaphore(const Device& device); ~MasterSemaphore(); @@ -57,13 +61,22 @@ private: VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick); + void WaitThread(std::stop_token token); + + vk::Fence GetFreeFence(); + private: const Device& device; ///< Device. - vk::Fence fence; ///< Fence. vk::Semaphore semaphore; ///< Timeline semaphore. std::atomic gpu_tick{0}; ///< Current known GPU tick. std::atomic current_tick{1}; ///< Current logical tick. + std::mutex wait_mutex; + std::mutex free_mutex; + std::condition_variable_any wait_cv; + std::queue wait_queue; ///< Queue for the fences to be waited on by the wait thread. + std::deque free_queue; ///< Holds available fences for submission. std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. + std::jthread wait_thread; ///< Helper thread that waits for submitted fences. 
}; } // namespace Vulkan From e5c2ec223aa3cc45974fbc4c046bbf2e8f8e8797 Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Sun, 21 May 2023 03:02:26 +0400 Subject: [PATCH 3/5] externals: update cubeb --- externals/cubeb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/cubeb b/externals/cubeb index 2d817de7c5..48689ae7a7 160000 --- a/externals/cubeb +++ b/externals/cubeb @@ -1 +1 @@ -Subproject commit 2d817de7c58b33a7c045edf873f3f9c98e4a2082 +Subproject commit 48689ae7a73caeb747953f9ed664dc71d2f918d8 From f8e7b44d2816108ace9e262f5161a98079a4ca7b Mon Sep 17 00:00:00 2001 From: scorpion81 Date: Mon, 22 May 2023 16:48:55 +0200 Subject: [PATCH 4/5] Limit the device access memory to 4 GB Hard-limit the device access memory to 4 GB for integrated Vulkan devices. This keeps the Steam Deck from going above 4 GB of VRAM usage (above this value, the likelihood of a crash rises when RAM usage exceeds 12 GB as well). A detection mechanism will probably be necessary to determine the real memory limit of integrated Vulkan devices. Those are likely to have small limits anyway, but what about integrated GPUs on machines with more than 16 GB of RAM, i.e. larger amounts of memory?
--- src/video_core/vulkan_common/vulkan_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f6e6f27369..c0b2b3e17b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1034,7 +1034,7 @@ void Device::CollectPhysicalMemoryInfo() { } const s64 available_memory = static_cast(device_access_memory - device_initial_usage); device_access_memory = static_cast(std::max( - std::min(available_memory - 8_GiB, 4_GiB), static_cast(local_memory))); + std::min(available_memory - 8_GiB, 4_GiB), std::min(local_memory, 4_GiB))); } void Device::CollectToolingInfo() { From 8758932031ff4836652e7577ec566fd733f46e0b Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 22 May 2023 01:13:47 -0400 Subject: [PATCH 5/5] renderer_vulkan: barrier attachment feedback loops --- .../renderer_opengl/gl_texture_cache.h | 4 +++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_texture_cache.cpp | 4 +++ .../renderer_vulkan/vk_texture_cache.h | 2 ++ src/video_core/texture_cache/texture_cache.h | 36 +++++++++++++++++++ .../texture_cache/texture_cache_base.h | 3 ++ 6 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1190999a8d..3e9b3302b6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -144,6 +144,10 @@ public: return state_tracker; } + void BarrierFeedbackLoop() const noexcept { + // OpenGL does not require a barrier for attachment feedback loops. 
+ } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f1bcd5cd67..506b78f08e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } + texture_cache.UpdateRenderTargets(false); + texture_cache.CheckFeedbackLoop(views); ConfigureDraw(rescaling, render_area); } void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, const RenderAreaPushConstant& render_area) { - texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); if (!is_built.load(std::memory_order::relaxed)) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 4d0481f2a6..da5af25eb4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -861,6 +861,10 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { return *buffers[level]; } +void TextureCacheRuntime::BarrierFeedbackLoop() { + scheduler.RequestOutsideRenderPassOperationContext(); +} + void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span copies) { std::vector vk_in_copies(copies.size()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4166b3d201..0f7a5ffd45 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -103,6 +103,8 @@ public: [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); + void BarrierFeedbackLoop(); + const Device& device; Scheduler& scheduler; 
MemoryAllocator& memory_allocator; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b24086fce5..8e62a5f78c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -183,6 +183,42 @@ void TextureCache

::FillComputeImageViews(std::span views) { views); } +template +void TextureCache

::CheckFeedbackLoop(std::span views) { + const bool requires_barrier = [&] { + for (const auto& view : views) { + if (!view.id) { + continue; + } + auto& image_view = slot_image_views[view.id]; + + // Check color targets + for (const auto& ct_view_id : render_targets.color_buffer_ids) { + if (ct_view_id) { + auto& ct_view = slot_image_views[ct_view_id]; + if (image_view.image_id == ct_view.image_id) { + return true; + } + } + } + + // Check zeta target + if (render_targets.depth_buffer_id) { + auto& zt_view = slot_image_views[render_targets.depth_buffer_id]; + if (image_view.image_id == zt_view.image_id) { + return true; + } + } + } + + return false; + }(); + + if (requires_barrier) { + runtime.BarrierFeedbackLoop(); + } +} + template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { if (index > channel_state->graphics_sampler_table.Limit()) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 0720494e55..1a3308e2db 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -148,6 +148,9 @@ public: /// Fill image_view_ids with the compute images in indices void FillComputeImageViews(std::span views); + /// Handle feedback loops during draws. + void CheckFeedbackLoop(std::span views); + /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index);