diff --git a/src/emulator.cpp b/src/emulator.cpp index a469a31ce..57366453a 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,7 +10,6 @@ #ifdef ENABLE_QT_GUI #include "qt_gui/memory_patcher.h" #endif -#include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" #include "common/scm_rev.h" @@ -26,7 +25,6 @@ #include "core/libraries/libs.h" #include "core/libraries/ngs2/ngs2.h" #include "core/libraries/rtc/rtc.h" -#include "core/libraries/videoout/video_out.h" #include "core/linker.h" #include "core/memory.h" #include "emulator.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 64ce532b5..7df62a910 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); if (buffer.is_integer) { - value = ctx.OpBitcast(ctx.U32[4], value); + value = ctx.OpBitcast(ctx.S32[4], value); } ctx.OpImageWrite(tex_buffer, coord, value); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index b45e93882..f602e762e 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -117,6 +117,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_ATOMIC(AtomicOp::Add, inst); case Opcode::BUFFER_ATOMIC_SWAP: return BUFFER_ATOMIC(AtomicOp::Swap, inst); + case Opcode::BUFFER_ATOMIC_UMIN: + return BUFFER_ATOMIC(AtomicOp::Umin, inst); + case Opcode::BUFFER_ATOMIC_UMAX: + return BUFFER_ATOMIC(AtomicOp::Umax, inst); default: LogMissingOpcode(inst); } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 47bc82f40..6b2aa8bbf 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -98,22 +98,7 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { } IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { - switch (inst.GetOpcode()) { - case IR::Opcode::LoadBufferU32: - case IR::Opcode::LoadBufferU32x2: - case IR::Opcode::LoadBufferU32x3: - case IR::Opcode::LoadBufferU32x4: - case IR::Opcode::StoreBufferU32: - case IR::Opcode::StoreBufferU32x2: - case IR::Opcode::StoreBufferU32x3: - case IR::Opcode::StoreBufferU32x4: - case IR::Opcode::ReadConstBuffer: - case IR::Opcode::BufferAtomicIAdd32: - case IR::Opcode::BufferAtomicSwap32: - return IR::Type::U32; - default: - UNREACHABLE(); - } + return IR::Type::U32; } bool IsImageAtomicInstruction(const IR::Inst& inst) { diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 7835ef1a9..63fe8a571 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -40,6 +40,8 @@ void Visit(Info& info, IR::Inst& inst) { info.uses_group_quad = true; break; case IR::Opcode::ReadLane: + case IR::Opcode::ReadFirstLane: + case IR::Opcode::WriteLane: info.uses_group_ballot = true; break; case IR::Opcode::Discard: diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 3dd75dbd7..bbcafdb86 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -37,14 +37,14 @@ struct ImageSpecialization { * after the first compilation of a module. */ struct StageSpecialization { - static constexpr size_t MaxStageResources = 32; + static constexpr size_t MaxStageResources = 64; const Shader::Info* info; RuntimeInfo runtime_info; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; - boost::container::small_vector images; + boost::container::small_vector images; u32 start_binding{}; explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a2bd60f2e..a461d7e6a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -581,6 +581,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* nop = reinterpret_cast(header); break; } + case PM4ItOpcode::DmaData: { + break; + } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); auto task = ProcessCompute( diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5550c4112..69ab524d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -301,7 +301,6 @@ GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { u32 binding{}; - boost::container::small_vector bindings; for (const auto* stage : stages) { if (!stage) { continue; @@ -450,7 +449,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, }); } - boost::container::static_vector tsharps; + boost::container::static_vector tsharps; for (const auto& image_desc : stage->images) { const auto tsharp = image_desc.GetSharp(*stage); if (tsharp) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 345c7ebee..7778c4178 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -107,6 +107,7 @@ private: std::array stages{}; GraphicsPipelineKey key; bool uses_push_descriptors{}; + boost::container::small_vector bindings; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2bfbd8a8f..eacc2f8fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,6 +20,10 @@ namespace Vulkan { using Shader::VsOutput; +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + constexpr static std::array DescriptorHeapSizes = { vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192}, vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024}, @@ -29,10 +33,6 @@ constexpr static std::array DescriptorHeapSizes = { vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024}, }; -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} - void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { @@ -184,7 +184,7 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { - static constexpr std::array skip_hashes = {}; + static constexpr std::array skip_hashes = {0x6f27708a, 0x6af8ef74, 0xdf795c1f, 0xc2c49a3b}; if (std::ranges::contains(skip_hashes, shader_hash)) { LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); return true; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 4724b6038..7017cbc96 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -44,6 +44,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( case 0xc81ad50e: case 0xb7c39078: case 0x30b6e267: // TODO remove this + case 0xde55a405: // TODO remove this case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 61004f859..1d4ef8aa7 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -3,8 +3,8 @@ #include #include -#include #include "common/assert.h" +#include "common/scope_exit.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -103,68 +103,88 @@ vk::CommandBuffer CommandPool::Commit() { return cmd_buffers[index]; } -constexpr u32 DESCRIPTOR_SET_BATCH = 32; - -DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, +DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore_, std::span pool_sizes_, u32 descriptor_heap_count_) - : ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()}, + : device{instance.GetDevice()}, master_semaphore{master_semaphore_}, descriptor_heap_count{descriptor_heap_count_}, pool_sizes{pool_sizes_} { - // Create descriptor pool - AppendDescriptorPool(); + CreateDescriptorPool(); } -DescriptorHeap::~DescriptorHeap() = default; - -void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) { - ASSERT(end - begin == DESCRIPTOR_SET_BATCH); - descriptor_sets.resize(end); - hashes.resize(end); - - std::array layouts; - layouts.fill(descriptor_set_layout); - - u32 current_pool = 0; - vk::DescriptorSetAllocateInfo alloc_info = { - .descriptorPool = *pools[current_pool], - .descriptorSetCount = DESCRIPTOR_SET_BATCH, - .pSetLayouts = layouts.data(), - }; - - // Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new - // one. - while (true) { - const auto result = - device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin); - if (result == vk::Result::eSuccess) { - break; - } - if (result == vk::Result::eErrorOutOfPoolMemory) { - current_pool++; - if (current_pool == pools.size()) { - LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); - AppendDescriptorPool(); - } - alloc_info.descriptorPool = *pools[current_pool]; - } +DescriptorHeap::~DescriptorHeap() { + device.destroyDescriptorPool(curr_pool); + for (const auto [pool, tick] : pending_pools) { + master_semaphore->Wait(tick); + device.destroyDescriptorPool(pool); } } vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) { - this->descriptor_set_layout = set_layout; - const std::size_t index = CommitResource(); - return descriptor_sets[index]; + const u64 set_key = std::bit_cast(set_layout); + const auto [it, _] = descriptor_sets.try_emplace(set_key); + + // Check if allocated sets exist and pick one. + if (!it->second.empty()) { + const auto desc_set = it->second.back(); + it.value().pop_back(); + return desc_set; + } + + DescSetBatch desc_sets(DescriptorSetBatch); + std::array layouts; + layouts.fill(set_layout); + + vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = curr_pool, + .descriptorSetCount = DescriptorSetBatch, + .pSetLayouts = layouts.data(), + }; + + // Attempt to allocate the descriptor set batch. + auto result = device.allocateDescriptorSets(&alloc_info, desc_sets.data()); + if (result == vk::Result::eSuccess) { + const auto desc_set = desc_sets.back(); + desc_sets.pop_back(); + it.value() = std::move(desc_sets); + return desc_set; + } + + // The pool has run out. Record current tick and place it in pending list. + ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory, + "Unexpected error during descriptor set allocation {}", + vk::to_string(result)); + pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick()); + if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) { + curr_pool = pool; + pending_pools.pop_front(); + device.resetDescriptorPool(curr_pool); + } else { + CreateDescriptorPool(); + } + + // Attempt to allocate again with fresh pool. + alloc_info.descriptorPool = curr_pool; + result = device.allocateDescriptorSets(&alloc_info, desc_sets.data()); + ASSERT_MSG(result == vk::Result::eSuccess, + "Unexpected error during descriptor set allocation {}", + vk::to_string(result)); + + // We've changed pool so also reset descriptor batch cache. + descriptor_sets.clear(); + const auto desc_set = desc_sets.back(); + desc_sets.pop_back(); + descriptor_sets[set_key] = std::move(desc_sets); + return desc_set; } -void DescriptorHeap::AppendDescriptorPool() { +void DescriptorHeap::CreateDescriptorPool() { const vk::DescriptorPoolCreateInfo pool_info = { .flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind, .maxSets = descriptor_heap_count, .poolSizeCount = static_cast(pool_sizes.size()), .pPoolSizes = pool_sizes.data(), }; - auto& pool = pools.emplace_back(); - pool = device.createDescriptorPoolUnique(pool_info); + curr_pool = device.createDescriptorPool(pool_info); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 00a600c10..20c11eb51 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -3,7 +3,9 @@ #pragma once +#include #include +#include #include #include "common/types.h" @@ -62,28 +64,28 @@ private: std::vector cmd_buffers; }; -class DescriptorHeap final : public ResourcePool { +class DescriptorHeap final { + static constexpr u32 DescriptorSetBatch = 32; public: explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, std::span pool_sizes, u32 descriptor_heap_count = 1024); - ~DescriptorHeap() override; - - void Allocate(std::size_t begin, std::size_t end) override; + ~DescriptorHeap(); vk::DescriptorSet Commit(vk::DescriptorSetLayout set_layout); private: - void AppendDescriptorPool(); + void CreateDescriptorPool(); private: vk::Device device; - vk::DescriptorSetLayout descriptor_set_layout; + MasterSemaphore* master_semaphore; u32 descriptor_heap_count; std::span pool_sizes; - std::vector pools; - std::vector descriptor_sets; - std::vector hashes; + vk::DescriptorPool curr_pool; + std::deque> pending_pools; + using DescSetBatch = boost::container::static_vector; + tsl::robin_map descriptor_sets; }; } // namespace Vulkan diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index e30c12648..f94c1a37b 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -69,7 +69,12 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept : is_storage{is_storage_} { type = ConvertImageViewType(image.GetType()); - format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); + const auto dfmt = image.GetDataFmt(); + auto nfmt = image.GetNumberFmt(); + if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { + nfmt = AmdGpu::NumberFormat::Unorm; + } + format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt); range.base.level = image.base_level; range.base.layer = image.base_array; range.extent.levels = image.last_level + 1; @@ -143,7 +148,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .aspectMask = aspect, .baseMipLevel = info.range.base.level, .levelCount = info.range.extent.levels - info.range.base.level, - .baseArrayLayer = info_.range.base.layer, + .baseArrayLayer = info.range.base.layer, .layerCount = info.range.extent.layers - info.range.base.layer, }, };