diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index ffec70300..645bcf423 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -2155,6 +2155,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() { int PS4_SYSV_ABI sceGnmSubmitDone() { LOG_DEBUG(Lib_GnmDriver, "called"); + WaitGpuIdle(); if (!liverpool->IsGpuIdle()) { submission_lock = true; } diff --git a/src/emulator.cpp b/src/emulator.cpp index 57366453a..e631698fb 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,6 +10,7 @@ #ifdef ENABLE_QT_GUI #include "qt_gui/memory_patcher.h" #endif +#include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" #include "common/scm_rev.h" diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 4e0c110c2..c9144fac1 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -171,7 +171,7 @@ T Translator::GetSrc(const InstOperand& operand) { } } else { if (operand.input_modifier.abs) { - LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction"); + value = ir.IAbs(value); } if (operand.input_modifier.neg) { UNREACHABLE(); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 9af7386a4..c4e16b7a4 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #pragma once #include @@ -182,6 +181,7 @@ struct Info { const u32* base = user_data.data(); if (ptr_index != IR::NumScalarRegs) { std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + base = reinterpret_cast(VAddr(base) & 0xFFFFFFFFFFFFULL); } std::memcpy(&data, base + dword_offset, sizeof(T)); return data; diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp index a87cf31b1..76bfcf911 100644 --- a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -21,8 +21,7 @@ void LowerSharedMemToRegisters(IR::Program& program) { const IR::Inst* prod = inst.Arg(0).InstRecursive(); const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) { const IR::Inst* write_prod = write->Arg(0).InstRecursive(); - return write_prod->Arg(1).U32() == prod->Arg(1).U32() && - write_prod->Arg(0) == prod->Arg(0); + return write_prod->Arg(1).U32() == prod->Arg(1).U32(); }); ASSERT(it != ds_writes.end()); // Replace data read with value written. diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a461d7e6a..a2bd60f2e 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -581,9 +581,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* nop = reinterpret_cast(header); break; } - case PM4ItOpcode::DmaData: { - break; - } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); auto task = ProcessCompute( diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index fd7980c17..064b89951 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -187,6 +187,11 @@ struct PM4CmdSetData { BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+ ///< Program to zero for other opcodes and on SI }; + u32 data[0]; + + [[nodiscard]] u32 Size() const { + return header.count << 2u; + } template static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) { @@ -350,6 +355,16 @@ struct PM4CmdEventWriteEop { } }; +struct PM4CmdAcquireMem { + PM4Type3Header header; + u32 cp_coher_cntl; + u32 cp_coher_size_lo; + u32 cp_coher_size_hi; + u32 cp_coher_base_lo; + u32 cp_coher_base_hi; + u32 poll_interval; +}; + enum class DmaDataDst : u32 { Memory = 0, Gds = 1, @@ -467,6 +482,10 @@ struct PM4CmdWriteData { }; u32 data[0]; + u32 Size() const { + return (header.count.Value() - 2) * 4; + } + template void Address(T addr) { addr64 = static_cast(addr); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index f1cf8136e..2ed0ddc87 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -586,9 +586,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - if (offset + (mip_ofs * num_layers) > buffer.SizeBytes()) { - break; - } copies.push_back({ .bufferOffset = offset, .bufferRowLength = static_cast(mip_pitch), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 69ab524d6..2f5209eb2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -452,7 +452,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, boost::container::static_vector tsharps; for (const auto& image_desc : stage->images) { const auto tsharp = image_desc.GetSharp(*stage); - if (tsharp) { + if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { tsharps.emplace_back(tsharp); VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eacc2f8fd..e19467b00 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -184,7 +184,7 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { - static constexpr std::array skip_hashes = {0x6f27708a, 0x6af8ef74, 0xdf795c1f, 0xc2c49a3b}; + static constexpr std::array skip_hashes = {}; if (std::ranges::contains(skip_hashes, shader_hash)) { LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); return true; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 7017cbc96..feadda96c 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -43,8 +43,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( case 0x609a13b: // Vertex attribute at location not consumed by shader case 0xc81ad50e: case 0xb7c39078: - case 0x30b6e267: // TODO remove this - case 0xde55a405: // TODO remove this case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 1d4ef8aa7..a5ee22c25 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -151,8 +151,7 @@ vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) { // The pool has run out. Record current tick and place it in pending list. ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory, - "Unexpected error during descriptor set allocation {}", - vk::to_string(result)); + "Unexpected error during descriptor set allocation {}", vk::to_string(result)); pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick()); if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) { curr_pool = pool; @@ -166,8 +165,7 @@ vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) { alloc_info.descriptorPool = curr_pool; result = device.allocateDescriptorSets(&alloc_info, desc_sets.data()); ASSERT_MSG(result == vk::Result::eSuccess, - "Unexpected error during descriptor set allocation {}", - vk::to_string(result)); + "Unexpected error during descriptor set allocation {}", vk::to_string(result)); // We've changed pool so also reset descriptor batch cache. descriptor_sets.clear(); diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 20c11eb51..98c2ddb8c 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -66,6 +66,7 @@ private: class DescriptorHeap final { static constexpr u32 DescriptorSetBatch = 32; + public: explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, std::span pool_sizes, diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 2a5c4c434..d494322a9 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -73,7 +73,6 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { if (!info.IsBlockCoded() && !info.IsPacked()) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } - // In cases where an image is created as a render/depth target and cleared with compute, // we cannot predict whether it will be used as a storage image. A proper solution would // involve re-creating the resource with a new configuration and copying previous content