mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 20:14:45 +00:00
vk_resource_pool: Rewrite desc heap
This commit is contained in:
parent
449c99868a
commit
8907e4ff3c
14 changed files with 105 additions and 85 deletions
|
@ -10,7 +10,6 @@
|
|||
#ifdef ENABLE_QT_GUI
|
||||
#include "qt_gui/memory_patcher.h"
|
||||
#endif
|
||||
#include "common/ntapi.h"
|
||||
#include "common/path_util.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/scm_rev.h"
|
||||
|
@ -26,7 +25,6 @@
|
|||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/ngs2/ngs2.h"
|
||||
#include "core/libraries/rtc/rtc.h"
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
#include "core/linker.h"
|
||||
#include "core/memory.h"
|
||||
#include "emulator.h"
|
||||
|
|
|
@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
|
|||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
if (buffer.is_integer) {
|
||||
value = ctx.OpBitcast(ctx.U32[4], value);
|
||||
value = ctx.OpBitcast(ctx.S32[4], value);
|
||||
}
|
||||
ctx.OpImageWrite(tex_buffer, coord, value);
|
||||
}
|
||||
|
|
|
@ -117,6 +117,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||
case Opcode::BUFFER_ATOMIC_SWAP:
|
||||
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
|
||||
case Opcode::BUFFER_ATOMIC_UMIN:
|
||||
return BUFFER_ATOMIC(AtomicOp::Umin, inst);
|
||||
case Opcode::BUFFER_ATOMIC_UMAX:
|
||||
return BUFFER_ATOMIC(AtomicOp::Umax, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
|
|
@ -98,22 +98,7 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
|||
}
|
||||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::LoadBufferU32x2:
|
||||
case IR::Opcode::LoadBufferU32x3:
|
||||
case IR::Opcode::LoadBufferU32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::BufferAtomicIAdd32:
|
||||
case IR::Opcode::BufferAtomicSwap32:
|
||||
return IR::Type::U32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
return IR::Type::U32;
|
||||
}
|
||||
|
||||
bool IsImageAtomicInstruction(const IR::Inst& inst) {
|
||||
|
|
|
@ -40,6 +40,8 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
info.uses_group_quad = true;
|
||||
break;
|
||||
case IR::Opcode::ReadLane:
|
||||
case IR::Opcode::ReadFirstLane:
|
||||
case IR::Opcode::WriteLane:
|
||||
info.uses_group_ballot = true;
|
||||
break;
|
||||
case IR::Opcode::Discard:
|
||||
|
|
|
@ -37,14 +37,14 @@ struct ImageSpecialization {
|
|||
* after the first compilation of a module.
|
||||
*/
|
||||
struct StageSpecialization {
|
||||
static constexpr size_t MaxStageResources = 32;
|
||||
static constexpr size_t MaxStageResources = 64;
|
||||
|
||||
const Shader::Info* info;
|
||||
RuntimeInfo runtime_info;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
boost::container::small_vector<ImageSpecialization, 8> images;
|
||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||
u32 start_binding{};
|
||||
|
||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||
|
|
|
@ -581,6 +581,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndirectBuffer: {
|
||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||
auto task = ProcessCompute(
|
||||
|
|
|
@ -301,7 +301,6 @@ GraphicsPipeline::~GraphicsPipeline() = default;
|
|||
|
||||
void GraphicsPipeline::BuildDescSetLayout() {
|
||||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
for (const auto* stage : stages) {
|
||||
if (!stage) {
|
||||
continue;
|
||||
|
@ -450,7 +449,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
});
|
||||
}
|
||||
|
||||
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
|
||||
boost::container::static_vector<AmdGpu::Image, 32> tsharps;
|
||||
for (const auto& image_desc : stage->images) {
|
||||
const auto tsharp = image_desc.GetSharp(*stage);
|
||||
if (tsharp) {
|
||||
|
|
|
@ -107,6 +107,7 @@ private:
|
|||
std::array<const Shader::Info*, MaxShaderStages> stages{};
|
||||
GraphicsPipelineKey key;
|
||||
bool uses_push_descriptors{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -20,6 +20,10 @@ namespace Vulkan {
|
|||
|
||||
using Shader::VsOutput;
|
||||
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
}
|
||||
|
||||
constexpr static std::array DescriptorHeapSizes = {
|
||||
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
|
||||
vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024},
|
||||
|
@ -29,10 +33,6 @@ constexpr static std::array DescriptorHeapSizes = {
|
|||
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
|
||||
};
|
||||
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
}
|
||||
|
||||
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||
const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||
|
@ -184,7 +184,7 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
|
|||
}
|
||||
|
||||
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
|
||||
static constexpr std::array<u64, 0> skip_hashes = {};
|
||||
static constexpr std::array<u64, 4> skip_hashes = {0x6f27708a, 0x6af8ef74, 0xdf795c1f, 0xc2c49a3b};
|
||||
if (std::ranges::contains(skip_hashes, shader_hash)) {
|
||||
LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash);
|
||||
return true;
|
||||
|
|
|
@ -44,6 +44,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
|
|||
case 0xc81ad50e:
|
||||
case 0xb7c39078:
|
||||
case 0x30b6e267: // TODO remove this
|
||||
case 0xde55a405: // TODO remove this
|
||||
case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE
|
||||
case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
|
||||
return VK_FALSE;
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
@ -103,68 +103,88 @@ vk::CommandBuffer CommandPool::Commit() {
|
|||
return cmd_buffers[index];
|
||||
}
|
||||
|
||||
constexpr u32 DESCRIPTOR_SET_BATCH = 32;
|
||||
|
||||
DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
|
||||
DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore_,
|
||||
std::span<const vk::DescriptorPoolSize> pool_sizes_,
|
||||
u32 descriptor_heap_count_)
|
||||
: ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()},
|
||||
: device{instance.GetDevice()}, master_semaphore{master_semaphore_},
|
||||
descriptor_heap_count{descriptor_heap_count_}, pool_sizes{pool_sizes_} {
|
||||
// Create descriptor pool
|
||||
AppendDescriptorPool();
|
||||
CreateDescriptorPool();
|
||||
}
|
||||
|
||||
DescriptorHeap::~DescriptorHeap() = default;
|
||||
|
||||
void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) {
|
||||
ASSERT(end - begin == DESCRIPTOR_SET_BATCH);
|
||||
descriptor_sets.resize(end);
|
||||
hashes.resize(end);
|
||||
|
||||
std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_BATCH> layouts;
|
||||
layouts.fill(descriptor_set_layout);
|
||||
|
||||
u32 current_pool = 0;
|
||||
vk::DescriptorSetAllocateInfo alloc_info = {
|
||||
.descriptorPool = *pools[current_pool],
|
||||
.descriptorSetCount = DESCRIPTOR_SET_BATCH,
|
||||
.pSetLayouts = layouts.data(),
|
||||
};
|
||||
|
||||
// Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new
|
||||
// one.
|
||||
while (true) {
|
||||
const auto result =
|
||||
device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin);
|
||||
if (result == vk::Result::eSuccess) {
|
||||
break;
|
||||
}
|
||||
if (result == vk::Result::eErrorOutOfPoolMemory) {
|
||||
current_pool++;
|
||||
if (current_pool == pools.size()) {
|
||||
LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
|
||||
AppendDescriptorPool();
|
||||
}
|
||||
alloc_info.descriptorPool = *pools[current_pool];
|
||||
}
|
||||
DescriptorHeap::~DescriptorHeap() {
|
||||
device.destroyDescriptorPool(curr_pool);
|
||||
for (const auto [pool, tick] : pending_pools) {
|
||||
master_semaphore->Wait(tick);
|
||||
device.destroyDescriptorPool(pool);
|
||||
}
|
||||
}
|
||||
|
||||
vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
|
||||
this->descriptor_set_layout = set_layout;
|
||||
const std::size_t index = CommitResource();
|
||||
return descriptor_sets[index];
|
||||
const u64 set_key = std::bit_cast<u64>(set_layout);
|
||||
const auto [it, _] = descriptor_sets.try_emplace(set_key);
|
||||
|
||||
// Check if allocated sets exist and pick one.
|
||||
if (!it->second.empty()) {
|
||||
const auto desc_set = it->second.back();
|
||||
it.value().pop_back();
|
||||
return desc_set;
|
||||
}
|
||||
|
||||
DescSetBatch desc_sets(DescriptorSetBatch);
|
||||
std::array<vk::DescriptorSetLayout, DescriptorSetBatch> layouts;
|
||||
layouts.fill(set_layout);
|
||||
|
||||
vk::DescriptorSetAllocateInfo alloc_info = {
|
||||
.descriptorPool = curr_pool,
|
||||
.descriptorSetCount = DescriptorSetBatch,
|
||||
.pSetLayouts = layouts.data(),
|
||||
};
|
||||
|
||||
// Attempt to allocate the descriptor set batch.
|
||||
auto result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
|
||||
if (result == vk::Result::eSuccess) {
|
||||
const auto desc_set = desc_sets.back();
|
||||
desc_sets.pop_back();
|
||||
it.value() = std::move(desc_sets);
|
||||
return desc_set;
|
||||
}
|
||||
|
||||
// The pool has run out. Record current tick and place it in pending list.
|
||||
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory,
|
||||
"Unexpected error during descriptor set allocation {}",
|
||||
vk::to_string(result));
|
||||
pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
|
||||
if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {
|
||||
curr_pool = pool;
|
||||
pending_pools.pop_front();
|
||||
device.resetDescriptorPool(curr_pool);
|
||||
} else {
|
||||
CreateDescriptorPool();
|
||||
}
|
||||
|
||||
// Attempt to allocate again with fresh pool.
|
||||
alloc_info.descriptorPool = curr_pool;
|
||||
result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
|
||||
ASSERT_MSG(result == vk::Result::eSuccess,
|
||||
"Unexpected error during descriptor set allocation {}",
|
||||
vk::to_string(result));
|
||||
|
||||
// We've changed pool so also reset descriptor batch cache.
|
||||
descriptor_sets.clear();
|
||||
const auto desc_set = desc_sets.back();
|
||||
desc_sets.pop_back();
|
||||
descriptor_sets[set_key] = std::move(desc_sets);
|
||||
return desc_set;
|
||||
}
|
||||
|
||||
void DescriptorHeap::AppendDescriptorPool() {
|
||||
void DescriptorHeap::CreateDescriptorPool() {
|
||||
const vk::DescriptorPoolCreateInfo pool_info = {
|
||||
.flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
|
||||
.maxSets = descriptor_heap_count,
|
||||
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
|
||||
.pPoolSizes = pool_sizes.data(),
|
||||
};
|
||||
auto& pool = pools.emplace_back();
|
||||
pool = device.createDescriptorPoolUnique(pool_info);
|
||||
curr_pool = device.createDescriptorPool(pool_info);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
|
||||
#include "common/types.h"
|
||||
|
@ -62,28 +64,28 @@ private:
|
|||
std::vector<vk::CommandBuffer> cmd_buffers;
|
||||
};
|
||||
|
||||
class DescriptorHeap final : public ResourcePool {
|
||||
class DescriptorHeap final {
|
||||
static constexpr u32 DescriptorSetBatch = 32;
|
||||
public:
|
||||
explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
|
||||
std::span<const vk::DescriptorPoolSize> pool_sizes,
|
||||
u32 descriptor_heap_count = 1024);
|
||||
~DescriptorHeap() override;
|
||||
|
||||
void Allocate(std::size_t begin, std::size_t end) override;
|
||||
~DescriptorHeap();
|
||||
|
||||
vk::DescriptorSet Commit(vk::DescriptorSetLayout set_layout);
|
||||
|
||||
private:
|
||||
void AppendDescriptorPool();
|
||||
void CreateDescriptorPool();
|
||||
|
||||
private:
|
||||
vk::Device device;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
MasterSemaphore* master_semaphore;
|
||||
u32 descriptor_heap_count;
|
||||
std::span<const vk::DescriptorPoolSize> pool_sizes;
|
||||
std::vector<vk::UniqueDescriptorPool> pools;
|
||||
std::vector<vk::DescriptorSet> descriptor_sets;
|
||||
std::vector<std::size_t> hashes;
|
||||
vk::DescriptorPool curr_pool;
|
||||
std::deque<std::pair<vk::DescriptorPool, u64>> pending_pools;
|
||||
using DescSetBatch = boost::container::static_vector<vk::DescriptorSet, DescriptorSetBatch>;
|
||||
tsl::robin_map<u64, DescSetBatch> descriptor_sets;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -69,7 +69,12 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
|
|||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
|
||||
: is_storage{is_storage_} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
const auto dfmt = image.GetDataFmt();
|
||||
auto nfmt = image.GetNumberFmt();
|
||||
if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) {
|
||||
nfmt = AmdGpu::NumberFormat::Unorm;
|
||||
}
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt);
|
||||
range.base.level = image.base_level;
|
||||
range.base.layer = image.base_array;
|
||||
range.extent.levels = image.last_level + 1;
|
||||
|
@ -143,7 +148,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
|||
.aspectMask = aspect,
|
||||
.baseMipLevel = info.range.base.level,
|
||||
.levelCount = info.range.extent.levels - info.range.base.level,
|
||||
.baseArrayLayer = info_.range.base.layer,
|
||||
.baseArrayLayer = info.range.base.layer,
|
||||
.layerCount = info.range.extent.layers - info.range.base.layer,
|
||||
},
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue