video_core: Add basic vertex, index buffer handling and pipeline caching

Author: raphaelthegreat
Date: 2024-05-24 23:50:56 +03:00
Parent: 0eaa7d5859
Commit: f480d091ce
27 changed files with 506 additions and 174 deletions

.gitmodules

@@ -50,7 +50,7 @@
[submodule "externals/toml11"]
path = externals/toml11
url = https://github.com/ToruNiina/toml11.git
[submodule "externals/xxHash"]
[submodule "externals/xxhash"]
path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git
[submodule "externals/zydis"]


@@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# xxHash
add_library(xxhash INTERFACE)
target_include_directories(xxhash INTERFACE xxhash)
add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
target_include_directories(xxhash PUBLIC xxhash)
# Zydis
option(ZYDIS_BUILD_TOOLS "" OFF)
@@ -92,4 +92,4 @@ endif()
add_subdirectory(sirit EXCLUDE_FROM_ALL)
if (WIN32)
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
endif()
endif()


@@ -7,6 +7,7 @@
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Core {
@@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
if (type == VMAType::Direct) {
MapVulkanMemory(mapped_addr, size);
}
};
// When virtual addr is zero, let the address space manager pick the address.
@@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range");
if (it->second.type == VMAType::Direct) {
UnmapVulkanMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second;
vma.type = VMAType::Free;
@@ -114,6 +123,13 @@
impl.Unmap(virtual_addr, size);
}
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
auto it = mapped_memories.upper_bound(addr);
ASSERT(it != mapped_memories.begin());
it = std::prev(it);
ASSERT(it->first <= addr);
return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
@@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
return iter;
}
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
const vk::Device device = instance->GetDevice();
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
void* host_pointer = reinterpret_cast<void*>(addr);
const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
ASSERT(host_mem_props.memoryTypeBits != 0);
int mapped_memory_type = -1;
auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
u32 host_mem_types = host_mem_props.memoryTypeBits;
while (host_mem_types != 0) {
// Try to find a cached memory type
mapped_memory_type = std::countr_zero(host_mem_types);
host_mem_types -= (1 << mapped_memory_type);
if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
return;
}
}
mapped_memory_type = -1;
};
// First try to find a memory type that is both coherent and cached
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
vk::MemoryPropertyFlagBits::eHostCached);
if (mapped_memory_type == -1)
// Then only coherent (lower performance)
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
if (mapped_memory_type == -1) {
LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
}
const vk::StructureChain alloc_info = {
vk::MemoryAllocateInfo{
.allocationSize = size,
.memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
},
vk::ImportMemoryHostPointerInfoEXT{
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
.pHostPointer = host_pointer,
},
};
const auto [it, new_memory] = mapped_memories.try_emplace(addr);
ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");
auto& memory = it->second;
memory.backing = device.allocateMemoryUnique(alloc_info.get());
constexpr vk::BufferUsageFlags MapFlags =
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformBuffer;
const vk::StructureChain buffer_info = {
vk::BufferCreateInfo{
.size = size,
.usage = MapFlags,
.sharingMode = vk::SharingMode::eExclusive,
},
vk::ExternalMemoryBufferCreateInfoKHR{
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
}};
memory.buffer = device.createBufferUnique(buffer_info.get());
device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
const auto it = mapped_memories.find(addr);
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
mapped_memories.erase(it);
}
} // namespace Core
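Note on the memory-type search in MapVulkanMemory above: the loop walks the mask of host-pointer-compatible memory types from the lowest set bit upward, preferring cached+coherent, then coherent only. A standalone restatement of that scan, with illustrative names rather than the emulator's API:

#include <bit>
#include <cstdint>
#include <span>

// Return the lowest-index memory type that is compatible with the imported
// host pointer and has all `wanted` property flags, or -1 if none qualifies.
int FindMemoryType(std::uint32_t compatible_mask, std::span<const std::uint32_t> type_flags,
                   std::uint32_t wanted) {
    while (compatible_mask != 0) {
        const int type = std::countr_zero(compatible_mask); // lowest set bit = lowest index
        compatible_mask &= compatible_mask - 1;             // clear it, like the -= above
        if ((type_flags[type] & wanted) == wanted) {
            return type;
        }
    }
    return -1;
}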


@@ -3,6 +3,7 @@
#pragma once
#include <functional>
#include <string_view>
#include <vector>
#include <boost/icl/split_interval_map.hpp>
@@ -10,6 +11,11 @@
#include "common/singleton.h"
#include "common/types.h"
#include "core/address_space.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
}
namespace Core {
@@ -86,6 +92,10 @@ public:
explicit MemoryManager();
~MemoryManager();
void SetInstance(const Vulkan::Instance* instance_) {
instance = instance_;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type);
@@ -97,11 +107,9 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size);
private:
bool HasOverlap(VAddr addr, size_t size) const {
return vma_map.find(addr) != vma_map.end();
}
std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
private:
VMAHandle FindVMA(VAddr target) {
// Return the first VMA with base >= target.
const auto it = vma_map.lower_bound(target);
@@ -117,10 +125,22 @@ private:
VMAHandle MergeAdjacent(VMAHandle iter);
void MapVulkanMemory(VAddr addr, size_t size);
void UnmapVulkanMemory(VAddr addr, size_t size);
private:
AddressSpace impl;
std::vector<DirectMemoryArea> allocations;
VMAMap vma_map;
struct MappedMemory {
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory backing;
size_t buffer_size;
};
std::map<VAddr, MappedMemory> mapped_memories;
const Vulkan::Instance* instance{};
};
using Memory = Common::Singleton<MemoryManager>;
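Note: GetVulkanBuffer relies on the classic interval-map lookup. upper_bound returns the first mapping that starts beyond the address, so the entry just before it is the only one that can contain it; the emptiness check must happen before std::prev is taken. A standalone sketch with illustrative types:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>

using VAddr = std::uint64_t;

struct Region {
    int buffer_id; // stand-in for vk::Buffer
    std::size_t size;
};

// Resolve addr to (buffer, offset) within the mapping that contains it.
std::pair<int, std::size_t> Resolve(const std::map<VAddr, Region>& regions, VAddr addr) {
    auto it = regions.upper_bound(addr); // first region starting after addr
    assert(it != regions.begin());       // otherwise addr precedes every region
    it = std::prev(it);                  // candidate region at or below addr
    assert(addr - it->first < it->second.size);
    return {it->second.buffer_id, addr - it->first};
}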


@@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) {
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
break;
case Stage::Fragment:
for (const auto& input : info.ps_inputs) {
if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]};
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
@@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
}
default:
break;
@@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
}
break;
case Stage::Fragment:

View file

@@ -40,7 +40,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
struct VsharpLoad {
u32 dword_offset{};
s32 base_sgpr{};
s32 dst_sgpr{-1};
s32 dst_reg{-1};
};
boost::container::static_vector<VsharpLoad, 16> loads;
@@ -57,11 +57,13 @@
}
if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SGPR.
// This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) {
return load.dst_sgpr == inst.src[2].code * 4;
});
const auto it = std::ranges::find_if(
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++;
@@ -71,7 +73,7 @@
attrib.dword_offset = it->dword_offset;
// Mark load as used.
it->dst_sgpr = -1;
it->dst_reg = -1;
}
}
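A worked example of the SGPR unit conversion in the comment above, with illustrative numbers:

// SRSRC counts in groups of four SGPRs: a buffer instruction with
// src[2].code == 2 reads the V# occupying s[8:11], so it matches the
// earlier s_load whose dst_reg is SGPR 8.
constexpr int SrsrcToFirstSgpr(int srsrc) {
    return srsrc * 4;
}
static_assert(SrsrcToFirstSgpr(2) == 8);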


@@ -9,11 +9,11 @@
namespace Shader::Gcn {
struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);


@@ -2,8 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
@@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) {
IR::VectorReg dst_reg{attrib.dest_vgpr};
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
}
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base,
attrib.dword_offset);
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
});
}
}


@@ -12,8 +12,6 @@
namespace Shader::IR {
struct Program {
explicit Program(const Info&& info_) : info{info_} {}
AbstractSyntaxList syntax_list;
BlockList blocks;
BlockList post_order_blocks;


@@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
file.close();
// Decode and save instructions
IR::Program program{std::move(info)};
IR::Program program;
program.ins_list.reserve(token.size());
while (!slice.atEnd()) {
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
@@ -55,6 +55,7 @@
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());


@@ -4,8 +4,8 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
@@ -30,7 +30,6 @@ struct BinaryInfo {
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
std::span<const u32> code,
const Info&& info);
std::span<const u32> code, const Info&& info);
} // namespace Shader


@@ -40,12 +40,12 @@ enum class TextureType : u32 {
constexpr u32 NUM_TEXTURE_TYPES = 7;
struct Info {
explicit Info(std::span<const u32, 16> user_data_) : user_data{user_data_} {}
struct VsInput {
AmdGpu::NumberFormat fmt;
u16 binding;
u16 num_components;
u8 sgpr_base;
u8 dword_offset;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
@@ -60,29 +60,33 @@ struct Info {
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[static_cast<size_t>(attrib)] & (1 << comp);
return flags[Index(attrib)] & (1 << comp);
}
bool GetAny(IR::Attribute attrib) const {
return flags[static_cast<size_t>(attrib)];
return flags[Index(attrib)];
}
void Set(IR::Attribute attrib, u32 comp = 0) {
flags[static_cast<size_t>(attrib)] |= (1 << comp);
flags[Index(attrib)] |= (1 << comp);
}
u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[static_cast<size_t>(attrib)];
const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 && mask != 0b1101);
return std::popcount(mask);
}
static size_t Index(IR::Attribute attrib) {
return static_cast<size_t>(attrib);
}
std::array<u8, IR::NumAttributes> flags;
};
AttributeFlags loads{};
AttributeFlags stores{};
std::span<const u32, 16> user_data;
std::span<const u32> user_data;
Stage stage;
template <typename T>

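The AttributeFlags helpers above pack one byte of component bits per attribute; NumComponents is then a popcount of that byte. A compressed, illustrative restatement (sizes and types are stand-ins):

#include <array>
#include <bit>
#include <cstdint>

struct AttribMask {
    std::array<std::uint8_t, 8> flags{}; // one component bitmask per attribute

    void Set(std::size_t attrib, unsigned comp) {
        flags[attrib] |= std::uint8_t(1u << comp);
    }
    bool Get(std::size_t attrib, unsigned comp) const {
        return flags[attrib] & (1u << comp);
    }
    int NumComponents(std::size_t attrib) const {
        return std::popcount(flags[attrib]); // e.g. mask 0b0111 -> 3 components
    }
};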

@@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
rasterizer->DrawIndex();
rasterizer->Draw(true);
}
break;
}
@@ -122,7 +122,9 @@
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
rasterizer->DrawIndex();
if (rasterizer) {
rasterizer->Draw(false);
}
break;
}
case PM4ItOpcode::DispatchDirect: {


@@ -180,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
};
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
@@ -236,9 +217,45 @@
BitField<24, 8, u32> stencil_op_val;
};
union StencilInfo {
u32 raw;
BitField<0, 1, u32> format;
struct DepthBuffer {
enum class ZFormat : u32 {
Invalid = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
};
enum class ClipSpace : u32 {
@@ -505,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const {
return u64(cmask_base_address) << 8;
}
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types; account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
};
enum class PrimitiveType : u32 {
@@ -539,14 +562,8 @@
u32 stencil_clear;
u32 depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
StencilInfo stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
@@ -595,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances;
};
std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &vs_program;
case 4:
return &ps_program;
default:
return nullptr;
}
}
};
Regs regs{};
@@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
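A worked check of the DepthBuffer size helpers above, assuming the 8-pixel tile granularity implied by the shift: the tile-max fields store (dimension / 8) - 1.

#include <cstdint>

constexpr std::uint32_t DecodeTileMax(std::uint32_t tile_max) {
    return (tile_max + 1) << 3; // tile_max = (pixels / 8) - 1
}
static_assert(DecodeTileMax(239) == 1920); // pitch_tile_max for a 1920-wide target
static_assert(DecodeTileMax(134) == 1080); // height_tile_max for a 1080-tall target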


@@ -76,4 +76,3 @@ struct fmt::formatter<AmdGpu::NumberFormat> {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
}
};


@@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "common/assert.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
@@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
}
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) {
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) {
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
}
UNREACHABLE();
}
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format) {
UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
static constexpr u16 NumVerticesPerQuad = 4;
u16* out_data = reinterpret_cast<u16*>(out_ptr);
for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
*out_data++ = i;
*out_data++ = i + 1;
*out_data++ = i + 2;
*out_data++ = i + 2;
*out_data++ = i;
*out_data++ = i + 3;
}
}
} // namespace Vulkan::LiverpoolToVK
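EmitQuadToTriangleListIndices splits each quad (v0,v1,v2,v3) into the triangles (v0,v1,v2) and (v2,v0,v3), so a quad list of N vertices expands to (N / 4) * 6 indices. An equivalent, self-contained sketch for reference:

#include <cstdint>
#include <vector>

std::vector<std::uint16_t> QuadListToTriangles(std::uint16_t num_vertices) {
    std::vector<std::uint16_t> out;
    out.reserve(num_vertices / 4 * 6);
    for (std::uint16_t i = 0; i < num_vertices; i += 4) {
        for (std::uint16_t v : {i, std::uint16_t(i + 1), std::uint16_t(i + 2),
                                std::uint16_t(i + 2), i, std::uint16_t(i + 3)}) {
            out.push_back(v);
        }
    }
    return out;
}
// QuadListToTriangles(4) yields {0, 1, 2, 2, 0, 3}.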


@@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK


@@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_,
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} {
: instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0U,
.pVertexAttributeDescriptions = nullptr,
.vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = attributes.data(),
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main",
};
const vk::Format color_format = vk::Format::eR8G8B8A8Srgb;
const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1,
.pColorAttachmentFormats = &color_format,
.depthAttachmentFormat = vk::Format::eUndefined,
.colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined,
};
@@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = pipeline_layout,
.layout = *pipeline_layout,
};
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan


@@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5;
class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool;
struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth;
Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front;
@@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout,
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline;
}
private:
const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout;
std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key;
};
} // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};
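Note on the static_assert above: hashing and comparing the key by raw bytes (XXH3_64bits and memcmp) is only sound for types without padding, which std::has_unique_object_representations_v verifies at compile time. A minimal illustration with stand-in types (padding layout assumes a typical ABI where u32 is 4-byte aligned):

#include <cstdint>
#include <type_traits>

struct PackedKey {
    std::uint32_t a;
    std::uint32_t b;
};
static_assert(std::has_unique_object_representations_v<PackedKey>); // byte-wise hash is safe

struct PaddedKey {
    std::uint8_t a; // on typical ABIs, three padding bytes follow here
    std::uint32_t b;
};
static_assert(!std::has_unique_object_representations_v<PaddedKey>); // byte-wise hashing
                                                                     // would read padding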


@@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions);
LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
}
void Instance::CollectToolingInfo() {


@@ -2,10 +2,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -14,8 +14,9 @@
namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{user_data};
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = user_data;
info.stage = stage;
switch (stage) {
case Shader::Stage::Fragment: {
@@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
}
void PipelineCache::BindPipeline() {
SCOPE_EXIT {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
};
const GraphicsPipeline* PipelineCache::GetPipeline() {
RefreshKey();
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) {
it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) {
return;
void PipelineCache::RefreshKey() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
}
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) {
const u32* token = pgm.Address<u32>();
for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo));
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) {
return *it->second;
stages[i] = *it->second;
continue;
}
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info));
const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program);
static int counter = 0;
std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary);
file.write((const char*)code.data(), code.size() * sizeof(u32));
file.close();
// Recompile shader to IR.
const auto stage = Shader::Stage{i};
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
return CompileSPV(code, instance.GetDevice());
};
// Compile IR to SPIR-V
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
}
// Retrieve shader stage modules.
// TODO: Only do this when program address is changed.
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
// Bind pipeline.
// TODO: Read entire key based on reg state.
graphics_key.prim_type = liverpool->regs.primitive_type;
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
*pipeline_layout, stages);
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
infos, stages);
}
} // namespace Vulkan
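GetPipeline follows a build-on-miss pattern: try_emplace inserts a default-constructed slot and reports whether the key was new, so the expensive pipeline build runs only on a cache miss. A generic sketch of the same pattern, with std::unordered_map standing in for the robin_map:

#include <cstdint>
#include <memory>
#include <unordered_map>

struct Pipeline {};

const Pipeline* GetOrCreate(std::unordered_map<std::uint64_t, std::unique_ptr<Pipeline>>& cache,
                            std::uint64_t key) {
    const auto [it, is_new] = cache.try_emplace(key); // default-constructs the slot on miss
    if (is_new) {
        it->second = std::make_unique<Pipeline>(); // expensive build, misses only
    }
    return it->second.get();
}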


@@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan {
class Instance;
@@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool);
~PipelineCache() = default;
void BindPipeline();
const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private:
const Instance& instance;
@@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline;
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;


@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
@@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
memory->SetInstance(&instance);
}
Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() {
void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs;
UpdateDynamicState();
pipeline_cache.BindPipeline();
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
pipeline->BindResources(memory);
const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view,
@@ -52,13 +54,50 @@
.pColorAttachments = &color_info,
};
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32);
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0));
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();
}
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with a CPU-generated index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState();
}
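A quick check of the QuadList sizing in SetupIndexBuffer: four quad vertices expand to six 16-bit indices, so the byte count is (num_vertices / 4) * 6 * sizeof(u16) = 3 * num_vertices, which is exactly what index_size computes.

#include <cstdint>

constexpr bool QuadIndexSizingHolds(std::uint32_t num_vertices) {
    const std::uint32_t index_bytes = 3 * num_vertices;
    return index_bytes / sizeof(std::uint16_t) == (num_vertices / 4) * 6;
}
static_assert(QuadIndexSizingHolds(4) && QuadIndexSizingHolds(1024));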


@@ -3,7 +3,6 @@
#pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool;
}
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class TextureCache;
}
@@ -26,20 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer();
/// Performs a draw call with an index buffer.
void DrawIndex();
/// Performs a draw call without an index buffer.
void DrawAuto();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
void Draw(bool is_indexed);
private:
/// Updates viewport and scissor from liverpool registers.
void UpdateViewportScissorState();
u32 SetupIndexBuffer(bool& is_indexed);
void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers.
void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState();
private:
@@ -47,6 +44,7 @@ private:
Scheduler& scheduler;
VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer;
};


@@ -35,7 +35,7 @@ public:
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);
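A usage sketch of the Map/Commit pairing, mirroring SetupIndexBuffer in the rasterizer above; stream_buffer, cmdbuf, data, and size are illustrative placeholders for a fragment, not a complete program:

// Reserve space, write through the returned host pointer, commit exactly the
// bytes written, then bind at the returned offset within the stream buffer.
const auto [ptr, offset, invalidated] = stream_buffer.Map(size);
std::memcpy(ptr, data, size);
stream_buffer.Commit(size);
cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16);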


@@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
// There is a small difference between T# and CB number types, account for it.
const auto number_fmt =
buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: buffer.info.number_type;
is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt);
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D;
size.width = buffer.Pitch();
size.height = buffer.Height();


@@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
return slot_image_views[view_id];
}
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image);
const ImageViewId view_id =
slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];