Mirror of https://github.com/shadps4-emu/shadPS4.git
video_core: Add basic vertex, index buffer handling and pipeline caching
This commit is contained in:
parent 0eaa7d5859
commit f480d091ce
27 changed files with 506 additions and 174 deletions
------------------------------------------------------------
.gitmodules (vendored): 2 changes

@@ -50,7 +50,7 @@
 [submodule "externals/toml11"]
     path = externals/toml11
     url = https://github.com/ToruNiina/toml11.git
-[submodule "externals/xxHash"]
+[submodule "externals/xxhash"]
     path = externals/xxHash
     url = https://github.com/Cyan4973/xxHash.git
 [submodule "externals/zydis"]
------------------------------------------------------------
externals/CMakeLists.txt (vendored): 6 changes

@@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
 add_subdirectory(toml11 EXCLUDE_FROM_ALL)

 # xxHash
-add_library(xxhash INTERFACE)
-target_include_directories(xxhash INTERFACE xxhash)
+add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
+target_include_directories(xxhash PUBLIC xxhash)

 # Zydis
 option(ZYDIS_BUILD_TOOLS "" OFF)

@@ -92,4 +92,4 @@ endif()
 add_subdirectory(sirit EXCLUDE_FROM_ALL)
 if (WIN32)
     target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
 endif()
 endif()
------------------------------------------------------------
@@ -7,6 +7,7 @@
 #include "common/scope_exit.h"
 #include "core/libraries/error_codes.h"
 #include "core/memory.h"
+#include "video_core/renderer_vulkan/vk_instance.h"

 namespace Core {

@@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
     new_vma.prot = prot;
     new_vma.name = name;
     new_vma.type = type;
+
+    if (type == VMAType::Direct) {
+        MapVulkanMemory(mapped_addr, size);
+    }
 };

 // When virtual addr is zero let the address space manager pick the address.

@@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
     ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
                "Attempting to unmap partially mapped range");

+    if (it->second.type == VMAType::Direct) {
+        UnmapVulkanMemory(virtual_addr, size);
+    }
+
     // Mark region as free and attempt to coalesce it with neighbours.
     auto& vma = it->second;
     vma.type = VMAType::Free;

@@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
     impl.Unmap(virtual_addr, size);
 }

+std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
+    auto it = mapped_memories.upper_bound(addr);
+    it = std::prev(it);
+    ASSERT(it != mapped_memories.end() && it->first <= addr);
+    return std::make_pair(*it->second.buffer, addr - it->first);
+}
+
 VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
     auto vma_handle = FindVMA(virtual_addr);
     ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");

@@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
     return iter;
 }

+void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
+    const vk::Device device = instance->GetDevice();
+    const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
+    void* host_pointer = reinterpret_cast<void*>(addr);
+    const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
+        vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
+    ASSERT(host_mem_props.memoryTypeBits != 0);
+
+    int mapped_memory_type = -1;
+    auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
+        u32 host_mem_types = host_mem_props.memoryTypeBits;
+        while (host_mem_types != 0) {
+            // Try to find a cached memory type
+            mapped_memory_type = std::countr_zero(host_mem_types);
+            host_mem_types -= (1 << mapped_memory_type);
+
+            if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
+                return;
+            }
+        }
+
+        mapped_memory_type = -1;
+    };
+
+    // First try to find a memory that is both coherent and cached
+    find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
+                            vk::MemoryPropertyFlagBits::eHostCached);
+    if (mapped_memory_type == -1)
+        // Then only coherent (lower performance)
+        find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
+
+    if (mapped_memory_type == -1) {
+        LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
+        mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
+    }
+
+    const vk::StructureChain alloc_info = {
+        vk::MemoryAllocateInfo{
+            .allocationSize = size,
+            .memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
+        },
+        vk::ImportMemoryHostPointerInfoEXT{
+            .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+            .pHostPointer = host_pointer,
+        },
+    };
+
+    const auto [it, new_memory] = mapped_memories.try_emplace(addr);
+    ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");
+
+    auto& memory = it->second;
+    memory.backing = device.allocateMemoryUnique(alloc_info.get());
+
+    constexpr vk::BufferUsageFlags MapFlags =
+        vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
+        vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
+        vk::BufferUsageFlagBits::eUniformBuffer;
+
+    const vk::StructureChain buffer_info = {
+        vk::BufferCreateInfo{
+            .size = size,
+            .usage = MapFlags,
+            .sharingMode = vk::SharingMode::eExclusive,
+        },
+        vk::ExternalMemoryBufferCreateInfoKHR{
+            .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+        }};
+    memory.buffer = device.createBufferUnique(buffer_info.get());
+    device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
+}
+
+void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
+    const auto it = mapped_memories.find(addr);
+    ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
+    mapped_memories.erase(it);
+}
+
 } // namespace Core
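For reference, the memory-type scan above walks getMemoryHostPointerPropertiesEXT's memoryTypeBits with std::countr_zero, preferring a coherent and cached type before settling for coherent only. A minimal standalone sketch of the same scan, with plain integers standing in for the Vulkan flag types (all names here are illustrative, not part of the commit):

#include <array>
#include <bit>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the Vulkan property flags; only the bit logic matters.
enum MemFlags : uint32_t { HostCoherent = 1 << 0, HostCached = 1 << 1 };

// Mirrors find_mem_type_with_flag: scan the candidate mask with countr_zero and
// return the first memory type whose property flags contain all requested bits.
int FindMemType(uint32_t candidate_mask, const std::array<uint32_t, 32>& type_flags,
                uint32_t wanted) {
    while (candidate_mask != 0) {
        const int type = std::countr_zero(candidate_mask);
        candidate_mask &= ~(1u << type);
        if ((type_flags[type] & wanted) == wanted) {
            return type;
        }
    }
    return -1; // No type satisfies the request.
}

int main() {
    std::array<uint32_t, 32> type_flags{};
    type_flags[2] = HostCoherent;
    type_flags[5] = HostCoherent | HostCached;
    const uint32_t mask = (1u << 2) | (1u << 5);

    // Prefer coherent + cached, then fall back to coherent only, as above.
    int type = FindMemType(mask, type_flags, HostCoherent | HostCached);
    if (type == -1) {
        type = FindMemType(mask, type_flags, HostCoherent);
    }
    std::printf("chosen memory type: %d\n", type); // prints 5
}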
------------------------------------------------------------
@@ -3,6 +3,7 @@

 #pragma once

 #include <functional>
 #include <string_view>
 #include <vector>
 #include <boost/icl/split_interval_map.hpp>

@@ -10,6 +11,11 @@
 #include "common/singleton.h"
 #include "common/types.h"
 #include "core/address_space.h"
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+class Instance;
+}

 namespace Core {

@@ -86,6 +92,10 @@ public:
     explicit MemoryManager();
     ~MemoryManager();

+    void SetInstance(const Vulkan::Instance* instance_) {
+        instance = instance_;
+    }
+
     PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
                    int memory_type);

@@ -97,11 +107,9 @@ public:
     void UnmapMemory(VAddr virtual_addr, size_t size);

-private:
-    bool HasOverlap(VAddr addr, size_t size) const {
-        return vma_map.find(addr) != vma_map.end();
-    }
+    std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);

 private:
     VMAHandle FindVMA(VAddr target) {
         // Return first the VMA with base >= target.
         const auto it = vma_map.lower_bound(target);

@@ -117,10 +125,22 @@ private:
     VMAHandle MergeAdjacent(VMAHandle iter);

+    void MapVulkanMemory(VAddr addr, size_t size);
+
+    void UnmapVulkanMemory(VAddr addr, size_t size);
+
 private:
     AddressSpace impl;
     std::vector<DirectMemoryArea> allocations;
     VMAMap vma_map;
+
+    struct MappedMemory {
+        vk::UniqueBuffer buffer;
+        vk::UniqueDeviceMemory backing;
+        size_t buffer_size;
+    };
+    std::map<VAddr, MappedMemory> mapped_memories;
+    const Vulkan::Instance* instance{};
 };

 using Memory = Common::Singleton<MemoryManager>;
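GetVulkanBuffer, declared above and defined in the previous hunk, resolves the mapping that contains an address by taking upper_bound and stepping back one entry. A standalone sketch of that lookup, with an int standing in for the vk::Buffer handle:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

// Mappings keyed by base address; upper_bound(addr) overshoots by one entry,
// so std::prev lands on the mapping with the largest base <= addr. Like the
// real code, this assumes some mapping exists at or below addr.
using Mappings = std::map<uintptr_t, int>;

std::pair<int, size_t> Lookup(const Mappings& mapped, uintptr_t addr) {
    auto it = mapped.upper_bound(addr);
    it = std::prev(it);
    assert(it != mapped.end() && it->first <= addr);
    return {it->second, addr - it->first}; // Handle plus offset into the mapping.
}

int main() {
    Mappings mapped{{0x1000, 1}, {0x8000, 2}};
    const auto [buffer, offset] = Lookup(mapped, 0x8010);
    std::printf("buffer %d at offset 0x%zx\n", buffer, offset); // buffer 2, 0x10
}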
------------------------------------------------------------
@@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) {
             const Id id{DefineInput(type, input.binding)};
             Name(id, fmt::format("vs_in_attr{}", input.binding));
             input_params[input.binding] = GetAttributeInfo(input.fmt, id);
             interfaces.push_back(id);
         }
         break;
     case Stage::Fragment:
         for (const auto& input : info.ps_inputs) {
             if (input.is_default) {
-                input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]};
+                input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
+                                                input_f32, F32[1]};
                 continue;
             }
             const IR::Attribute param{IR::Attribute::Param0 + input.param_index};

@@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
             }
             Name(id, fmt::format("fs_in_attr{}", input.semantic));
             input_params[input.semantic] = {id, input_f32, F32[1], num_components};
             interfaces.push_back(id);
         }
     default:
         break;

@@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
             const Id id{DefineOutput(F32[num_components], i)};
             Name(id, fmt::format("out_attr{}", i));
             output_params[i] = {id, output_f32, F32[1], num_components};
             interfaces.push_back(id);
         }
         break;
     case Stage::Fragment:
------------------------------------------------------------
@@ -40,7 +40,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
     struct VsharpLoad {
         u32 dword_offset{};
-        s32 base_sgpr{};
-        s32 dst_sgpr{-1};
+        s32 dst_reg{-1};
     };
     boost::container::static_vector<VsharpLoad, 16> loads;

@@ -57,11 +57,13 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
         }

         if (inst.inst_class == InstClass::VectorMemBufFmt) {
+            // SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
+            const u32 base_sgpr = inst.src[2].code * 4;
+
             // Find the load instruction that loaded the V# to the SPGR.
             // This is so we can determine its index in the vertex table.
-            const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) {
-                return load.dst_sgpr == inst.src[2].code * 4;
-            });
+            const auto it = std::ranges::find_if(
+                loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });

             auto& attrib = attributes.emplace_back();
             attrib.semantic = semantic_index++;

@@ -71,7 +73,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
             attrib.dword_offset = it->dword_offset;

             // Mark load as used.
-            it->dst_sgpr = -1;
+            it->dst_reg = -1;
         }
     }
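The fetch-shader parser works by remembering every V# load it sees and later matching a buffer-load instruction back to the load whose destination register equals the instruction's SRSRC base; SRSRC is expressed in groups of four SGPRs, hence the code * 4. A standalone sketch of that matching step, with mocked instruction fields (names are illustrative):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Mocked bookkeeping for V# loads, mirroring VsharpLoad in the diff.
struct VsharpLoad {
    uint32_t dword_offset{};
    int32_t dst_reg{-1}; // First SGPR the V# was loaded into.
};

int main() {
    std::vector<VsharpLoad> loads{{0, 4}, {4, 8}};

    // A fetch instruction whose SRSRC field is 2: SRSRC counts groups of
    // four SGPRs, so the descriptor lives at SGPR 2 * 4 = 8.
    const uint32_t srsrc_code = 2;
    const uint32_t base_sgpr = srsrc_code * 4;

    const auto it = std::ranges::find_if(loads, [&](const VsharpLoad& load) {
        return load.dst_reg == static_cast<int32_t>(base_sgpr);
    });
    if (it != loads.end()) {
        std::printf("V# found at user-data dword %u\n", it->dword_offset); // dword 4
        it->dst_reg = -1; // Mark the load as consumed, like the diff does.
    }
}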
------------------------------------------------------------
@@ -9,11 +9,11 @@
 namespace Shader::Gcn {

 struct VertexAttribute {
-    u8 semantic;     ///< Semantic index of the attribute
-    u8 dest_vgpr;    ///< Destination VGPR to load first component
-    u8 num_elements; ///< Number of components to load
-    u8 sgpr_base;    ///< SGPR that contains the pointer to the list of vertex V#
-    u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
+    u8 semantic;     ///< Semantic index of the attribute
+    u8 dest_vgpr;    ///< Destination VGPR to load first component.
+    u8 num_elements; ///< Number of components to load
+    u8 sgpr_base;    ///< SGPR that contains the pointer to the list of vertex V#
+    u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
 };

 std::vector<VertexAttribute> ParseFetchShader(const u32* code);
------------------------------------------------------------
@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
+#include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"

@@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
     // Parse the assembly to generate a list of attributes.
     const auto attribs = ParseFetchShader(code);
     for (const auto& attrib : attribs) {
-        IR::VectorReg dst_reg{attrib.dest_vgpr};
         const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
+        IR::VectorReg dst_reg{attrib.dest_vgpr};
         for (u32 i = 0; i < attrib.num_elements; i++) {
             ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
         }

         // Read the V# of the attribute to figure out component number and type.
-        const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base,
-                                                        attrib.dword_offset);
+        const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
         const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
         info.vs_inputs.push_back({
             .fmt = buffer.num_format,
             .binding = attrib.semantic,
             .num_components = std::min<u16>(attrib.num_elements, num_components),
             .sgpr_base = attrib.sgpr_base,
             .dword_offset = attrib.dword_offset,
         });
     }
 }
------------------------------------------------------------
@@ -12,8 +12,6 @@
 namespace Shader::IR {

 struct Program {
-    explicit Program(const Info&& info_) : info{info_} {}
-
     AbstractSyntaxList syntax_list;
     BlockList blocks;
     BlockList post_order_blocks;
------------------------------------------------------------
@@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     file.close();

     // Decode and save instructions
-    IR::Program program{std::move(info)};
+    IR::Program program;
     program.ins_list.reserve(token.size());
     while (!slice.atEnd()) {
         program.ins_list.emplace_back(decoder.decodeInstruction(slice));

@@ -55,6 +55,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     Gcn::CFG cfg{gcn_block_pool, program.ins_list};

     // Structurize control flow graph and create program.
+    program.info = std::move(info);
     program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
     program.blocks = GenerateBlocks(program.syntax_list);
     program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
------------------------------------------------------------
@@ -4,8 +4,8 @@
 #pragma once

 #include "shader_recompiler/ir/basic_block.h"
-#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/object_pool.h"

 namespace Shader {

@@ -30,7 +30,6 @@ struct BinaryInfo {
 [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
                                            ObjectPool<IR::Block>& block_pool,
-                                           std::span<const u32> code,
-                                           const Info&& info);
+                                           std::span<const u32> code, const Info&& info);

 } // namespace Shader
------------------------------------------------------------
@@ -40,12 +40,12 @@ enum class TextureType : u32 {
 constexpr u32 NUM_TEXTURE_TYPES = 7;

 struct Info {
-    explicit Info(std::span<const u32, 16> user_data_) : user_data{user_data_} {}
-
     struct VsInput {
         AmdGpu::NumberFormat fmt;
         u16 binding;
         u16 num_components;
+        u8 sgpr_base;
+        u8 dword_offset;
     };
     boost::container::static_vector<VsInput, 32> vs_inputs{};

@@ -60,29 +60,33 @@ struct Info {
     struct AttributeFlags {
         bool Get(IR::Attribute attrib, u32 comp = 0) const {
-            return flags[static_cast<size_t>(attrib)] & (1 << comp);
+            return flags[Index(attrib)] & (1 << comp);
         }

         bool GetAny(IR::Attribute attrib) const {
-            return flags[static_cast<size_t>(attrib)];
+            return flags[Index(attrib)];
         }

         void Set(IR::Attribute attrib, u32 comp = 0) {
-            flags[static_cast<size_t>(attrib)] |= (1 << comp);
+            flags[Index(attrib)] |= (1 << comp);
         }

         u32 NumComponents(IR::Attribute attrib) const {
-            const u8 mask = flags[static_cast<size_t>(attrib)];
+            const u8 mask = flags[Index(attrib)];
             ASSERT(mask != 0b1011 || mask != 0b1101);
             return std::popcount(mask);
         }

+        static size_t Index(IR::Attribute attrib) {
+            return static_cast<size_t>(attrib);
+        }
+
         std::array<u8, IR::NumAttributes> flags;
     };
     AttributeFlags loads{};
     AttributeFlags stores{};

-    std::span<const u32, 16> user_data;
+    std::span<const u32> user_data;
     Stage stage;

     template <typename T>
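AttributeFlags packs one byte per attribute with one bit per component, so counting used components is a popcount. A standalone sketch of the same bookkeeping:

#include <array>
#include <bit>
#include <cstdint>
#include <cstdio>

// Minimal stand-in for AttributeFlags: one byte per attribute, one bit per
// component, with popcount giving the number of components in use.
struct AttributeFlags {
    void Set(size_t attrib, uint32_t comp) {
        flags[attrib] |= static_cast<uint8_t>(1u << comp);
    }
    bool Get(size_t attrib, uint32_t comp) const {
        return flags[attrib] & (1u << comp);
    }
    uint32_t NumComponents(size_t attrib) const {
        return std::popcount(flags[attrib]);
    }
    std::array<uint8_t, 32> flags{};
};

int main() {
    AttributeFlags loads;
    loads.Set(0, 0); // attribute 0 reads .x
    loads.Set(0, 1); // and .y
    std::printf("attr0 components: %u\n", loads.NumComponents(0)); // 2
}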
------------------------------------------------------------
@@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
     regs.num_indices = draw_index->index_count;
     regs.draw_initiator = draw_index->draw_initiator;
     if (rasterizer) {
-        rasterizer->DrawIndex();
+        rasterizer->Draw(true);
     }
     break;
 }

@@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
     const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
     regs.num_indices = draw_index->index_count;
     regs.draw_initiator = draw_index->draw_initiator;
-    rasterizer->DrawIndex();
+    if (rasterizer) {
+        rasterizer->Draw(false);
+    }
     break;
 }
 case PM4ItOpcode::DispatchDirect: {
------------------------------------------------------------
@@ -180,25 +180,6 @@ struct Liverpool {
     BitField<31, 1, u32> disable_color_writes_on_depth_pass;
 };

-union DepthSize {
-    u32 raw;
-    BitField<0, 11, u32> pitch_tile_max;
-    BitField<11, 11, u32> height_tile_max;
-
-    u32 Pitch() const {
-        return (pitch_tile_max + 1) << 3;
-    }
-
-    u32 Height() const {
-        return (height_tile_max + 1) << 3;
-    }
-};
-
-union DepthSlice {
-    u32 raw;
-    BitField<0, 22, u32> slice_tile_max;
-};
-
 enum class StencilFunc : u32 {
     Keep = 0,
     Zero = 1,

@@ -236,9 +217,45 @@ struct Liverpool {
     BitField<24, 8, u32> stencil_op_val;
 };

-union StencilInfo {
-    u32 raw;
-    BitField<0, 1, u32> format;
-};
+struct DepthBuffer {
+    enum class ZFormat : u32 {
+        Invald = 0,
+        Z16 = 1,
+        Z32Float = 2,
+    };
+
+    enum class StencilFormat : u32 {
+        Invalid = 0,
+        Stencil8 = 1,
+    };
+
+    union {
+        BitField<0, 2, ZFormat> format;
+        BitField<2, 2, u32> num_samples;
+        BitField<13, 3, u32> tile_split;
+    } z_info;
+    union {
+        BitField<0, 1, StencilFormat> format;
+    } stencil_info;
+    u32 z_read_base;
+    u32 stencil_read_base;
+    u32 z_write_base;
+    u32 stencil_write_base;
+    union {
+        BitField<0, 11, u32> pitch_tile_max;
+        BitField<11, 11, u32> height_tile_max;
+    } depth_size;
+    union {
+        BitField<0, 22, u32> tile_max;
+    } depth_slice;
+
+    u32 Pitch() const {
+        return (depth_size.pitch_tile_max + 1) << 3;
+    }
+
+    u32 Height() const {
+        return (depth_size.height_tile_max + 1) << 3;
+    }
+};

 enum class ClipSpace : u32 {

@@ -505,6 +522,12 @@ struct Liverpool {
     u64 CmaskAddress() const {
         return u64(cmask_base_address) << 8;
     }

+    NumberFormat NumFormat() const {
+        // There is a small difference between T# and CB number types, account for it.
+        return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
+                                                                 : info.number_type;
+    }
 };

 enum class PrimitiveType : u32 {

@@ -539,14 +562,8 @@ struct Liverpool {
     u32 stencil_clear;
     u32 depth_clear;
     Scissor screen_scissor;
-    INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
-    StencilInfo stencil_info;
-    u32 z_read_base;
-    u32 stencil_read_base;
-    u32 z_write_base;
-    u32 stencil_write_base;
-    DepthSize depth_size;
-    DepthSlice depth_slice;
+    INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
+    DepthBuffer depth_buffer;
     INSERT_PADDING_WORDS(0xA08E - 0xA018);
     ColorBufferMask color_target_mask;
     ColorBufferMask color_shader_mask;

@@ -595,6 +612,17 @@ struct Liverpool {
         VgtNumInstances num_instances;
     };
     std::array<u32, NumRegs> reg_array{};

+    const ShaderProgram* ProgramForStage(u32 index) const {
+        switch (index) {
+        case 0:
+            return &vs_program;
+        case 4:
+            return &ps_program;
+        default:
+            return nullptr;
+        }
+    }
 };

 Regs regs{};

@@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
 static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
 static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
 static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
-static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
+static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
 static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
 static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
 static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
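DB_DEPTH_SIZE stores dimensions in 8-pixel tiles minus one, which is why Pitch() and Height() decode as (tile_max + 1) << 3. A quick standalone check of that math (the encoded values are illustrative):

#include <cstdint>
#include <cstdio>

// DepthBuffer::Pitch()/Height() decode a field holding (dimension / 8) - 1,
// so decoding is (field + 1) << 3.
uint32_t DecodeTileMax(uint32_t tile_max) {
    return (tile_max + 1) << 3;
}

int main() {
    // A 1280x720 depth target would encode pitch_tile_max = 159 and
    // height_tile_max = 89.
    std::printf("pitch  = %u\n", DecodeTileMax(159)); // 1280
    std::printf("height = %u\n", DecodeTileMax(89));  // 720
}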
------------------------------------------------------------
@@ -76,4 +76,3 @@ struct fmt::formatter<AmdGpu::NumberFormat> {
         return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
     }
 };
------------------------------------------------------------
@@ -1,6 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-
+#pragma clang optimize off

 #include "common/assert.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"

@@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
 }

 vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
-    if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) {
+    if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
+        num_format == AmdGpu::NumberFormat::Float) {
         return vk::Format::eR32G32B32A32Sfloat;
     }
-    if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) {
+    if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
         return vk::Format::eR32G32B32Uint;
     }
-    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
+    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
+        num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eR8G8B8A8Unorm;
     }
-    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) {
+    if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
+        num_format == AmdGpu::NumberFormat::Srgb) {
         return vk::Format::eR8G8B8A8Srgb;
     }
     UNREACHABLE();
 }

+vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
+                       Liverpool::DepthBuffer::StencilFormat stencil_format) {
+    UNREACHABLE();
+}
+
+void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
+    static constexpr u16 NumVerticesPerQuad = 4;
+    u16* out_data = reinterpret_cast<u16*>(out_ptr);
+    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
+        *out_data++ = i;
+        *out_data++ = i + 1;
+        *out_data++ = i + 2;
+        *out_data++ = i + 2;
+        *out_data++ = i;
+        *out_data++ = i + 3;
+    }
+}
+
 } // namespace Vulkan::LiverpoolToVK
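To see what EmitQuadToTriangleListIndices produces, here is a standalone run over two quads; each quad {i, i+1, i+2, i+3} expands to the triangles {i, i+1, i+2} and {i+2, i, i+3}:

#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone copy of the index expansion: 4 quad vertices -> 6 triangle indices.
void EmitQuadToTriangleListIndices(uint16_t* out, uint32_t num_vertices) {
    for (uint16_t i = 0; i < num_vertices; i += 4) {
        *out++ = i;
        *out++ = i + 1;
        *out++ = i + 2;
        *out++ = i + 2;
        *out++ = i;
        *out++ = i + 3;
    }
}

int main() {
    constexpr uint32_t num_vertices = 8; // two quads
    std::vector<uint16_t> indices(num_vertices / 4 * 6);
    EmitQuadToTriangleListIndices(indices.data(), num_vertices);
    for (const uint16_t idx : indices) {
        std::printf("%u ", idx); // 0 1 2 2 0 3 4 5 6 6 4 7
    }
    std::printf("\n");
}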
------------------------------------------------------------
@@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode);
 vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);

+vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
+                       Liverpool::DepthBuffer::StencilFormat stencil_format);
+
+void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
+
 } // namespace Vulkan::LiverpoolToVK
------------------------------------------------------------
@@ -4,22 +4,58 @@
 #include <boost/container/static_vector.hpp>

 #include "common/assert.h"
+#include "core/memory.h"
+#include "video_core/amdgpu/resource.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"

 namespace Vulkan {

-GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_,
-                                   vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_,
+GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
+                                   const PipelineKey& key_, vk::PipelineCache pipeline_cache,
                                    std::span<const Shader::Info*, MaxShaderStages> infos,
                                    std::array<vk::ShaderModule, MaxShaderStages> modules)
-    : instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} {
+    : instance{instance_}, scheduler{scheduler_}, key{key_} {
     const vk::Device device = instance.GetDevice();
+    for (u32 i = 0; i < MaxShaderStages; i++) {
+        if (!infos[i]) {
+            continue;
+        }
+        stages[i] = *infos[i];
+    }
+
+    const vk::PipelineLayoutCreateInfo layout_info = {
+        .setLayoutCount = 0U,
+        .pSetLayouts = nullptr,
+        .pushConstantRangeCount = 0,
+        .pPushConstantRanges = nullptr,
+    };
+    pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
+
+    boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
+    boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
+    const auto& vs_info = stages[0];
+    for (const auto& input : vs_info.vs_inputs) {
+        const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        attributes.push_back({
+            .location = input.binding,
+            .binding = input.binding,
+            .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
+            .offset = 0,
+        });
+        bindings.push_back({
+            .binding = input.binding,
+            .stride = u32(buffer.stride),
+            .inputRate = vk::VertexInputRate::eVertex,
+        });
+    }

     const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
-        .vertexBindingDescriptionCount = 0U,
-        .pVertexBindingDescriptions = nullptr,
-        .vertexAttributeDescriptionCount = 0U,
-        .pVertexAttributeDescriptions = nullptr,
+        .vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
+        .pVertexBindingDescriptions = bindings.data(),
+        .vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
+        .pVertexAttributeDescriptions = attributes.data(),
     };

     const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {

@@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
         .pName = "main",
     };

-    const vk::Format color_format = vk::Format::eR8G8B8A8Srgb;
+    const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
+    const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
     const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
-        .colorAttachmentCount = 1,
-        .pColorAttachmentFormats = &color_format,
-        .depthAttachmentFormat = vk::Format::eUndefined,
+        .colorAttachmentCount = num_color_formats,
+        .pColorAttachmentFormats = key.color_formats.data(),
+        .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
         .stencilAttachmentFormat = vk::Format::eUndefined,
     };

@@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
     .pDepthStencilState = &depth_info,
     .pColorBlendState = &color_blending,
     .pDynamicState = &dynamic_info,
-    .layout = pipeline_layout,
+    .layout = *pipeline_layout,
 };

 auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);

@@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
 GraphicsPipeline::~GraphicsPipeline() = default;

+void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
+    std::array<vk::Buffer, MaxVertexBufferCount> buffers;
+    std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
+
+    const auto& vs_info = stages[0];
+    const size_t num_buffers = vs_info.vs_inputs.size();
+    for (u32 i = 0; i < num_buffers; ++i) {
+        const auto& input = vs_info.vs_inputs[i];
+        const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
+    }
+
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
+}
+
 } // namespace Vulkan
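The vertex-input setup above creates one binding per attribute straight from the guest V#: location equals binding, the offset is always zero, and the stride comes from the descriptor. A sketch of that population loop, assuming Vulkan-Hpp with VULKAN_HPP_NO_CONSTRUCTORS (implied by the designated initializers in the diff) and a hypothetical VsInput record:

#define VULKAN_HPP_NO_CONSTRUCTORS
#include <vulkan/vulkan.hpp>

#include <cstdint>
#include <vector>

// Hypothetical per-attribute description recovered from a vertex V#.
struct VsInput {
    uint16_t binding;
    vk::Format format;
    uint32_t stride;
};

// One binding per attribute, as in the diff: each attribute reads its own
// buffer at offset 0, so location == binding and the V# supplies the stride.
void BuildVertexInput(const std::vector<VsInput>& inputs,
                      std::vector<vk::VertexInputBindingDescription>& bindings,
                      std::vector<vk::VertexInputAttributeDescription>& attributes) {
    for (const VsInput& input : inputs) {
        attributes.push_back({
            .location = input.binding,
            .binding = input.binding,
            .format = input.format,
            .offset = 0,
        });
        bindings.push_back({
            .binding = input.binding,
            .stride = input.stride,
            .inputRate = vk::VertexInputRate::eVertex,
        });
    }
}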
@ -1,19 +1,31 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <xxhash.h>
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Core {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr u32 MaxVertexBufferCount = 32;
|
||||
static constexpr u32 MaxShaderStages = 5;
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
|
||||
using Liverpool = AmdGpu::Liverpool;
|
||||
|
||||
struct PipelineKey {
|
||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
vk::Format depth_format;
|
||||
|
||||
Liverpool::DepthControl depth;
|
||||
Liverpool::StencilControl stencil;
|
||||
Liverpool::StencilRefMask stencil_ref_front;
|
||||
|
@ -21,26 +33,41 @@ struct PipelineKey {
|
|||
Liverpool::PrimitiveType prim_type;
|
||||
Liverpool::PolygonMode polygon_mode;
|
||||
Liverpool::CullMode cull_mode;
|
||||
|
||||
bool operator==(const PipelineKey& key) const noexcept {
|
||||
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<PipelineKey>);
|
||||
|
||||
class GraphicsPipeline {
|
||||
public:
|
||||
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key,
|
||||
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout,
|
||||
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
|
||||
const PipelineKey& key, vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::array<vk::ShaderModule, MaxShaderStages> modules);
|
||||
~GraphicsPipeline();
|
||||
|
||||
void BindResources(Core::MemoryManager* memory) const;
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::PipelineCache pipeline_cache;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
std::array<Shader::Info, MaxShaderStages> stages;
|
||||
PipelineKey key;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
template <>
|
||||
struct std::hash<Vulkan::PipelineKey> {
|
||||
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
|
||||
return XXH3_64bits(&key, sizeof(key));
|
||||
}
|
||||
};
|
||||
|
|
|
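PipelineKey is hashed as raw bytes with XXH3 and compared with memcmp, which is only sound because the static_assert guarantees the type has no padding bytes. A standalone sketch of the same pattern, assuming the vendored xxHash header is on the include path:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include <xxhash.h>

// A padding-free key: byte-wise hashing and comparison are only valid when
// the type has unique object representations (no padding, no floats).
struct Key {
    uint32_t prim_type;
    uint32_t cull_mode;

    bool operator==(const Key& other) const noexcept {
        return std::memcmp(this, &other, sizeof(Key)) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<Key>);

struct KeyHash {
    std::size_t operator()(const Key& key) const noexcept {
        return XXH3_64bits(&key, sizeof(key));
    }
};

int main() {
    std::unordered_map<Key, int, KeyHash> cache;
    cache[Key{.prim_type = 4, .cull_mode = 1}] = 42;
    return cache.count(Key{.prim_type = 4, .cull_mode = 1}) == 1 ? 0 : 1;
}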
@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
|
|||
const std::string api_version = GetReadableVersion(properties.apiVersion);
|
||||
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
|
||||
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Model", model_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
|
||||
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
|
||||
}
|
||||
|
||||
void Instance::CollectToolingInfo() {
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <fstream>
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -14,8 +14,9 @@
|
|||
namespace Vulkan {
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
|
||||
AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{user_data};
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{};
|
||||
info.user_data = user_data;
|
||||
info.stage = stage;
|
||||
switch (stage) {
|
||||
case Shader::Stage::Fragment: {
|
||||
|
@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
||||
block_pool{512} {
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 0U,
|
||||
.pSetLayouts = nullptr,
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
};
|
||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
}
|
||||
|
||||
void PipelineCache::BindPipeline() {
|
||||
SCOPE_EXIT {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
};
|
||||
const GraphicsPipeline* PipelineCache::GetPipeline() {
|
||||
RefreshKey();
|
||||
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
|
||||
if (is_new) {
|
||||
it.value() = CreatePipeline();
|
||||
}
|
||||
const GraphicsPipeline* pipeline = it->second.get();
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
if (pipeline) {
|
||||
return;
|
||||
void PipelineCache::RefreshKey() {
|
||||
auto& regs = liverpool->regs;
|
||||
auto& key = graphics_key;
|
||||
|
||||
key.depth = regs.depth_control;
|
||||
key.stencil = regs.stencil_control;
|
||||
key.stencil_ref_front = regs.stencil_ref_front;
|
||||
key.stencil_ref_back = regs.stencil_ref_back;
|
||||
key.prim_type = regs.primitive_type;
|
||||
key.polygon_mode = regs.polygon_control.PolyMode();
|
||||
|
||||
const auto& db = regs.depth_buffer;
|
||||
key.depth_format = key.depth.depth_enable
|
||||
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
|
||||
: vk::Format::eUndefined;
|
||||
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
|
||||
const auto& cb = regs.color_buffers[i];
|
||||
key.color_formats[i] = cb.base_address
|
||||
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
|
||||
: vk::Format::eUndefined;
|
||||
}
|
||||
|
||||
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) {
|
||||
const u32* token = pgm.Address<u32>();
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
if (!pgm || !pgm->Address<u32>()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
const u32* code = pgm->Address<u32>();
|
||||
|
||||
// Retrieve shader header.
|
||||
Shader::BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo));
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
key.stage_hashes[i] = bininfo.shader_hash;
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
||||
const auto& regs = liverpool->regs;
|
||||
|
||||
std::array<Shader::IR::Program, MaxShaderStages> programs;
|
||||
std::array<const Shader::Info*, MaxShaderStages> infos{};
|
||||
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
if (!graphics_key.stage_hashes[i]) {
|
||||
stages[i] = VK_NULL_HANDLE;
|
||||
continue;
|
||||
}
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
const u32* code = pgm->Address<u32>();
|
||||
|
||||
Shader::BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
||||
// Lookup if the shader already exists.
|
||||
const auto it = module_map.find(bininfo.shader_hash);
|
||||
if (it != module_map.end()) {
|
||||
return *it->second;
|
||||
stages[i] = *it->second;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compile and cache shader.
|
||||
const auto data = std::span{token, bininfo.length / sizeof(u32)};
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs);
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info));
|
||||
const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program);
|
||||
|
||||
static int counter = 0;
|
||||
std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary);
|
||||
file.write((const char*)code.data(), code.size() * sizeof(u32));
|
||||
file.close();
|
||||
// Recompile shader to IR.
|
||||
const auto stage = Shader::Stage{i};
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
|
||||
std::move(info));
|
||||
|
||||
return CompileSPV(code, instance.GetDevice());
|
||||
};
|
||||
// Compile IR to SPIR-V
|
||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
|
||||
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
||||
infos[i] = &programs[i].info;
|
||||
}
|
||||
|
||||
// Retrieve shader stage modules.
|
||||
// TODO: Only do this when program address is changed.
|
||||
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
|
||||
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
|
||||
|
||||
// Bind pipeline.
|
||||
// TODO: Read entire key based on reg state.
|
||||
graphics_key.prim_type = liverpool->regs.primitive_type;
|
||||
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
|
||||
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
|
||||
*pipeline_layout, stages);
|
||||
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
|
||||
infos, stages);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
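GetPipeline uses the classic hash-cache idiom: try_emplace on the key and build only on a miss, so a hot draw costs a single lookup. A standalone sketch with std::unordered_map standing in for tsl::robin_map:

#include <cstdio>
#include <memory>
#include <unordered_map>

struct Pipeline {
    int id;
};

// Mocked key and builder; the real code keys on register state and compiles
// shaders in CreatePipeline() only when the key is new.
std::unordered_map<int, std::unique_ptr<Pipeline>> cache;

Pipeline* GetPipeline(int key) {
    const auto [it, is_new] = cache.try_emplace(key);
    if (is_new) {
        it->second = std::make_unique<Pipeline>(Pipeline{key});
        std::printf("compiled pipeline for key %d\n", key);
    }
    return it->second.get();
}

int main() {
    GetPipeline(7); // compiles
    GetPipeline(7); // cache hit, no compile
}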
@ -8,6 +8,10 @@
|
|||
#include "shader_recompiler/object_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
|
@ -21,7 +25,12 @@ public:
|
|||
AmdGpu::Liverpool* liverpool);
|
||||
~PipelineCache() = default;
|
||||
|
||||
void BindPipeline();
|
||||
const GraphicsPipeline* GetPipeline();
|
||||
|
||||
private:
|
||||
void RefreshKey();
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreatePipeline();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
@ -31,7 +40,7 @@ private:
|
|||
vk::UniquePipelineLayout pipeline_layout;
|
||||
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
|
||||
std::array<vk::ShaderModule, MaxShaderStages> stages{};
|
||||
std::unique_ptr<GraphicsPipeline> pipeline;
|
||||
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
PipelineKey graphics_key{};
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Shader::ObjectPool<Shader::IR::Block> block_pool;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
|
|||
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool},
|
||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||
pipeline_cache{instance, scheduler, liverpool},
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
|
||||
memory->SetInstance(&instance);
|
||||
}
|
||||
|
||||
Rasterizer::~Rasterizer() = default;
|
||||
|
||||
void Rasterizer::DrawIndex() {
|
||||
void Rasterizer::Draw(bool is_indexed) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
auto& regs = liverpool->regs;
|
||||
|
||||
UpdateDynamicState();
|
||||
|
||||
pipeline_cache.BindPipeline();
|
||||
|
||||
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
|
||||
const auto& regs = liverpool->regs;
|
||||
const u32 num_indices = SetupIndexBuffer(is_indexed);
|
||||
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
|
||||
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
|
||||
pipeline->BindResources(memory);
|
||||
|
||||
const vk::RenderingAttachmentInfo color_info = {
|
||||
.imageView = *image_view.image_view,
|
||||
|
@ -52,13 +54,50 @@ void Rasterizer::DrawIndex() {
|
|||
.pColorAttachments = &color_info,
|
||||
};
|
||||
|
||||
UpdateDynamicState();
|
||||
|
||||
cmdbuf.beginRendering(rendering_info);
|
||||
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32);
|
||||
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0));
|
||||
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
if (is_indexed) {
|
||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||
} else {
|
||||
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
|
||||
}
|
||||
cmdbuf.endRendering();
|
||||
}
|
||||
|
||||
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
||||
// Emulate QuadList primitive type with CPU made index buffer.
|
||||
const auto& regs = liverpool->regs;
|
||||
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
|
||||
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
|
||||
is_indexed = true;
|
||||
|
||||
// Emit indices.
|
||||
const u32 index_size = 3 * regs.num_indices;
|
||||
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
|
||||
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
|
||||
vertex_index_buffer.Commit(index_size);
|
||||
|
||||
// Bind index buffer.
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
|
||||
return index_size / sizeof(u16);
|
||||
}
|
||||
if (!is_indexed) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const VAddr index_address = regs.index_base_address.Address();
|
||||
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
|
||||
const vk::IndexType index_type =
|
||||
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
|
||||
: vk::IndexType::eUint32;
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
|
||||
return regs.num_indices;
|
||||
}
|
||||
|
||||
void Rasterizer::UpdateDynamicState() {
|
||||
UpdateViewportScissorState();
|
||||
}
|
||||
|
|
|
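On the QuadList path above, regs.num_indices arrives as a vertex count: every four vertices expand to six u16 indices, twelve bytes per quad, which works out to exactly three bytes per input vertex, hence index_size = 3 * regs.num_indices. A quick standalone check:

#include <cstdint>
#include <cstdio>

int main() {
    // Two quads: 8 vertices -> 12 indices -> 24 bytes of u16 index data.
    const uint32_t num_vertices = 8;
    const uint32_t index_size = 3 * num_vertices;
    const uint32_t num_indices = index_size / sizeof(uint16_t);
    std::printf("%u bytes, %u indices\n", index_size, num_indices); // 24 bytes, 12 indices
}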
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
|
@ -11,6 +10,10 @@ namespace AmdGpu {
|
|||
struct Liverpool;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class TextureCache;
|
||||
}
|
||||
|
@ -26,20 +29,14 @@ public:
|
|||
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
|
||||
~Rasterizer();
|
||||
|
||||
/// Performs a draw call with an index buffer.
|
||||
void DrawIndex();
|
||||
|
||||
/// Performs a draw call without an index buffer.
|
||||
void DrawAuto();
|
||||
|
||||
/// Updates graphics state that is not part of the bound pipeline.
|
||||
void UpdateDynamicState();
|
||||
void Draw(bool is_indexed);
|
||||
|
||||
private:
|
||||
/// Updates viewport and scissor from liverpool registers.
|
||||
void UpdateViewportScissorState();
|
||||
u32 SetupIndexBuffer(bool& is_indexed);
|
||||
void MapMemory(VAddr addr, size_t size);
|
||||
|
||||
/// Updates depth and stencil pipeline state from liverpool registers.
|
||||
void UpdateDynamicState();
|
||||
void UpdateViewportScissorState();
|
||||
void UpdateDepthStencilState();
|
||||
|
||||
private:
|
||||
|
@ -47,6 +44,7 @@ private:
|
|||
Scheduler& scheduler;
|
||||
VideoCore::TextureCache& texture_cache;
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
Core::MemoryManager* memory;
|
||||
PipelineCache pipeline_cache;
|
||||
StreamBuffer vertex_index_buffer;
|
||||
};
|
||||
|
|
|
@ -35,7 +35,7 @@ public:
|
|||
* @param size Size to reserve.
|
||||
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
|
||||
*/
|
||||
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
|
||||
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
|
||||
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
void Commit(u64 size);
|
||||
|
|
|
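Map reserves space and hands back a CPU pointer plus the buffer offset, and Commit then publishes the written bytes, as the rasterizer hunk uses it for the quad index upload. A simplified standalone mock of that contract (the real Map also returns a third element, omitted here):

#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

// Mock stream buffer demonstrating the Map/Commit pattern: Map reserves size
// bytes and returns {cpu_pointer, buffer_offset}; Commit advances the write
// cursor so the region is considered published.
class StreamBuffer {
public:
    std::tuple<uint8_t*, uint64_t> Map(uint64_t size) {
        storage.resize(offset + size);
        return {storage.data() + offset, offset};
    }
    void Commit(uint64_t size) {
        offset += size;
    }

private:
    std::vector<uint8_t> storage;
    uint64_t offset = 0;
};

int main() {
    StreamBuffer buffer;
    const auto [data, offset] = buffer.Map(12);
    for (int i = 0; i < 12; i++) {
        data[i] = static_cast<uint8_t>(i); // caller fills the mapped region
    }
    buffer.Commit(12);
    std::printf("wrote 12 bytes at offset %llu\n",
                static_cast<unsigned long long>(offset));
}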
@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
|
|||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
|
||||
// There is a small difference between T# and CB number types, account for it.
|
||||
const auto number_fmt =
|
||||
buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
|
||||
: buffer.info.number_type;
|
||||
is_tiled = true;
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt);
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = buffer.Pitch();
|
||||
size.height = buffer.Height();
|
||||
|
|
|
@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
|
|||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image);
|
||||
const ImageViewId view_id =
|
||||
slot_image_views.insert(instance, scheduler, view_info, image.image);
|
||||
image.image_view_infos.emplace_back(view_info);
|
||||
image.image_view_ids.emplace_back(view_id);
|
||||
return slot_image_views[view_id];
|
||||
|
|