video_core: Remove hack in rasterizer

* The hack was to skip the first draw, because the display buffer had not been created yet and the texture cache could not create one itself. With this patch it can, using the color buffer parameters from the registers.
This commit is contained in:
raphaelthegreat 2024-05-22 23:05:19 +03:00
parent e9f64bb76c
commit 08e155946e
24 changed files with 193 additions and 80 deletions

View file

@ -0,0 +1,81 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
namespace Shader::Gcn {
/**
 * Example fetch shader with all V# loads issued up front:
 *
 *   s_load_dwordx4 s[8:11], s[2:3], 0x00
 *   s_load_dwordx4 s[12:15], s[2:3], 0x04
 *   s_load_dwordx4 s[16:19], s[2:3], 0x08
 *   s_waitcnt lgkmcnt(0)
 *   buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen
 *   buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen
 *   buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen
 *   s_waitcnt 0
 *   s_setpc_b64 s[0:1]
 *
 * Example fetch shader with the V# loads interleaved with the buffer loads:
 *
 *   s_load_dwordx4 s[4:7], s[2:3], 0x0
 *   s_waitcnt lgkmcnt(0)
 *   buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen
 *   s_load_dwordx4 s[4:7], s[2:3], 0x8
 *   s_waitcnt lgkmcnt(0)
 *   buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen
 *   s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
 *   s_setpc_b64 s[0:1]
 *
 * A normal fetch shader looks like the above. The instructions are generated on the CPU side
 * from the input semantics, and the load instructions can be either separate or interleaved.
 * We go the reverse way and extract the original input semantics from these instructions.
 *
 * @param code Words of the fetch shader. Decoding starts at code.data() and stops at the first
 *             s_setpc_b64 instruction.
 * @return One VertexAttribute per buffer load whose V# load could be located, in program order.
 **/
std::vector<VertexAttribute> ParseFetchShader(std::span<const u32> code) {
    std::vector<VertexAttribute> attributes;
    // NOTE(review): the slice end is not derived from code.size(), so in practice the loop
    // below terminates on s_setpc_b64 rather than on atEnd() - confirm this is intended.
    GcnCodeSlice code_slice(code.data(), code.data() + std::numeric_limits<u32>::max());
    GcnDecodeContext decoder;

    // Book-keeping for a scalar load that has been decoded but not yet consumed by a buffer load.
    struct VsharpLoad {
        u32 dword_offset{};
        s32 base_sgpr{};
        s32 dst_sgpr{-1};
    };
    boost::container::static_vector<VsharpLoad, 16> loads;

    u32 semantic_index = 0;
    while (!code_slice.atEnd()) {
        const auto inst = decoder.decodeInstruction(code_slice);
        if (inst.opcode == Opcode::S_SETPC_B64) {
            break;
        }

        if (inst.inst_class == InstClass::ScalarMemRd) {
            loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code);
            continue;
        }

        if (inst.inst_class == InstClass::VectorMemBufFmt) {
            // Every buffer load consumes one semantic slot, whether or not it can be resolved,
            // so the numbering of the remaining attributes is unaffected by a skipped load.
            const u32 semantic = semantic_index++;

            // Find the load instruction that loaded the V# to the SGPR.
            // This is so we can determine its index in the vertex table.
            const auto it = std::ranges::find_if(loads, [&](const VsharpLoad& load) {
                return load.dst_sgpr == inst.src[2].code * 4;
            });
            if (it == loads.end()) {
                // No pending scalar load targets this V#. Dereferencing the end iterator
                // would be undefined behaviour, so skip the attribute instead.
                continue;
            }

            auto& attrib = attributes.emplace_back();
            attrib.semantic = semantic;
            attrib.dest_vgpr = inst.src[1].code;
            attrib.num_elements = inst.control.mubuf.count;
            attrib.sgpr_base = it->base_sgpr;
            attrib.dword_offset = it->dword_offset;

            // Mark load as used.
            it->dst_sgpr = -1;
        }
    }

    return attributes;
}
} // namespace Shader::Gcn

View file

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <vector>
#include "common/types.h"
namespace Shader::Gcn {
/// Describes a single vertex input attribute, as produced by ParseFetchShader.
/// All fields are stored as u8, so values are narrowed when assigned from wider types.
struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(std::span<const u32> code);
} // namespace Shader::Gcn

View file

@ -634,7 +634,6 @@ private:
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stage, inst_list.subspan(start, size));
fmt::print("{}\n", IR::DumpBlock(*current_block));
break;
}
case StatementType::SetVariable: {

View file

@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) {
return "Param31";
case Attribute::VertexId:
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::FragCoord:
return "FragCoord";
default:
break;
}

View file

@ -4,6 +4,7 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
@ -11,6 +12,6 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
} // namespace Shader::Optimization

View file

@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) {
};
}
void ResourceTrackingPass(IR::BlockList& program) {
for (IR::Block* const block : program) {
void ResourceTrackingPass(IR::Program& program) {
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) {
continue;
}
printf("ff\n");
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,

View file

@ -15,11 +15,14 @@ enum class Stage : u32;
namespace Shader::IR {
static constexpr size_t NumUserDataRegs = 16;
/// Bundles the intermediate state of one shader while it is being translated.
struct Program {
AbstractSyntaxList syntax_list; ///< Structured syntax list the block lists are generated from.
BlockList blocks; ///< Blocks of the program.
BlockList post_order_blocks; ///< The same program's blocks in post-order.
std::vector<Gcn::GcnInst> ins_list; ///< Decoded GCN instructions of the shader.
std::array<u32, NumUserDataRegs> user_data; ///< Copy of the user data registers for this shader.
Stage stage; ///< Pipeline stage the shader belongs to.
};

View file

@ -32,6 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32, IR::NumUserDataRegs> ud_regs,
std::span<const u32> token) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
@ -40,6 +41,11 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
Gcn::GcnDecodeContext decoder;
static int counter = 0;
std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary);
file.write((const char*)token.data(), token.size_bytes());
file.close();
// Decode and save instructions
IR::Program program;
program.ins_list.reserve(token.size());
@ -56,14 +62,19 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
program.stage = stage;
std::ranges::copy(ud_regs, program.user_data.begin());
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);
// Shader::Optimization::ResourceTrackingPass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
for (const auto& block : program.blocks) {
fmt::print("{}\n", IR::DumpBlock(*block));
}
// TODO: Pass profile from vulkan backend
const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program);
return code;

View file

@ -28,6 +28,7 @@ struct BinaryInfo {
[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32, IR::NumUserDataRegs> ud_regs,
std::span<const u32> code);
} // namespace Shader

View file

@ -122,7 +122,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex();
rasterizer->DrawIndex();
break;
}
case PM4ItOpcode::DispatchDirect: {

View file

@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
@ -423,39 +424,6 @@ struct Liverpool {
Swap8In64 = 3,
};
enum class Format : u32 {
Invalid = 0,
Color_8 = 1,
Color_16 = 2,
Color_8_8 = 3,
Color_32 = 4,
Color_16_16 = 5,
Color_10_11_11 = 6,
Color_11_11_10 = 7,
Color_10_10_10_2 = 8,
Color_2_10_10_10 = 9,
Color_8_8_8_8 = 10,
Color_32_32 = 11,
Color_16_16_16_16 = 12,
Color_32_32_32_32 = 14,
Color_5_6_5 = 16,
Color_1_5_5_5 = 17,
Color_5_5_5_1 = 18,
Color_4_4_4_4 = 19,
Color_8_24 = 20,
Color_24_8 = 21,
Color_X24_8_32_FL = 22,
};
enum class NumberType : u32 {
Unorm = 0,
Snorm = 1,
Uint = 4,
Sint = 5,
Srgb = 6,
Float = 7,
};
enum class SwapMode : u32 {
Standard = 0,
Alternate = 1,
@ -482,9 +450,9 @@ struct Liverpool {
} view;
union {
BitField<0, 2, EndianSwap> endian;
BitField<2, 5, Format> format;
BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberType> number_type;
BitField<8, 2, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression;

View file

@ -6,7 +6,7 @@
namespace AmdGpu {
u32 getNumComponents(DataFormat format) {
u32 NumComponents(DataFormat format) {
constexpr std::array numComponentsPerElement = {
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,

View file

@ -59,6 +59,6 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
u32 getNumComponents(DataFormat format);
u32 NumComponents(DataFormat format);
} // namespace AmdGpu

View file

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "common/assert.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleStrip:
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
case Liverpool::PrimitiveType::QuadList:
// Needs to generate index buffer on the fly.
return vk::PrimitiveTopology::eTriangleList;
default:
UNREACHABLE();
return vk::PrimitiveTopology::eTriangleList;
@ -110,4 +113,20 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
}
}
/// Translates a guest data format / number format pair into the matching Vulkan format.
/// Only a handful of combinations are handled; anything else reaches UNREACHABLE().
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
    using AmdGpu::DataFormat;
    using AmdGpu::NumberFormat;

    switch (data_format) {
    case DataFormat::Format32_32_32_32:
        if (num_format == NumberFormat::Float) {
            return vk::Format::eR32G32B32A32Sfloat;
        }
        break;
    case DataFormat::Format32_32_32:
        if (num_format == NumberFormat::Uint) {
            return vk::Format::eR32G32B32Uint;
        }
        break;
    case DataFormat::Format8_8_8_8:
        if (num_format == NumberFormat::Unorm) {
            return vk::Format::eR8G8B8A8Unorm;
        }
        if (num_format == NumberFormat::Srgb) {
            return vk::Format::eR8G8B8A8Srgb;
        }
        break;
    default:
        break;
    }
    // No mapping exists for this combination.
    UNREACHABLE();
}
} // namespace Vulkan::LiverpoolToVK

View file

@ -4,6 +4,7 @@
#pragma once
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK {
@ -20,4 +21,6 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
} // namespace Vulkan::LiverpoolToVK

View file

@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
if (!frame) {
if (!splash_img.has_value()) {
VideoCore::ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Srgb;
info.type = vk::ImageType::e2D;
@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
}
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();

View file

@ -126,7 +126,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main",
};
const vk::Format color_format = vk::Format::eB8G8R8A8Srgb;
const vk::Format color_format = vk::Format::eR8G8B8A8Srgb;
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1,
.pColorAttachmentFormats = &color_format,

View file

@ -50,7 +50,7 @@ void PipelineCache::BindPipeline() {
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data);
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, pgm.user_data, data);
return CompileSPV(program, instance.GetDevice());
};

View file

@ -31,20 +31,11 @@ void Rasterizer::DrawIndex() {
const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs;
static bool first_time = true;
if (first_time) {
first_time = false;
return;
}
UpdateDynamicState();
pipeline_cache.BindPipeline();
const u32 pitch = regs.color_buffers[0].Pitch();
const u32 height = regs.color_buffers[0].Height();
const u32 tile_max = regs.color_buffers[0].slice.tile_max;
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch);
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view,

View file

@ -29,6 +29,9 @@ public:
/// Performs a draw call with an index buffer.
void DrawIndex();
/// Performs a draw call without an index buffer.
void DrawAuto();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();

View file

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
@ -65,6 +66,20 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
}
}
/// Builds the image description of a render target from the color buffer registers.
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
    // Color buffer number types differ slightly from the ones used by T#;
    // a color buffer value that reads back as Uscaled is treated as Srgb.
    // NOTE(review): number_type is declared as a 2-bit field although the next field of the
    // register starts three bits later - confirm the width, this remap may depend on it.
    AmdGpu::NumberFormat num_format = buffer.info.number_type;
    if (num_format == AmdGpu::NumberFormat::Uscaled) {
        num_format = AmdGpu::NumberFormat::Srgb;
    }

    type = vk::ImageType::e2D;
    is_tiled = true;
    pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, num_format);

    // The image is as wide as the buffer pitch, so width and pitch coincide.
    size.width = buffer.Pitch();
    size.height = buffer.Height();
    pitch = size.width;

    // NOTE(review): the units of tile_max are not visible from here - confirm that this
    // product really yields a size in bytes.
    const auto slice_count = buffer.view.slice_max + 1;
    guest_size_bytes = buffer.slice.tile_max * slice_count;
}
/// Stores the device and allocator handles; the constructor body itself does nothing else.
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}

View file

@ -6,6 +6,7 @@
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined;

View file

@ -101,8 +101,8 @@ TextureCache::~TextureCache() {
}
void TextureCache::OnCpuWrite(VAddr address) {
const VAddr address_aligned = address & ~((1 << PageBits) - 1);
ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) {
const VAddr address_aligned = address & ~((1 << PageShift) - 1);
ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
@ -137,26 +137,19 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
return image;
}
ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) {
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) {
if (image.cpu_addr == cpu_address) {
image_ids.push_back(image_id);
}
});
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
const ImageInfo info{buffer};
auto& image = FindImage(info, buffer.Address());
ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!");
auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()];
ImageViewInfo info;
info.format = vk::Format::eB8G8R8A8Srgb;
if (const ImageViewId view_id = image->FindView(info); view_id) {
ImageViewInfo view_info;
view_info.format = info.pixel_format;
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
return slot_image_views[view_id];
}
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image);
image->image_view_infos.emplace_back(info);
image->image_view_ids.emplace_back(view_id);
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
}
@ -225,13 +218,13 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
return;
}
auto& image_ids = page_it.value();
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
return;
}
image_ids.erase(vector_it);

View file

@ -37,7 +37,7 @@ public:
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
/// Retrieves the render target with specified properties
ImageView& RenderTarget(VAddr cpu_address, u32 pitch);
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
/// Reuploads image contents.
void RefreshImage(Image& image);