This commit is contained in:
poly 2025-04-19 12:44:58 -03:00 committed by GitHub
commit 7810104952
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 157 additions and 98 deletions

View file

@ -270,6 +270,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery);
}
if (info.has_layer_output) {
ctx.AddCapability(spv::Capability::ShaderLayer);
}
if (info.uses_lane_id) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
}

View file

@ -13,36 +13,46 @@
namespace Shader::Backend::SPIRV {
namespace {
// Resolves the SPIR-V output pointer that backs one component of the "misc" export vectors
// (IR attributes Position1..Position3).
//
// attr    - attribute being written; only used for the diagnostic message.
// index   - row in the stage's output map; the caller passes attr - Position1.
// element - component (0..3) within that row.
//
// Returns a default-constructed Id when the component is mapped to Output::None, the layer
// output variable for Output::MrtIndex, or an access chain into the clip/cull distance
// arrays. Any other Output value reaches the default case and is treated as unreachable.
//
// NOTE(review): this listing interleaves the superseded VsOutput-based lines with the
// Output-based lines that replace them; the comments describe the Output-based path.
Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
Id MiscOutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 index, u32 element) {
// Look up what this (row, component) slot is mapped to for the current stage.
const auto& output = ctx.runtime_info.GetOutputs()[index][element];
switch (output) {
case VsOutput::ClipDist0:
case VsOutput::ClipDist1:
case VsOutput::ClipDist2:
case VsOutput::ClipDist3:
case VsOutput::ClipDist4:
case VsOutput::ClipDist5:
case VsOutput::ClipDist6:
case VsOutput::ClipDist7: {
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
case Output::None:
// The shader writes a component that the output map leaves unmapped: warn and hand back
// an invalid Id. EmitSetAttribute checks the pointer with Sirit::ValidId and skips the store.
LOG_WARNING(Render_Recompiler,
"Misc. vector output used but disabled on stage {}, attr {}, comp {}",
ctx.stage, attr, element);
return {};
case Output::MrtIndex: {
// Render-target index export maps onto the layer output variable, which DefineOutputs
// creates (BuiltIn::Layer, S32) only when info.has_layer_output is set.
ASSERT(ctx.stage == Stage::Geometry || ctx.stage == Stage::Vertex);
return ctx.output_layer;
}
case Output::ClipDist0:
case Output::ClipDist1:
case Output::ClipDist2:
case Output::ClipDist3:
case Output::ClipDist4:
case Output::ClipDist5:
case Output::ClipDist6:
case Output::ClipDist7: {
// Enum distance from ClipDist0 is the element index inside the clip distance array.
const u32 index = u32(output) - u32(Output::ClipDist0);
const Id clip_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
}
case VsOutput::CullDist0:
case VsOutput::CullDist1:
case VsOutput::CullDist2:
case VsOutput::CullDist3:
case VsOutput::CullDist4:
case VsOutput::CullDist5:
case VsOutput::CullDist6:
case VsOutput::CullDist7: {
const u32 index = u32(output) - u32(VsOutput::CullDist0);
case Output::CullDist0:
case Output::CullDist1:
case Output::CullDist2:
case Output::CullDist3:
case Output::CullDist4:
case Output::CullDist5:
case Output::CullDist6:
case Output::CullDist7: {
// Same scheme as the clip distances, relative to CullDist0.
const u32 index = u32(output) - u32(Output::CullDist0);
const Id cull_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
}
default:
// Remaining Output values have no pointer mapping in this function.
UNREACHABLE();
UNREACHABLE_MSG("Unsupported output attribute {}", magic_enum::enum_name(output));
}
}
@ -80,7 +90,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::Position2:
case IR::Attribute::Position3: {
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
return MiscOutputAttrPointer(ctx, attr, index, element);
}
case IR::Attribute::Depth:
return ctx.frag_depth;
@ -89,7 +99,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
}
}
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
return {ctx.F32[1], false};
@ -106,11 +116,19 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
}
switch (attr) {
case IR::Attribute::Position0:
case IR::Attribute::Position1:
case IR::Attribute::Position2:
case IR::Attribute::Position3:
case IR::Attribute::Depth:
return {ctx.F32[1], false};
case IR::Attribute::Position1:
case IR::Attribute::Position2:
case IR::Attribute::Position3: {
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
const auto output = ctx.runtime_info.GetOutputs()[index][element];
if (output == Output::MrtIndex) {
return {ctx.S32[1], true};
} else {
return {ctx.F32[1], false};
}
}
default:
throw NotImplementedException("Write attribute {}", attr);
}
@ -337,12 +355,11 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Position1) {
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
const Id pointer{OutputAttrPointer(ctx, attr, element)};
if (!Sirit::ValidId(pointer)) {
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
const auto component_type{OutputAttrComponentType(ctx, attr)};
const auto component_type{OutputAttrComponentType(ctx, attr, element)};
if (component_type.second) {
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
} else {

View file

@ -510,6 +510,9 @@ void EmitContext::DefineOutputs() {
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
if (info.has_layer_output) {
output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
}
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
if (num_attrs > 0) {
@ -580,6 +583,10 @@ void EmitContext::DefineOutputs() {
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
if (info.has_layer_output) {
output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
@ -609,6 +616,9 @@ void EmitContext::DefineOutputs() {
break;
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
if (info.has_layer_output) {
output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
}
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)};

View file

@ -183,6 +183,7 @@ public:
boost::container::small_vector<Id, 16> interfaces;
Id output_position{};
Id output_layer{};
Id primitive_id{};
Id vertex_index{};
Id instance_id{};

View file

@ -196,6 +196,7 @@ struct Info {
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
bool has_layer_output{};
bool uses_lane_id{};
bool uses_group_quad{};
bool uses_group_ballot{};

View file

@ -11,9 +11,15 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::GetAttributeU32:
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
case IR::Opcode::SetAttribute:
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
case IR::Opcode::SetAttribute: {
const IR::Attribute attr = inst.Arg(0).Attribute();
const u32 comp = inst.Arg(2).U32();
if (attr == IR::Attribute::Position1 && comp == 2u) {
info.has_layer_output = true;
}
info.stores.Set(attr, comp);
break;
}
case IR::Opcode::GetUserData:
info.ud_mask.Set(inst.Arg(0).ScalarReg());
break;

View file

@ -40,27 +40,14 @@ constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
return static_cast<Stage>(index);
}
// Runtime parameters consulted by the SPIR-V backend when the shader stage is Stage::Local.
struct LocalRuntimeInfo {
// Stride of the stage's output data. The backend derives the number of output attributes
// from it as AlignUp(ls_stride, 16) >> 4. Presumably expressed in bytes -- TODO confirm.
u32 ls_stride;
// Checked together with stage == Local when defining outputs and when picking the component
// type of parameter writes. Presumably "feeds a tessellation control shader" -- TODO confirm.
bool links_with_tcs;
// Defaulted three-way comparison: members are compared in declaration order.
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};
// Runtime parameters for the export stage.
// NOTE(review): no consumer of this struct is visible in this listing -- confirm usage.
struct ExportRuntimeInfo {
// Size of the per-vertex data; units are not visible here -- TODO confirm.
u32 vertex_data_size;
// Defaulted three-way comparison over the single member.
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
};
enum class VsOutput : u8 {
enum class Output : u8 {
None,
PointSprite,
EdgeFlag,
KillFlag,
GsCutFlag,
GsMrtIndex,
GsVpIndex,
MrtIndex,
VpIndex,
CullDist0,
CullDist1,
CullDist2,
@ -78,11 +65,23 @@ enum class VsOutput : u8 {
ClipDist6,
ClipDist7,
};
using VsOutputMap = std::array<VsOutput, 4>;
using OutputMap = std::array<Output, 4>;
// Runtime parameters consulted by the SPIR-V backend when the shader stage is Stage::Local.
struct LocalRuntimeInfo {
// Stride of the stage's output data. The backend derives the number of output attributes
// from it as AlignUp(ls_stride, 16) >> 4. Presumably expressed in bytes -- TODO confirm.
u32 ls_stride;
// Checked together with stage == Local when defining outputs and when picking the component
// type of parameter writes. Presumably "feeds a tessellation control shader" -- TODO confirm.
bool links_with_tcs;
// Defaulted three-way comparison: members are compared in declaration order.
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};
// Runtime parameters for the export stage.
// NOTE(review): no consumer of this struct is visible in this listing -- confirm usage.
struct ExportRuntimeInfo {
// Size of the per-vertex data; units are not visible here -- TODO confirm.
u32 vertex_data_size;
// Defaulted three-way comparison over the single member.
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
};
struct VertexRuntimeInfo {
u32 num_outputs;
std::array<VsOutputMap, 3> outputs;
std::array<OutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
bool clip_disable{};
// Domain
@ -143,6 +142,7 @@ struct HullRuntimeInfo {
static constexpr auto GsMaxOutputStreams = 4u;
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
struct GeometryRuntimeInfo {
std::array<OutputMap, 3> outputs;
u32 num_invocations{};
u32 output_vertices{};
u32 in_vertex_data_size{};
@ -243,6 +243,14 @@ struct RuntimeInfo {
stage = stage_;
}
/// Read-only view of the output map for the current stage: the vertex-stage table when
/// `stage` is Stage::Vertex, the geometry-stage table for every other stage value.
/// NOTE(review): callers are expected to use this only for Vertex/Geometry -- confirm.
const auto& GetOutputs() const noexcept {
    if (stage == Stage::Vertex) {
        return vs_info.outputs;
    }
    return gs_info.outputs;
}
/// Mutable counterpart of the accessor above; same stage-based selection.
auto& GetOutputs() noexcept {
    if (stage == Stage::Vertex) {
        return vs_info.outputs;
    }
    return gs_info.outputs;
}
bool operator==(const RuntimeInfo& other) const noexcept {
switch (stage) {
case Stage::Fragment:

View file

@ -371,6 +371,7 @@ bool Instance::CreateDevice() {
.separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
.hostQueryReset = vk12_features.hostQueryReset,
.timelineSemaphore = vk12_features.timelineSemaphore,
.shaderOutputLayer = vk12_features.shaderOutputLayer,
},
// Vulkan 1.3 promoted extensions
vk::PhysicalDeviceDynamicRenderingFeaturesKHR{

View file

@ -23,8 +23,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
namespace Vulkan {
using Shader::LogicalStage;
using Shader::Output;
using Shader::Stage;
using Shader::VsOutput;
constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
@ -33,49 +33,52 @@ constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
};
// Fills the current stage's output map (see RuntimeInfo::GetOutputs) from the VS output
// control register.
//
// info - runtime info whose output rows are written in place.
// ctl  - register providing the per-row enable bits and the per-component usage bits.
//
// Rows live at fixed indices: 0 = misc vector, 1 = clip/cull distances 0..3,
// 2 = clip/cull distances 4..7. A row whose enable bit is clear is not touched here, so it
// keeps whatever value `info` already held (presumably Output::None from zero
// initialisation -- TODO confirm at the call sites).
//
// NOTE(review): this listing interleaves the superseded GatherVertexOutputs/add_output lines
// with the lines that replace them; the comments describe the GatherMiscOutputs path.
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
w != VsOutput::None) {
info.outputs[info.num_outputs++] = Shader::VsOutputMap{x, y, z, w};
}
};
void GatherMiscOutputs(Shader::RuntimeInfo& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
// VS_OUT_MISC_VEC
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag
? VsOutput::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag
? VsOutput::KillFlag
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
auto& outputs = info.GetOutputs();
if (ctl.vs_out_misc_enable) {
// Misc vector. Components 1 and 2 are shared: edge flag takes precedence over the GS cut
// flag, and kill flag takes precedence over the render target index.
auto& misc_vec = outputs[0];
misc_vec[0] = ctl.use_vtx_point_size ? Output::PointSprite : Output::None;
misc_vec[1] = ctl.use_vtx_edge_flag
? Output::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? Output::GsCutFlag : Output::None);
misc_vec[2] = ctl.use_vtx_kill_flag
? Output::KillFlag
: (ctl.use_vtx_render_target_idx ? Output::MrtIndex : Output::None);
misc_vec[3] = ctl.use_vtx_viewport_idx ? Output::VpIndex : Output::None;
}
// VS_OUT_CCDIST0
add_output(ctl.IsClipDistEnabled(0)
? VsOutput::ClipDist0
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1)
? VsOutput::ClipDist1
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2)
? VsOutput::ClipDist2
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3)
? VsOutput::ClipDist3
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
if (ctl.vs_out_ccdist0_enable) {
// Distances 0..3. For each component a clip distance wins over a cull distance; the
// component is None when neither is enabled.
auto& ccdist0 = outputs[1];
ccdist0[0] = ctl.IsClipDistEnabled(0)
? Output::ClipDist0
: (ctl.IsCullDistEnabled(0) ? Output::CullDist0 : Output::None);
ccdist0[1] = ctl.IsClipDistEnabled(1)
? Output::ClipDist1
: (ctl.IsCullDistEnabled(1) ? Output::CullDist1 : Output::None);
ccdist0[2] = ctl.IsClipDistEnabled(2)
? Output::ClipDist2
: (ctl.IsCullDistEnabled(2) ? Output::CullDist2 : Output::None);
ccdist0[3] = ctl.IsClipDistEnabled(3)
? Output::ClipDist3
: (ctl.IsCullDistEnabled(3) ? Output::CullDist3 : Output::None);
}
// VS_OUT_CCDIST1
add_output(ctl.IsClipDistEnabled(4)
? VsOutput::ClipDist4
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5)
? VsOutput::ClipDist5
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6)
? VsOutput::ClipDist6
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7)
? VsOutput::ClipDist7
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
if (ctl.vs_out_ccdist1_enable) {
// Distances 4..7, same clip-over-cull rule as the previous row.
auto& ccdist1 = outputs[2];
ccdist1[0] = ctl.IsClipDistEnabled(4)
? Output::ClipDist4
: (ctl.IsCullDistEnabled(4) ? Output::CullDist4 : Output::None);
ccdist1[1] = ctl.IsClipDistEnabled(5)
? Output::ClipDist5
: (ctl.IsCullDistEnabled(5) ? Output::CullDist5 : Output::None);
ccdist1[2] = ctl.IsClipDistEnabled(6)
? Output::ClipDist6
: (ctl.IsCullDistEnabled(6) ? Output::CullDist6 : Output::None);
ccdist1[3] = ctl.IsClipDistEnabled(7)
? Output::ClipDist7
: (ctl.IsCullDistEnabled(7) ? Output::CullDist7 : Output::None);
}
}
const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
@ -119,7 +122,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
}
case Stage::Vertex: {
BuildCommon(regs.vs_program);
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
GatherMiscOutputs(info, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
@ -133,6 +136,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
}
case Stage::Geometry: {
BuildCommon(regs.gs_program);
GatherMiscOutputs(info, regs.vs_output_control);
auto& gs_info = info.gs_info;
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
gs_info.num_invocations =
@ -305,7 +309,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
}
// First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader
// First pass of bindings check to identify formats and swizzles and pass them to the shader
// recompiler.
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];

View file

@ -162,7 +162,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
const auto mip = image_view.info.range.base.level;
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
state.color_attachments[state.num_color_attachments++] = {
state.color_attachments[state.num_color_attachments] = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
@ -170,6 +170,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
};
state.cb_slices[state.num_color_attachments] = image_view.info.range.extent.layers;
state.num_color_attachments++;
}
if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) ||
@ -196,6 +198,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
state.has_depth = regs.depth_buffer.DepthValid();
state.has_stencil = regs.depth_buffer.StencilValid();
if (state.has_depth) {
state.depth_slices = image_view.info.range.extent.layers;
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,

View file

@ -34,18 +34,21 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
is_rendering = true;
render_state = new_state;
const auto width =
const u32 width =
render_state.width != std::numeric_limits<u32>::max() ? render_state.width : 1;
const auto height =
const u32 height =
render_state.height != std::numeric_limits<u32>::max() ? render_state.height : 1;
const u32 cb_layers = *std::ranges::max_element(render_state.cb_slices);
const u32 layers = std::max<u32>(cb_layers, render_state.depth_slices);
const vk::RenderingInfo rendering_info = {
.renderArea =
{
.offset = {0, 0},
.extent = {width, height},
},
.layerCount = 1,
.layerCount = layers,
.colorAttachmentCount = render_state.num_color_attachments,
.pColorAttachments = render_state.num_color_attachments > 0
? render_state.color_attachments.data()

View file

@ -28,6 +28,8 @@ struct RenderState {
bool has_stencil{};
u32 width = std::numeric_limits<u32>::max();
u32 height = std::numeric_limits<u32>::max();
std::array<u32, 8> cb_slices{1};
u32 depth_slices{0};
bool operator==(const RenderState& other) const noexcept {
return std::memcmp(this, &other, sizeof(RenderState)) == 0;

View file

@ -436,9 +436,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
const ImageId image_id = FindImage(desc);
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
image.flags &= ~ImageFlagBits::Dirty;
image.usage.depth_target = 1u;
image.usage.stencil = image.info.HasStencil();
UpdateImage(image_id);
// Register meta data for this depth buffer
if (!(image.flags & ImageFlagBits::MetaRegistered)) {