video_core: propagate index/offset SGPRs to vkCmdDraw instead of offsetting the buffer address

This commit is contained in:
Vinicius Rangel 2024-08-23 17:23:34 -03:00
parent 68f042001c
commit d9ea23badc
No known key found for this signature in database
GPG key ID: A5B154D904B761D9
6 changed files with 53 additions and 45 deletions

View file

@ -3,6 +3,7 @@
#include <algorithm>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
@ -33,7 +34,7 @@ namespace Shader::Gcn {
* We take the reverse approach: we extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
@ -44,8 +45,9 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
s32 dst_reg{-1};
};
boost::container::static_vector<VsharpLoad, 16> loads;
std::array<u32, 16> offsets{};
offsets.fill(0xFF);
s8 fetch_index_sgpr = -1;
s8 fetch_offset_sgpr = -1;
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
@ -62,7 +64,18 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
}
if (inst.opcode == Opcode::V_ADD_I32) {
offsets[inst.dst[0].code] = inst.src[0].code;
const auto vgpr = inst.dst[0].code;
const auto sgpr = s8(inst.src[0].code);
switch (vgpr) {
case 0: // V0 is always the index
fetch_index_sgpr = sgpr;
break;
case 3: // V3 is always the offset
fetch_offset_sgpr = sgpr;
break;
default:
UNREACHABLE();
}
}
if (inst.inst_class == InstClass::VectorMemBufFmt) {
@ -74,29 +87,13 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
const auto it = std::ranges::find_if(
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
auto mubuf = inst.control.mubuf;
auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++;
attrib.dest_vgpr = inst.src[1].code;
attrib.num_elements = mubuf.count;
attrib.num_elements = inst.control.mubuf.count;
attrib.sgpr_base = it->base_sgpr;
attrib.dword_offset = it->dword_offset;
u8 soofs = inst.src[0].code;
if (mubuf.idxen != 0) {
attrib.index_sgpr = offsets[soofs++];
} else {
attrib.index_sgpr = 0xFF;
}
if (mubuf.offen != 0) {
attrib.offset_sgpr = offsets[soofs];
} else {
attrib.offset_sgpr = 0xFF;
}
// Store instance id rate
attrib.instance_data = inst.src[0].code;
@ -105,7 +102,11 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
}
}
return attributes;
return FetchShaderData{
.attributes = std::move(attributes),
.fetch_index_sgpr = fetch_index_sgpr,
.fetch_offset_sgpr = fetch_offset_sgpr,
};
}
} // namespace Shader::Gcn

View file

@ -15,10 +15,14 @@ struct VertexAttribute {
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
u8 index_sgpr; ///< Read index from VADDR
u8 offset_sgpr; ///< Offset from VADDR
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
struct FetchShaderData {
std::vector<VertexAttribute> attributes;
s8 fetch_index_sgpr; ///< Read index from VADDR
s8 fetch_offset_sgpr; ///< Read offset from VADDR
};
FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
} // namespace Shader::Gcn

View file

@ -346,7 +346,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
// Parse the assembly to generate a list of attributes.
u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
const auto fetch_data = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) {
using namespace Common::FS;
@ -359,7 +359,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
file.WriteRaw<u8>(code, fetch_size);
}
for (const auto& attrib : attribs) {
info.fetch_index_sgpr = fetch_data.fetch_index_sgpr;
info.fetch_offset_sgpr = fetch_data.fetch_offset_sgpr;
for (const auto& attrib : fetch_data.attributes) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -412,8 +415,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.index_sgpr = attrib.index_sgpr,
.offset_sgpr = attrib.offset_sgpr,
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});

View file

@ -135,8 +135,6 @@ struct Info {
u16 num_components;
u8 sgpr_base;
u8 dword_offset;
u8 index_sgpr;
u8 offset_sgpr;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
};
@ -177,6 +175,9 @@ struct Info {
AttributeFlags stores{};
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
s8 fetch_index_sgpr = -1;
s8 fetch_offset_sgpr = -1;
BufferResourceList buffers;
ImageResourceList images;
SamplerResourceList samplers;

View file

@ -101,7 +101,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
}
std::array<vk::Buffer, NUM_VERTEX_BUFFERS> host_buffers;
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets{};
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets;
boost::container::static_vector<AmdGpu::Buffer, NUM_VERTEX_BUFFERS> guest_buffers;
struct BufferRange {
@ -131,16 +131,6 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
u32 offset = 0;
if (input.index_sgpr != 0xFF) {
offset += vs_info.user_data[input.index_sgpr] * buffer.GetStride();
}
if (input.offset_sgpr != 0xFF) {
offset += vs_info.user_data[input.offset_sgpr];
}
host_offsets[guest_buffers.size() - 1] = offset;
attributes.push_back({
.location = input.binding,
.binding = input.binding,
@ -190,7 +180,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
ASSERT(host_buffer != ranges_merged.cend());
host_buffers[i] = host_buffer->vk_buffer;
host_offsets[i] += host_buffer->offset + buffer.base_address - host_buffer->base_address;
host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
}
if (num_buffers > 0) {

View file

@ -51,13 +51,24 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
BeginRendering();
UpdateDynamicState(*pipeline);
u32 instance_offset = 0;
if (vs_info.fetch_offset_sgpr != -1) {
instance_offset = vs_info.user_data[vs_info.fetch_offset_sgpr];
}
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
u32 vertex_offset = 0;
if (vs_info.fetch_index_sgpr != -1) {
vertex_offset = vs_info.user_data[vs_info.fetch_index_sgpr];
}
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
instance_offset);
} else {
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
? 4
: regs.num_indices;
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, instance_offset);
}
}