Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-04-21 12:04:45 +00:00)
video_core: propagate index/offset SGPRs to vkCmdDraw instead of offsetting the buffer address
This commit is contained in:
parent 68f042001c
commit d9ea23badc
6 changed files with 53 additions and 45 deletions
|
@@ -3,6 +3,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
|
||||
|
@@ -33,7 +34,7 @@ namespace Shader::Gcn {
|
|||
* We take the reverse way, extract the original input semantics from these instructions.
|
||||
**/
|
||||
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
|
||||
std::vector<VertexAttribute> attributes;
|
||||
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
|
||||
GcnDecodeContext decoder;
|
||||
|
@@ -44,8 +45,9 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
|||
s32 dst_reg{-1};
|
||||
};
|
||||
boost::container::static_vector<VsharpLoad, 16> loads;
|
||||
std::array<u32, 16> offsets{};
|
||||
offsets.fill(0xFF);
|
||||
|
||||
s8 fetch_index_sgpr = -1;
|
||||
s8 fetch_offset_sgpr = -1;
|
||||
|
||||
u32 semantic_index = 0;
|
||||
while (!code_slice.atEnd()) {
|
||||
|
@@ -62,7 +64,18 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
|||
}
|
||||
|
||||
if (inst.opcode == Opcode::V_ADD_I32) {
|
||||
offsets[inst.dst[0].code] = inst.src[0].code;
|
||||
const auto vgpr = inst.dst[0].code;
|
||||
const auto sgpr = s8(inst.src[0].code);
|
||||
switch (vgpr) {
|
||||
case 0: // V0 is always the index
|
||||
fetch_index_sgpr = sgpr;
|
||||
break;
|
||||
case 3: // V3 is always the offset
|
||||
fetch_offset_sgpr = sgpr;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
if (inst.inst_class == InstClass::VectorMemBufFmt) {
|
||||
|
@@ -74,29 +87,13 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
|||
const auto it = std::ranges::find_if(
|
||||
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
|
||||
|
||||
auto mubuf = inst.control.mubuf;
|
||||
|
||||
auto& attrib = attributes.emplace_back();
|
||||
attrib.semantic = semantic_index++;
|
||||
attrib.dest_vgpr = inst.src[1].code;
|
||||
attrib.num_elements = mubuf.count;
|
||||
attrib.num_elements = inst.control.mubuf.count;
|
||||
attrib.sgpr_base = it->base_sgpr;
|
||||
attrib.dword_offset = it->dword_offset;
|
||||
|
||||
u8 soofs = inst.src[0].code;
|
||||
|
||||
if (mubuf.idxen != 0) {
|
||||
attrib.index_sgpr = offsets[soofs++];
|
||||
} else {
|
||||
attrib.index_sgpr = 0xFF;
|
||||
}
|
||||
|
||||
if (mubuf.offen != 0) {
|
||||
attrib.offset_sgpr = offsets[soofs];
|
||||
} else {
|
||||
attrib.offset_sgpr = 0xFF;
|
||||
}
|
||||
|
||||
// Store instance id rate
|
||||
attrib.instance_data = inst.src[0].code;
|
||||
|
||||
|
@@ -105,7 +102,11 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
|||
}
|
||||
}
|
||||
|
||||
return attributes;
|
||||
return FetchShaderData{
|
||||
.attributes = std::move(attributes),
|
||||
.fetch_index_sgpr = fetch_index_sgpr,
|
||||
.fetch_offset_sgpr = fetch_offset_sgpr,
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@@ -15,10 +15,14 @@ struct VertexAttribute {
|
|||
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
|
||||
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
|
||||
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
|
||||
u8 index_sgpr; ///< Read index from VADDR
|
||||
u8 offset_sgpr; ///< Offset from VADDR
|
||||
};
|
||||
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
|
||||
/// Result of ParseFetchShader: the vertex attributes recovered from the fetch
/// shader plus the SGPRs it adds to the index/offset VGPRs.
struct FetchShaderData {
|
||||
std::vector<VertexAttribute> attributes; ///< Attributes collected from the fetch shader's buffer-format loads
|
||||
s8 fetch_index_sgpr; ///< Source SGPR of the V_ADD_I32 that writes V0 (the index), or -1 if none was seen
|
||||
s8 fetch_offset_sgpr; ///< Source SGPR of the V_ADD_I32 that writes V3 (the offset), or -1 if none was seen
|
||||
};
|
||||
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@@ -346,7 +346,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
|
||||
// Parse the assembly to generate a list of attributes.
|
||||
u32 fetch_size{};
|
||||
const auto attribs = ParseFetchShader(code, &fetch_size);
|
||||
const auto fetch_data = ParseFetchShader(code, &fetch_size);
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
using namespace Common::FS;
|
||||
|
@@ -359,7 +359,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
file.WriteRaw<u8>(code, fetch_size);
|
||||
}
|
||||
|
||||
for (const auto& attrib : attribs) {
|
||||
info.fetch_index_sgpr = fetch_data.fetch_index_sgpr;
|
||||
info.fetch_offset_sgpr = fetch_data.fetch_offset_sgpr;
|
||||
|
||||
for (const auto& attrib : fetch_data.attributes) {
|
||||
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
|
||||
IR::VectorReg dst_reg{attrib.dest_vgpr};
|
||||
|
||||
|
@@ -412,8 +415,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.index_sgpr = attrib.index_sgpr,
|
||||
.offset_sgpr = attrib.offset_sgpr,
|
||||
.instance_step_rate = step_rate,
|
||||
.instance_data_buf = instance_buf_handle,
|
||||
});
|
||||
|
|
|
@@ -135,8 +135,6 @@ struct Info {
|
|||
u16 num_components;
|
||||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
u8 index_sgpr;
|
||||
u8 offset_sgpr;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
};
|
||||
|
@@ -177,6 +175,9 @@ struct Info {
|
|||
AttributeFlags stores{};
|
||||
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
|
||||
|
||||
s8 fetch_index_sgpr = -1;
|
||||
s8 fetch_offset_sgpr = -1;
|
||||
|
||||
BufferResourceList buffers;
|
||||
ImageResourceList images;
|
||||
SamplerResourceList samplers;
|
||||
|
|
|
@@ -101,7 +101,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
}
|
||||
|
||||
std::array<vk::Buffer, NUM_VERTEX_BUFFERS> host_buffers;
|
||||
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets{};
|
||||
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets;
|
||||
boost::container::static_vector<AmdGpu::Buffer, NUM_VERTEX_BUFFERS> guest_buffers;
|
||||
|
||||
struct BufferRange {
|
||||
|
@@ -131,16 +131,6 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
|
||||
|
||||
u32 offset = 0;
|
||||
if (input.index_sgpr != 0xFF) {
|
||||
offset += vs_info.user_data[input.index_sgpr] * buffer.GetStride();
|
||||
}
|
||||
if (input.offset_sgpr != 0xFF) {
|
||||
offset += vs_info.user_data[input.offset_sgpr];
|
||||
}
|
||||
host_offsets[guest_buffers.size() - 1] = offset;
|
||||
|
||||
attributes.push_back({
|
||||
.location = input.binding,
|
||||
.binding = input.binding,
|
||||
|
@@ -190,7 +180,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
ASSERT(host_buffer != ranges_merged.cend());
|
||||
|
||||
host_buffers[i] = host_buffer->vk_buffer;
|
||||
host_offsets[i] += host_buffer->offset + buffer.base_address - host_buffer->base_address;
|
||||
host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
|
||||
}
|
||||
|
||||
if (num_buffers > 0) {
|
||||
|
|
|
@@ -51,13 +51,24 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
BeginRendering();
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
u32 instance_offset = 0;
|
||||
if (vs_info.fetch_offset_sgpr != -1) {
|
||||
instance_offset = vs_info.user_data[vs_info.fetch_offset_sgpr];
|
||||
}
|
||||
|
||||
if (is_indexed) {
|
||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||
u32 vertex_offset = 0;
|
||||
if (vs_info.fetch_index_sgpr != -1) {
|
||||
vertex_offset = vs_info.user_data[vs_info.fetch_index_sgpr];
|
||||
}
|
||||
|
||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
|
||||
instance_offset);
|
||||
} else {
|
||||
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
|
||||
? 4
|
||||
: regs.num_indices;
|
||||
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
|
||||
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, instance_offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue