rsx: Refactor index buffers

- Index offset is ignored anyway and only used to calculate vertex attribute divisor index
- Specialized optimization for untouched xfer without primitive restart
This commit is contained in:
kd-11 2019-01-14 15:33:05 +03:00 committed by kd-11
parent afeacc171f
commit 417a2e6731
14 changed files with 283 additions and 210 deletions

View file

@ -206,12 +206,12 @@ namespace rsx
auto fifo = vm::ptr<u16>::make(idxAddr);
for (u32 i = 0; i < idxCount; ++i)
{
u32 index = fifo[i];
if (is_primitive_restart_enabled && index == primitive_restart_index)
u16 index = fifo[i];
if (is_primitive_restart_enabled && (u32)index == primitive_restart_index)
continue;
index = get_index_from_base(index, method_registers.vertex_data_base_index());
min_index = std::min(index, min_index);
max_index = std::max(index, max_index);
index = (u16)get_index_from_base(index, method_registers.vertex_data_base_index());
min_index = (u16)std::min(index, (u16)min_index);
max_index = (u16)std::max(index, (u16)max_index);
}
break;
}

View file

@ -3,6 +3,8 @@
#include "../rsx_methods.h"
#include "Utilities/sysinfo.h"
#include <limits>
#define DEBUG_VERTEX_STREAMING 0
const bool s_use_ssse3 =
@ -538,139 +540,176 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
namespace
{
template<typename T>
std::tuple<u32, u32, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
{
u32 min_index = -1;
u32 max_index = 0;
verify(HERE), (dst.size_bytes() >= src.size_bytes());
u32 dst_idx = 0;
for (T index : src)
template <typename T>
constexpr T index_limit()
{
if (is_primitive_restart_enabled && (u32)index == primitive_restart_index)
{
// List types do not need primitive restart. Just skip over this instead
if (rsx::method_registers.current_draw_clause.is_disjoint_primitive)
continue;
return std::numeric_limits<T>::max();
}
dst[dst_idx++] = ~0u;
/**
 * Folds `value` into a running [min, max] range and hands the value back.
 *
 * @param min   Running minimum; overwritten when `value` is smaller.
 * @param max   Running maximum; overwritten when `value` is larger.
 * @param value The sample to account for.
 * @return A reference to `value`, so the call can be used inline in an assignment.
 */
template <typename T>
const T& min_max(T& min, T& max, const T& value)
{
	// The two checks are independent on purpose: the very first sample may
	// have to update both bounds at once.
	if (value < min)
	{
		min = value;
	}

	if (value > max)
	{
		max = value;
	}

	return value;
}
struct untouched_impl
{
template<typename T>
static
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst)
{
T min_index = index_limit<T>(), max_index = 0;
u32 dst_index = 0;
for (const T index : src)
{
dst[dst_index++] = min_max(min_index, max_index, index);
}
return std::make_tuple(min_index, max_index, dst_index);
}
};
struct primitive_restart_impl
{
template<typename T>
static
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, u32 restart_index, bool skip_restart)
{
T min_index = index_limit<T>(), max_index = 0;
u32 dst_index = 0;
for (const T index : src)
{
if (index == restart_index)
{
if (!skip_restart)
{
dst[dst_index++] = index_limit<T>();
}
}
else
{
dst[dst_index++] = min_max(min_index, max_index, index);
}
}
return std::make_tuple(min_index, max_index, dst_index);
}
};
template<typename T>
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index)
{
if (LIKELY(!is_primitive_restart_enabled))
{
return untouched_impl::upload_untouched(src, dst);
}
else
{
const u32 new_index = rsx::get_index_from_base((u32)index, base_index);
max_index = std::max(max_index, new_index);
min_index = std::min(min_index, new_index);
dst[dst_idx++] = new_index;
return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index, is_primitive_disjointed(draw_mode));
}
}
return std::make_tuple(min_index, max_index, dst_idx);
}
template<typename T>
std::tuple<u32, u32, u32> expand_indexed_triangle_fan(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
{
const u32 invalid_index = ~0u;
u32 min_index = invalid_index;
u32 max_index = 0;
verify(HERE), (dst.size() >= 3 * (src.size() - 2));
u32 dst_idx = 0;
u32 src_idx = 0;
bool needs_anchor = true;
u32 anchor = invalid_index;
u32 last_index = invalid_index;
for (size_t src_idx = 0; src_idx < src.size(); ++src_idx)
template<typename T>
std::tuple<T, T, u32> expand_indexed_triangle_fan(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
{
u32 index = src[src_idx];
index = rsx::get_index_from_base(index, base_index);
const T invalid_index = index_limit<T>();
if (needs_anchor)
T min_index = invalid_index;
T max_index = 0;
verify(HERE), (dst.size() >= 3 * (src.size() - 2));
u32 dst_idx = 0;
u32 src_idx = 0;
bool needs_anchor = true;
T anchor = invalid_index;
T last_index = invalid_index;
for (const T index : src)
{
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
if (needs_anchor)
{
if (is_primitive_restart_enabled && index == primitive_restart_index)
continue;
anchor = index;
needs_anchor = false;
continue;
}
anchor = index;
needs_anchor = false;
continue;
}
if (is_primitive_restart_enabled && index == primitive_restart_index)
{
needs_anchor = true;
last_index = invalid_index;
continue;
}
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
{
needs_anchor = true;
last_index = invalid_index;
continue;
}
if (last_index == invalid_index)
{
//Need at least one anchor and one outer index to create a triangle
last_index = index;
continue;
}
max_index = std::max(max_index, index);
min_index = std::min(min_index, index);
dst[dst_idx++] = anchor;
dst[dst_idx++] = last_index;
dst[dst_idx++] = min_max(min_index, max_index, index);
if (last_index == invalid_index)
{
//Need at least one anchor and one outer index to create a triangle
last_index = index;
continue;
}
dst[dst_idx++] = anchor;
dst[dst_idx++] = last_index;
dst[dst_idx++] = index;
last_index = index;
return std::make_tuple(min_index, max_index, dst_idx);
}
return std::make_tuple(min_index, max_index, dst_idx);
}
template<typename T>
std::tuple<u32, u32, u32> expand_indexed_quads(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
{
u32 min_index = -1;
u32 max_index = 0;
verify(HERE), (4 * dst.size_bytes() >= 6 * src.size_bytes());
u32 dst_idx = 0;
u8 set_size = 0;
u32 tmp_indices[4];
for (int src_idx = 0; src_idx < src.size(); ++src_idx)
template<typename T>
std::tuple<T, T, u32> expand_indexed_quads(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
{
u32 index = src[src_idx];
index = rsx::get_index_from_base(index, base_index);
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
T min_index = index_limit<T>();
T max_index = 0;
verify(HERE), (4 * dst.size_bytes() >= 6 * src.size_bytes());
u32 dst_idx = 0;
u8 set_size = 0;
T tmp_indices[4];
for (const T index : src)
{
//empty temp buffer
set_size = 0;
continue;
if (is_primitive_restart_enabled && index == primitive_restart_index)
{
//empty temp buffer
set_size = 0;
continue;
}
tmp_indices[set_size++] = min_max(min_index, max_index, index);
if (set_size == 4)
{
// First triangle
dst[dst_idx++] = tmp_indices[0];
dst[dst_idx++] = tmp_indices[1];
dst[dst_idx++] = tmp_indices[2];
// Second triangle
dst[dst_idx++] = tmp_indices[2];
dst[dst_idx++] = tmp_indices[3];
dst[dst_idx++] = tmp_indices[0];
set_size = 0;
}
}
tmp_indices[set_size++] = index;
max_index = std::max(max_index, index);
min_index = std::min(min_index, index);
if (set_size == 4)
{
// First triangle
dst[dst_idx++] = tmp_indices[0];
dst[dst_idx++] = tmp_indices[1];
dst[dst_idx++] = tmp_indices[2];
// Second triangle
dst[dst_idx++] = tmp_indices[2];
dst[dst_idx++] = tmp_indices[3];
dst[dst_idx++] = tmp_indices[0];
set_size = 0;
}
return std::make_tuple(min_index, max_index, dst_idx);
}
return std::make_tuple(min_index, max_index, dst_idx);
}
}
// Only handle quads and triangle fan now
@ -697,10 +736,21 @@ bool is_primitive_native(rsx::primitive_type draw_mode)
fmt::throw_exception("Wrong primitive type" HERE);
}
/** We assume that the polygon is convex in polygon mode (a constraint in OpenGL).
* In such a case, polygon triangulation equates to a triangle fan with an arbitrary start vertex.
* See http://www.gamedev.net/page/resources/_/technical/graphics-programming-and-theory/polygon-triangulation-r3334
*/
/**
 * Reports whether `draw_mode` is treated as a disjoint primitive type.
 * Loops, strips, fans and polygons are reported as not disjoint (false);
 * every other primitive type yields true.
 */
bool is_primitive_disjointed(rsx::primitive_type draw_mode)
{
	const bool is_connected_type =
		draw_mode == rsx::primitive_type::line_loop ||
		draw_mode == rsx::primitive_type::line_strip ||
		draw_mode == rsx::primitive_type::polygon ||
		draw_mode == rsx::primitive_type::quad_strip ||
		draw_mode == rsx::primitive_type::triangle_fan ||
		draw_mode == rsx::primitive_type::triangle_strip;

	return !is_connected_type;
}
u32 get_index_count(rsx::primitive_type draw_mode, u32 initial_index_count)
{
@ -791,30 +841,35 @@ namespace
return std::make_tuple(first, count);
}
// TODO: Unify indexed and non indexed primitive expansion ?
template<typename T>
std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst,
std::tuple<T, T, u32> write_index_array_data_to_buffer_impl(gsl::span<T> dst,
gsl::span<const be_t<T>> src,
rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
std::function<bool(rsx::primitive_type)> expands)
{
if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
if (LIKELY(!expands(draw_mode)))
{
return upload_untouched<T>(src, dst, draw_mode, restart_index_enabled, restart_index);
}
switch (draw_mode)
{
case rsx::primitive_type::line_loop:
{
const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
const auto &returnvalue = upload_untouched<T>(src, dst, draw_mode, restart_index_enabled, restart_index);
const auto index_count = dst.size_bytes() / sizeof(T);
dst[index_count] = src[0];
return returnvalue;
}
case rsx::primitive_type::polygon:
case rsx::primitive_type::triangle_fan:
return expand_indexed_triangle_fan<T>(src, dst, restart_index_enabled, restart_index, base_index);
{
return expand_indexed_triangle_fan<T>(src, dst, restart_index_enabled, restart_index);
}
case rsx::primitive_type::quads:
return expand_indexed_quads<T>(src, dst, restart_index_enabled, restart_index, base_index);
{
return expand_indexed_quads<T>(src, dst, restart_index_enabled, restart_index);
}
default:
fmt::throw_exception("Unknown draw mode (0x%x)" HERE, (u32)draw_mode);
}
@ -824,23 +879,23 @@ namespace
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr,
gsl::span<const gsl::byte> src_ptr,
rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
std::function<bool(rsx::primitive_type)> expands)
{
switch (type)
{
case rsx::index_array_type::u16:
{
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
}
case rsx::index_array_type::u32:
{
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
}
default:
fmt::throw_exception("Unreachable" HERE);
}
switch (type)
{
case rsx::index_array_type::u16:
{
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u16>(dst_ptr),
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
}
case rsx::index_array_type::u32:
{
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
}
default:
fmt::throw_exception("Unreachable" HERE);
}
}
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)

View file

@ -17,6 +17,11 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
*/
bool is_primitive_native(rsx::primitive_type m_draw_mode);
/*
* Returns true if adjacency information does not matter for this type. Allows optimizations, e.g. removal of the primitive restart index
*/
bool is_primitive_disjointed(rsx::primitive_type draw_mode);
/**
* Returns a fixed index count for emulated primitive, otherwise returns initial_index_count
*/
@ -34,7 +39,7 @@ u32 get_index_type_size(rsx::index_array_type type);
*/
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src,
rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
u32 base_index, std::function<bool(rsx::primitive_type)> expands);
std::function<bool(rsx::primitive_type)> expands);
/**
* Write index data needed to emulate non indexed non native primitive mode.

View file

@ -357,11 +357,11 @@ namespace glsl
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" if (desc.modulo)\n"
" {\n"
" vertex_id = " << vertex_id_name << " % int(desc.frequency);\n"
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) % int(desc.frequency);\n"
" }\n"
" else\n"
" {\n"
" vertex_id = " << vertex_id_name << " / int(desc.frequency); \n"
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) / int(desc.frequency); \n"
" }\n"
" }\n"
"\n"

View file

@ -377,7 +377,7 @@ namespace
rsx::index_array_type::u32:
rsx::method_registers.index_type();
constexpr size_t index_size = sizeof(u32); // Force u32 destination to avoid overflows when adding base
size_t index_size = get_index_type_size(indexed_type);
// Alloc
size_t buffer_size = align(index_count * index_size, 64);
@ -395,12 +395,12 @@ namespace
rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.restart_index_enabled(),
rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); });
[](auto prim) { return !is_primitive_native(prim); });
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size,
DXGI_FORMAT_R32_UINT};
get_index_type(indexed_type)};
// m_timers.buffer_upload_size += buffer_size;
command_list->IASetIndexBuffer(&index_buffer_view);

View file

@ -1421,10 +1421,13 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
// Vertex layout state
auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align);
auto buf = static_cast<s32*>(mapping.first);
*buf = upload_info.vertex_index_base;
auto buf = static_cast<u32*>(mapping.first);
buf[0] = upload_info.vertex_index_base;
buf[1] = upload_info.vertex_index_offset;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);

View file

@ -25,6 +25,7 @@ namespace gl
u32 vertex_draw_count;
u32 allocated_vertex_count;
u32 vertex_index_base;
u32 vertex_index_offset;
u32 persistent_mapping_offset;
u32 volatile_mapping_offset;
std::optional<std::tuple<GLenum, u32> > index_info;

View file

@ -32,23 +32,6 @@ namespace
write_index_array_for_non_indexed_non_native_primitive_to_buffer(mapped_buffer, primitive_mode, vertex_count);
return std::make_tuple(element_count, mapping.second);
}
std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, u32 initial_vertex_count)
{
u32 min_index, max_index, vertex_draw_count = initial_vertex_count;
if (!gl::is_primitive_native(draw_mode))
vertex_draw_count = (u32)get_index_count(draw_mode, ::narrow<int>(vertex_draw_count));
u32 block_sz = vertex_draw_count * sizeof(u32); // Force u32 index size dest to avoid overflows when adding vertex base index
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) };
std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer,
type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); });
return std::make_tuple(min_index, max_index, vertex_draw_count);
}
}
namespace
@ -69,6 +52,7 @@ namespace
u32 allocated_vertex_count;
u32 vertex_data_base;
u32 vertex_index_base;
u32 vertex_index_offset;
std::optional<std::tuple<GLenum, u32>> index_info;
};
@ -95,10 +79,10 @@ namespace
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer,
rsx::method_registers.current_draw_clause.get_elements_count());
return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
return{ index_count, vertex_count, min_index, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
}
return{ vertex_count, vertex_count, min_index, 0, std::optional<std::tuple<GLenum, u32>>() };
return{ vertex_count, vertex_count, min_index, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
}
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
@ -108,6 +92,8 @@ namespace
rsx::index_array_type type = rsx::method_registers.current_draw_clause.is_immediate_draw?
rsx::index_array_type::u32:
rsx::method_registers.index_type();
u32 type_size = ::narrow<u32>(get_index_type_size(type));
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 index_count = vertex_count;
@ -115,20 +101,27 @@ namespace
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
index_count = (u32)get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count);
u32 max_size = index_count * sizeof(u32);
u32 max_size = index_count * type_size;
auto mapping = m_index_ring_buffer.alloc_from_heap(max_size, 256);
void* ptr = mapping.first;
u32 offset_in_index_buffer = mapping.second;
std::tie(min_index, max_index, index_count) = upload_index_buffer(
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, vertex_count);
std::tie(min_index, max_index, index_count) = write_index_array_data_to_buffer(
{ reinterpret_cast<gsl::byte*>(ptr), max_size },
command.raw_index_buffer, type,
rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.restart_index_enabled(),
rsx::method_registers.restart_index(),
[](auto prim) { return !gl::is_primitive_native(prim); });
if (min_index >= max_index)
{
//empty set, do not draw
return{ 0, 0, 0, 0, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
return{ 0, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
}
const auto index_offset = rsx::method_registers.vertex_data_base_index();
//check for vertex arrays with frequency modifiers
for (auto &block : m_vertex_layout.interleaved_blocks)
{
@ -136,13 +129,14 @@ namespace
{
//Ignore base offsets and return real results
//The upload function will optimize the uploaded range anyway
return{ index_count, max_index, 0, 0, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
return{ index_count, max_index, 0, 0, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
}
}
//Prefer only reading the vertices that are referenced in the index buffer itself
//Offset data source by min_index verts, but also notify the shader to offset the vertexID
return{ index_count, (max_index - min_index + 1), min_index, min_index, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
return{ index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
}
vertex_input_state operator()(const rsx::draw_inlined_array& command)
@ -157,10 +151,10 @@ namespace
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count);
return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
return{ index_count, vertex_count, 0, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
}
return{ vertex_count, vertex_count, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
return{ vertex_count, vertex_count, 0, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
}
private:
@ -183,7 +177,15 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
gl::vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
gl::vertex_upload_info upload_info =
{
result.vertex_draw_count, // Vertex count
result.allocated_vertex_count, // Allocated vertex count
result.vertex_index_base, // Index of attribute at data location 0
result.vertex_index_offset, // Hw index offset
0u, 0u, // Mapping
result.index_info // Index buffer info
};
if (required.first > 0)
{
@ -197,7 +199,9 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
{
verify(HERE), cached->local_address == storage_address;

View file

@ -45,6 +45,7 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
OS << " uvec4 input_attributes_blob[16 / 2];\n";
OS << "};\n\n";
}

View file

@ -2735,7 +2735,8 @@ void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
auto mem = m_vertex_layout_ring_info.alloc<256>(256);
auto buf = (u32*)m_vertex_layout_ring_info.map(mem, 128 + 16);
*buf = vertex_info.vertex_index_base;
buf[0] = vertex_info.vertex_index_base;
buf[1] = vertex_info.vertex_index_offset;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf,

View file

@ -26,6 +26,7 @@ namespace vk
u32 vertex_draw_count;
u32 allocated_vertex_count;
u32 vertex_index_base;
u32 vertex_index_offset;
u32 persistent_window_offset;
u32 volatile_window_offset;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;

View file

@ -86,6 +86,7 @@ namespace
u32 allocated_vertex_count;
u32 vertex_data_base;
u32 vertex_index_base;
u32 vertex_index_offset;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
};
@ -106,12 +107,6 @@ namespace
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
//if (rsx::method_registers.current_draw_clause.draw_command_ranges.size() > 1)
//{
// TODO
//LOG_ERROR(RSX, "REEEEEEEEEEEEEEEEEEEEEEE (prims_emulated=%d)", primitives_emulated);
//}
if (primitives_emulated)
{
u32 index_count;
@ -121,7 +116,7 @@ namespace
generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
vertex_count, m_index_buffer_ring_info);
return{ prims, index_count, vertex_count, min_index, 0, index_info };
return{ prims, index_count, vertex_count, min_index, 0, 0, index_info };
}
return{ prims, vertex_count, vertex_count, min_index, 0, {} };
@ -138,7 +133,7 @@ namespace
rsx::index_array_type::u32 :
rsx::method_registers.index_type();
constexpr u32 type_size = sizeof(u32); // Force u32 index size dest to avoid overflows when adding vertex base index
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
u32 index_count = rsx::method_registers.current_draw_clause.get_elements_count();
if (primitives_emulated)
@ -172,24 +167,33 @@ namespace
rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.restart_index_enabled(),
rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); });
[](auto prim) { return !vk::is_primitive_native(prim); });
if (min_index >= max_index)
{
//empty set, do not draw
m_index_buffer_ring_info.unmap();
return{ prims, 0, 0, 0, 0, {} };
return{ prims, 0, 0, 0, 0, 0, {} };
}
if (emulate_restart)
{
index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX);
if (index_type == rsx::index_array_type::u16)
{
index_count = rsx::remove_restart_index((u16*)buf, (u16*)tmp.data(), index_count, (u16)UINT16_MAX);
}
else
{
index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX);
}
}
m_index_buffer_ring_info.unmap();
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT32);
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
const auto index_offset = rsx::method_registers.vertex_data_base_index();
//check for vertex arrays with frequency modifiers
for (auto &block : m_vertex_layout.interleaved_blocks)
@ -198,11 +202,12 @@ namespace
{
//Ignore base offsets and return real results
//The upload function will optimize the uploaded range anyway
return{ prims, index_count, max_index, 0, 0, index_info };
return{ prims, index_count, max_index, 0, 0, index_offset, index_info };
}
}
return {prims, index_count, (max_index - min_index + 1), min_index, min_index, index_info};
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
return {prims, index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, index_info};
}
vertex_input_state operator()(const rsx::draw_inlined_array& command)
@ -222,7 +227,7 @@ namespace
u32 index_count;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info);
return{ prims, index_count, vertex_count, 0, 0, index_info };
return{ prims, index_count, vertex_count, 0, 0, 0, index_info };
}
private:
@ -256,7 +261,9 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
{
verify(HERE), cached->local_address == storage_address;
@ -349,5 +356,11 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
}
}
return{ result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, persistent_range_base, volatile_range_base, result.index_info };
return{ result.native_primitive_type, // Primitive
result.vertex_draw_count, // Vertex count
result.allocated_vertex_count, // Allocated vertex count
result.vertex_index_base, // Index of vertex at data location 0
result.vertex_index_offset, // Index offset
persistent_range_base, volatile_range_base, // Binding range
result.index_info }; // Index buffer info
}

View file

@ -44,6 +44,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "layout(std140, set = 0, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
OS << " uvec4 input_attributes_blob[16 / 2];\n";
OS << "};\n\n";

View file

@ -17,6 +17,7 @@
#include <cereal/types/unordered_map.hpp>
extern u64 get_system_time();
extern bool is_primitive_disjointed(rsx::primitive_type);
namespace rsx
{
@ -312,20 +313,7 @@ namespace rsx
draw_command_barriers.clear();
inline_vertex_array.clear();
switch (primitive)
{
case rsx::primitive_type::line_loop:
case rsx::primitive_type::line_strip:
case rsx::primitive_type::polygon:
case rsx::primitive_type::quad_strip:
case rsx::primitive_type::triangle_fan:
case rsx::primitive_type::triangle_strip:
// Adjacency matters for these types
is_disjoint_primitive = false;
break;
default:
is_disjoint_primitive = true;
}
is_disjoint_primitive = is_primitive_disjointed(primitive);
}
void begin()