mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 12:05:23 +00:00
rsx: Refactor index buffers
- Index offset is ignored anyway and is only used to calculate the vertex attribute divisor index
- Specialized optimization for untouched transfer (xfer) without primitive restart
This commit is contained in:
parent
afeacc171f
commit
417a2e6731
14 changed files with 283 additions and 210 deletions
|
@ -206,12 +206,12 @@ namespace rsx
|
|||
auto fifo = vm::ptr<u16>::make(idxAddr);
|
||||
for (u32 i = 0; i < idxCount; ++i)
|
||||
{
|
||||
u32 index = fifo[i];
|
||||
if (is_primitive_restart_enabled && index == primitive_restart_index)
|
||||
u16 index = fifo[i];
|
||||
if (is_primitive_restart_enabled && (u32)index == primitive_restart_index)
|
||||
continue;
|
||||
index = get_index_from_base(index, method_registers.vertex_data_base_index());
|
||||
min_index = std::min(index, min_index);
|
||||
max_index = std::max(index, max_index);
|
||||
index = (u16)get_index_from_base(index, method_registers.vertex_data_base_index());
|
||||
min_index = (u16)std::min(index, (u16)min_index);
|
||||
max_index = (u16)std::max(index, (u16)max_index);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#include "../rsx_methods.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
#define DEBUG_VERTEX_STREAMING 0
|
||||
|
||||
const bool s_use_ssse3 =
|
||||
|
@ -538,139 +540,176 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
|
|||
|
||||
namespace
|
||||
{
|
||||
template<typename T>
|
||||
std::tuple<u32, u32, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
|
||||
{
|
||||
u32 min_index = -1;
|
||||
u32 max_index = 0;
|
||||
|
||||
verify(HERE), (dst.size_bytes() >= src.size_bytes());
|
||||
|
||||
u32 dst_idx = 0;
|
||||
for (T index : src)
|
||||
template <typename T>
|
||||
constexpr T index_limit()
|
||||
{
|
||||
if (is_primitive_restart_enabled && (u32)index == primitive_restart_index)
|
||||
{
|
||||
// List types do not need primitive restart. Just skip over this instead
|
||||
if (rsx::method_registers.current_draw_clause.is_disjoint_primitive)
|
||||
continue;
|
||||
return std::numeric_limits<T>::max();
|
||||
}
|
||||
|
||||
dst[dst_idx++] = ~0u;
|
||||
/**
 * Widens the running [min, max] pair so that it also covers value.
 * min is lowered when value is smaller than it, max is raised when value is
 * larger than it; the two checks are independent, so a single value may
 * update both bounds.
 * Returns a reference to value so the call can be used inline in an assignment.
 */
template <typename T>
const T& min_max(T& min, T& max, const T& value)
{
	min = (value < min) ? value : min;
	max = (value > max) ? value : max;

	return value;
}
|
||||
|
||||
struct untouched_impl
|
||||
{
|
||||
template<typename T>
|
||||
static
|
||||
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst)
|
||||
{
|
||||
T min_index = index_limit<T>(), max_index = 0;
|
||||
u32 dst_index = 0;
|
||||
|
||||
for (const T index : src)
|
||||
{
|
||||
dst[dst_index++] = min_max(min_index, max_index, index);
|
||||
}
|
||||
|
||||
return std::make_tuple(min_index, max_index, dst_index);
|
||||
}
|
||||
};
|
||||
|
||||
struct primitive_restart_impl
|
||||
{
|
||||
template<typename T>
|
||||
static
|
||||
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, u32 restart_index, bool skip_restart)
|
||||
{
|
||||
T min_index = index_limit<T>(), max_index = 0;
|
||||
u32 dst_index = 0;
|
||||
|
||||
for (const T index : src)
|
||||
{
|
||||
if (index == restart_index)
|
||||
{
|
||||
if (!skip_restart)
|
||||
{
|
||||
dst[dst_index++] = index_limit<T>();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[dst_index++] = min_max(min_index, max_index, index);
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_tuple(min_index, max_index, dst_index);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
||||
{
|
||||
if (LIKELY(!is_primitive_restart_enabled))
|
||||
{
|
||||
return untouched_impl::upload_untouched(src, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 new_index = rsx::get_index_from_base((u32)index, base_index);
|
||||
max_index = std::max(max_index, new_index);
|
||||
min_index = std::min(min_index, new_index);
|
||||
dst[dst_idx++] = new_index;
|
||||
return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index, is_primitive_disjointed(draw_mode));
|
||||
}
|
||||
}
|
||||
return std::make_tuple(min_index, max_index, dst_idx);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::tuple<u32, u32, u32> expand_indexed_triangle_fan(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
|
||||
{
|
||||
const u32 invalid_index = ~0u;
|
||||
|
||||
u32 min_index = invalid_index;
|
||||
u32 max_index = 0;
|
||||
|
||||
verify(HERE), (dst.size() >= 3 * (src.size() - 2));
|
||||
|
||||
u32 dst_idx = 0;
|
||||
u32 src_idx = 0;
|
||||
|
||||
bool needs_anchor = true;
|
||||
u32 anchor = invalid_index;
|
||||
u32 last_index = invalid_index;
|
||||
|
||||
for (size_t src_idx = 0; src_idx < src.size(); ++src_idx)
|
||||
template<typename T>
|
||||
std::tuple<T, T, u32> expand_indexed_triangle_fan(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
||||
{
|
||||
u32 index = src[src_idx];
|
||||
index = rsx::get_index_from_base(index, base_index);
|
||||
const T invalid_index = index_limit<T>();
|
||||
|
||||
if (needs_anchor)
|
||||
T min_index = invalid_index;
|
||||
T max_index = 0;
|
||||
|
||||
verify(HERE), (dst.size() >= 3 * (src.size() - 2));
|
||||
|
||||
u32 dst_idx = 0;
|
||||
u32 src_idx = 0;
|
||||
|
||||
bool needs_anchor = true;
|
||||
T anchor = invalid_index;
|
||||
T last_index = invalid_index;
|
||||
|
||||
for (const T index : src)
|
||||
{
|
||||
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
|
||||
if (needs_anchor)
|
||||
{
|
||||
if (is_primitive_restart_enabled && index == primitive_restart_index)
|
||||
continue;
|
||||
|
||||
anchor = index;
|
||||
needs_anchor = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
anchor = index;
|
||||
needs_anchor = false;
|
||||
continue;
|
||||
}
|
||||
if (is_primitive_restart_enabled && index == primitive_restart_index)
|
||||
{
|
||||
needs_anchor = true;
|
||||
last_index = invalid_index;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
|
||||
{
|
||||
needs_anchor = true;
|
||||
last_index = invalid_index;
|
||||
continue;
|
||||
}
|
||||
if (last_index == invalid_index)
|
||||
{
|
||||
//Need at least one anchor and one outer index to create a triangle
|
||||
last_index = index;
|
||||
continue;
|
||||
}
|
||||
|
||||
max_index = std::max(max_index, index);
|
||||
min_index = std::min(min_index, index);
|
||||
dst[dst_idx++] = anchor;
|
||||
dst[dst_idx++] = last_index;
|
||||
dst[dst_idx++] = min_max(min_index, max_index, index);
|
||||
|
||||
if (last_index == invalid_index)
|
||||
{
|
||||
//Need at least one anchor and one outer index to create a triangle
|
||||
last_index = index;
|
||||
continue;
|
||||
}
|
||||
|
||||
dst[dst_idx++] = anchor;
|
||||
dst[dst_idx++] = last_index;
|
||||
dst[dst_idx++] = index;
|
||||
|
||||
last_index = index;
|
||||
return std::make_tuple(min_index, max_index, dst_idx);
|
||||
}
|
||||
|
||||
return std::make_tuple(min_index, max_index, dst_idx);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::tuple<u32, u32, u32> expand_indexed_quads(gsl::span<to_be_t<const T>> src, gsl::span<u32> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index, u32 base_index)
|
||||
{
|
||||
u32 min_index = -1;
|
||||
u32 max_index = 0;
|
||||
|
||||
verify(HERE), (4 * dst.size_bytes() >= 6 * src.size_bytes());
|
||||
|
||||
u32 dst_idx = 0;
|
||||
u8 set_size = 0;
|
||||
u32 tmp_indices[4];
|
||||
|
||||
for (int src_idx = 0; src_idx < src.size(); ++src_idx)
|
||||
template<typename T>
|
||||
std::tuple<T, T, u32> expand_indexed_quads(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
||||
{
|
||||
u32 index = src[src_idx];
|
||||
index = rsx::get_index_from_base(index, base_index);
|
||||
if (is_primitive_restart_enabled && (u32)src[src_idx] == primitive_restart_index)
|
||||
T min_index = index_limit<T>();
|
||||
T max_index = 0;
|
||||
|
||||
verify(HERE), (4 * dst.size_bytes() >= 6 * src.size_bytes());
|
||||
|
||||
u32 dst_idx = 0;
|
||||
u8 set_size = 0;
|
||||
T tmp_indices[4];
|
||||
|
||||
for (const T index : src)
|
||||
{
|
||||
//empty temp buffer
|
||||
set_size = 0;
|
||||
continue;
|
||||
if (is_primitive_restart_enabled && index == primitive_restart_index)
|
||||
{
|
||||
//empty temp buffer
|
||||
set_size = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
tmp_indices[set_size++] = min_max(min_index, max_index, index);
|
||||
|
||||
if (set_size == 4)
|
||||
{
|
||||
// First triangle
|
||||
dst[dst_idx++] = tmp_indices[0];
|
||||
dst[dst_idx++] = tmp_indices[1];
|
||||
dst[dst_idx++] = tmp_indices[2];
|
||||
// Second triangle
|
||||
dst[dst_idx++] = tmp_indices[2];
|
||||
dst[dst_idx++] = tmp_indices[3];
|
||||
dst[dst_idx++] = tmp_indices[0];
|
||||
|
||||
set_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_indices[set_size++] = index;
|
||||
max_index = std::max(max_index, index);
|
||||
min_index = std::min(min_index, index);
|
||||
|
||||
if (set_size == 4)
|
||||
{
|
||||
// First triangle
|
||||
dst[dst_idx++] = tmp_indices[0];
|
||||
dst[dst_idx++] = tmp_indices[1];
|
||||
dst[dst_idx++] = tmp_indices[2];
|
||||
// Second triangle
|
||||
dst[dst_idx++] = tmp_indices[2];
|
||||
dst[dst_idx++] = tmp_indices[3];
|
||||
dst[dst_idx++] = tmp_indices[0];
|
||||
|
||||
set_size = 0;
|
||||
}
|
||||
return std::make_tuple(min_index, max_index, dst_idx);
|
||||
}
|
||||
|
||||
return std::make_tuple(min_index, max_index, dst_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Only handle quads and triangle fan now
|
||||
|
@ -697,10 +736,21 @@ bool is_primitive_native(rsx::primitive_type draw_mode)
|
|||
fmt::throw_exception("Wrong primitive type" HERE);
|
||||
}
|
||||
|
||||
/** We assume that polygon is convex in polygon mode (constraints in OpenGL)
|
||||
* In such a case, polygon triangulation equates to a triangle fan with an arbitrary start vertex
|
||||
* see http://www.gamedev.net/page/resources/_/technical/graphics-programming-and-theory/polygon-triangulation-r3334
|
||||
*/
|
||||
/**
 * Returns true when draw_mode is not one of the loop/strip/fan/polygon types
 * listed below, and false for those listed types. Any primitive type not
 * explicitly listed is reported as disjointed.
 */
bool is_primitive_disjointed(rsx::primitive_type draw_mode)
{
	const bool is_connected_type =
		draw_mode == rsx::primitive_type::line_loop ||
		draw_mode == rsx::primitive_type::line_strip ||
		draw_mode == rsx::primitive_type::polygon ||
		draw_mode == rsx::primitive_type::quad_strip ||
		draw_mode == rsx::primitive_type::triangle_fan ||
		draw_mode == rsx::primitive_type::triangle_strip;

	return !is_connected_type;
}
|
||||
|
||||
u32 get_index_count(rsx::primitive_type draw_mode, u32 initial_index_count)
|
||||
{
|
||||
|
@ -791,30 +841,35 @@ namespace
|
|||
return std::make_tuple(first, count);
|
||||
}
|
||||
|
||||
|
||||
// TODO: Unify indexed and non indexed primitive expansion ?
|
||||
template<typename T>
|
||||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst,
|
||||
std::tuple<T, T, u32> write_index_array_data_to_buffer_impl(gsl::span<T> dst,
|
||||
gsl::span<const be_t<T>> src,
|
||||
rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
||||
std::function<bool(rsx::primitive_type)> expands)
|
||||
{
|
||||
if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||
if (LIKELY(!expands(draw_mode)))
|
||||
{
|
||||
return upload_untouched<T>(src, dst, draw_mode, restart_index_enabled, restart_index);
|
||||
}
|
||||
|
||||
switch (draw_mode)
|
||||
{
|
||||
case rsx::primitive_type::line_loop:
|
||||
{
|
||||
const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||
const auto &returnvalue = upload_untouched<T>(src, dst, draw_mode, restart_index_enabled, restart_index);
|
||||
const auto index_count = dst.size_bytes() / sizeof(T);
|
||||
dst[index_count] = src[0];
|
||||
return returnvalue;
|
||||
}
|
||||
case rsx::primitive_type::polygon:
|
||||
case rsx::primitive_type::triangle_fan:
|
||||
return expand_indexed_triangle_fan<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||
{
|
||||
return expand_indexed_triangle_fan<T>(src, dst, restart_index_enabled, restart_index);
|
||||
}
|
||||
case rsx::primitive_type::quads:
|
||||
return expand_indexed_quads<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||
{
|
||||
return expand_indexed_quads<T>(src, dst, restart_index_enabled, restart_index);
|
||||
}
|
||||
default:
|
||||
fmt::throw_exception("Unknown draw mode (0x%x)" HERE, (u32)draw_mode);
|
||||
}
|
||||
|
@ -824,23 +879,23 @@ namespace
|
|||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr,
|
||||
gsl::span<const gsl::byte> src_ptr,
|
||||
rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
||||
std::function<bool(rsx::primitive_type)> expands)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case rsx::index_array_type::u16:
|
||||
{
|
||||
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst_ptr),
|
||||
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
|
||||
}
|
||||
case rsx::index_array_type::u32:
|
||||
{
|
||||
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
|
||||
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
|
||||
}
|
||||
default:
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
switch (type)
|
||||
{
|
||||
case rsx::index_array_type::u16:
|
||||
{
|
||||
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u16>(dst_ptr),
|
||||
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
|
||||
}
|
||||
case rsx::index_array_type::u32:
|
||||
{
|
||||
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
|
||||
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
|
||||
}
|
||||
default:
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
}
|
||||
|
||||
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
|
||||
|
|
|
@ -17,6 +17,11 @@ void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::s
|
|||
*/
|
||||
bool is_primitive_native(rsx::primitive_type m_draw_mode);
|
||||
|
||||
/*
|
||||
* Returns true if adjacency information does not matter for this type. Allows optimizations, e.g. removal of the primitive restart index
|
||||
*/
|
||||
bool is_primitive_disjointed(rsx::primitive_type draw_mode);
|
||||
|
||||
/**
|
||||
* Returns a fixed index count for emulated primitive, otherwise returns initial_index_count
|
||||
*/
|
||||
|
@ -34,7 +39,7 @@ u32 get_index_type_size(rsx::index_array_type type);
|
|||
*/
|
||||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src,
|
||||
rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands);
|
||||
std::function<bool(rsx::primitive_type)> expands);
|
||||
|
||||
/**
|
||||
* Write index data needed to emulate non indexed non native primitive mode.
|
||||
|
|
|
@ -357,11 +357,11 @@ namespace glsl
|
|||
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
|
||||
" if (desc.modulo)\n"
|
||||
" {\n"
|
||||
" vertex_id = " << vertex_id_name << " % int(desc.frequency);\n"
|
||||
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) % int(desc.frequency);\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" vertex_id = " << vertex_id_name << " / int(desc.frequency); \n"
|
||||
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) / int(desc.frequency); \n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
|
|
|
@ -377,7 +377,7 @@ namespace
|
|||
rsx::index_array_type::u32:
|
||||
rsx::method_registers.index_type();
|
||||
|
||||
constexpr size_t index_size = sizeof(u32); // Force u32 destination to avoid overflows when adding base
|
||||
size_t index_size = get_index_type_size(indexed_type);
|
||||
|
||||
// Alloc
|
||||
size_t buffer_size = align(index_count * index_size, 64);
|
||||
|
@ -395,12 +395,12 @@ namespace
|
|||
rsx::method_registers.current_draw_clause.primitive,
|
||||
rsx::method_registers.restart_index_enabled(),
|
||||
rsx::method_registers.restart_index(),
|
||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); });
|
||||
[](auto prim) { return !is_primitive_native(prim); });
|
||||
|
||||
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
|
||||
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size,
|
||||
DXGI_FORMAT_R32_UINT};
|
||||
get_index_type(indexed_type)};
|
||||
// m_timers.buffer_upload_size += buffer_size;
|
||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||
|
||||
|
|
|
@ -1421,10 +1421,13 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
|||
|
||||
// Vertex layout state
|
||||
auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align);
|
||||
auto buf = static_cast<s32*>(mapping.first);
|
||||
*buf = upload_info.vertex_index_base;
|
||||
auto buf = static_cast<u32*>(mapping.first);
|
||||
|
||||
buf[0] = upload_info.vertex_index_base;
|
||||
buf[1] = upload_info.vertex_index_offset;
|
||||
buf += 4;
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
|
||||
m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ namespace gl
|
|||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
u32 persistent_mapping_offset;
|
||||
u32 volatile_mapping_offset;
|
||||
std::optional<std::tuple<GLenum, u32> > index_info;
|
||||
|
|
|
@ -32,23 +32,6 @@ namespace
|
|||
write_index_array_for_non_indexed_non_native_primitive_to_buffer(mapped_buffer, primitive_mode, vertex_count);
|
||||
return std::make_tuple(element_count, mapping.second);
|
||||
}
|
||||
|
||||
std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, u32 initial_vertex_count)
|
||||
{
|
||||
u32 min_index, max_index, vertex_draw_count = initial_vertex_count;
|
||||
|
||||
if (!gl::is_primitive_native(draw_mode))
|
||||
vertex_draw_count = (u32)get_index_count(draw_mode, ::narrow<int>(vertex_draw_count));
|
||||
|
||||
u32 block_sz = vertex_draw_count * sizeof(u32); // Force u32 index size dest to avoid overflows when adding vertex base index
|
||||
|
||||
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) };
|
||||
std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer,
|
||||
type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(),
|
||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); });
|
||||
|
||||
return std::make_tuple(min_index, max_index, vertex_draw_count);
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -69,6 +52,7 @@ namespace
|
|||
u32 allocated_vertex_count;
|
||||
u32 vertex_data_base;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
std::optional<std::tuple<GLenum, u32>> index_info;
|
||||
};
|
||||
|
||||
|
@ -95,10 +79,10 @@ namespace
|
|||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer,
|
||||
rsx::method_registers.current_draw_clause.get_elements_count());
|
||||
|
||||
return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
||||
return{ index_count, vertex_count, min_index, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
return{ vertex_count, vertex_count, min_index, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
return{ vertex_count, vertex_count, min_index, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
|
||||
|
@ -108,6 +92,8 @@ namespace
|
|||
rsx::index_array_type type = rsx::method_registers.current_draw_clause.is_immediate_draw?
|
||||
rsx::index_array_type::u32:
|
||||
rsx::method_registers.index_type();
|
||||
|
||||
u32 type_size = ::narrow<u32>(get_index_type_size(type));
|
||||
|
||||
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
u32 index_count = vertex_count;
|
||||
|
@ -115,20 +101,27 @@ namespace
|
|||
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
|
||||
index_count = (u32)get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
||||
|
||||
u32 max_size = index_count * sizeof(u32);
|
||||
u32 max_size = index_count * type_size;
|
||||
auto mapping = m_index_ring_buffer.alloc_from_heap(max_size, 256);
|
||||
void* ptr = mapping.first;
|
||||
u32 offset_in_index_buffer = mapping.second;
|
||||
|
||||
std::tie(min_index, max_index, index_count) = upload_index_buffer(
|
||||
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
||||
std::tie(min_index, max_index, index_count) = write_index_array_data_to_buffer(
|
||||
{ reinterpret_cast<gsl::byte*>(ptr), max_size },
|
||||
command.raw_index_buffer, type,
|
||||
rsx::method_registers.current_draw_clause.primitive,
|
||||
rsx::method_registers.restart_index_enabled(),
|
||||
rsx::method_registers.restart_index(),
|
||||
[](auto prim) { return !gl::is_primitive_native(prim); });
|
||||
|
||||
if (min_index >= max_index)
|
||||
{
|
||||
//empty set, do not draw
|
||||
return{ 0, 0, 0, 0, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
|
||||
return{ 0, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
const auto index_offset = rsx::method_registers.vertex_data_base_index();
|
||||
|
||||
//check for vertex arrays with frequency modifiers
|
||||
for (auto &block : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
|
@ -136,13 +129,14 @@ namespace
|
|||
{
|
||||
//Ignore base offsets and return real results
|
||||
//The upload function will optimize the uploaded range anyway
|
||||
return{ index_count, max_index, 0, 0, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
|
||||
return{ index_count, max_index, 0, 0, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
}
|
||||
|
||||
//Prefer only reading the vertices that are referenced in the index buffer itself
|
||||
//Offset data source by min_index verts, but also notify the shader to offset the vertexID
|
||||
return{ index_count, (max_index - min_index + 1), min_index, min_index, std::make_tuple(GL_UNSIGNED_INT, offset_in_index_buffer) };
|
||||
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
|
||||
return{ index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_inlined_array& command)
|
||||
|
@ -157,10 +151,10 @@ namespace
|
|||
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
||||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count);
|
||||
|
||||
return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
||||
return{ index_count, vertex_count, 0, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
return{ vertex_count, vertex_count, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
return{ vertex_count, vertex_count, 0, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -183,7 +177,15 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
|||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
|
||||
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
|
||||
gl::vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
|
||||
gl::vertex_upload_info upload_info =
|
||||
{
|
||||
result.vertex_draw_count, // Vertex count
|
||||
result.allocated_vertex_count, // Allocated vertex count
|
||||
result.vertex_index_base, // Index of attribute at data location 0
|
||||
result.vertex_index_offset, // Hw index offset
|
||||
0u, 0u, // Mapping
|
||||
result.index_info // Index buffer info
|
||||
};
|
||||
|
||||
if (required.first > 0)
|
||||
{
|
||||
|
@ -197,7 +199,9 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
|||
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
|
||||
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
|
||||
{
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
||||
{
|
||||
verify(HERE), cached->local_address == storage_address;
|
||||
|
|
|
@ -45,6 +45,7 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
|||
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uvec4 input_attributes_blob[16 / 2];\n";
|
||||
OS << "};\n\n";
|
||||
}
|
||||
|
|
|
@ -2735,7 +2735,8 @@ void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
|
|||
auto mem = m_vertex_layout_ring_info.alloc<256>(256);
|
||||
auto buf = (u32*)m_vertex_layout_ring_info.map(mem, 128 + 16);
|
||||
|
||||
*buf = vertex_info.vertex_index_base;
|
||||
buf[0] = vertex_info.vertex_index_base;
|
||||
buf[1] = vertex_info.vertex_index_offset;
|
||||
buf += 4;
|
||||
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf,
|
||||
|
|
|
@ -26,6 +26,7 @@ namespace vk
|
|||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
u32 persistent_window_offset;
|
||||
u32 volatile_window_offset;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
|
|
|
@ -86,6 +86,7 @@ namespace
|
|||
u32 allocated_vertex_count;
|
||||
u32 vertex_data_base;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
};
|
||||
|
||||
|
@ -106,12 +107,6 @@ namespace
|
|||
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
|
||||
|
||||
//if (rsx::method_registers.current_draw_clause.draw_command_ranges.size() > 1)
|
||||
//{
|
||||
// TODO
|
||||
//LOG_ERROR(RSX, "REEEEEEEEEEEEEEEEEEEEEEE (prims_emulated=%d)", primitives_emulated);
|
||||
//}
|
||||
|
||||
if (primitives_emulated)
|
||||
{
|
||||
u32 index_count;
|
||||
|
@ -121,7 +116,7 @@ namespace
|
|||
generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
|
||||
vertex_count, m_index_buffer_ring_info);
|
||||
|
||||
return{ prims, index_count, vertex_count, min_index, 0, index_info };
|
||||
return{ prims, index_count, vertex_count, min_index, 0, 0, index_info };
|
||||
}
|
||||
|
||||
return{ prims, vertex_count, vertex_count, min_index, 0, {} };
|
||||
|
@ -138,7 +133,7 @@ namespace
|
|||
rsx::index_array_type::u32 :
|
||||
rsx::method_registers.index_type();
|
||||
|
||||
constexpr u32 type_size = sizeof(u32); // Force u32 index size dest to avoid overflows when adding vertex base index
|
||||
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
|
||||
|
||||
u32 index_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
if (primitives_emulated)
|
||||
|
@ -172,24 +167,33 @@ namespace
|
|||
rsx::method_registers.current_draw_clause.primitive,
|
||||
rsx::method_registers.restart_index_enabled(),
|
||||
rsx::method_registers.restart_index(),
|
||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); });
|
||||
[](auto prim) { return !vk::is_primitive_native(prim); });
|
||||
|
||||
if (min_index >= max_index)
|
||||
{
|
||||
//empty set, do not draw
|
||||
m_index_buffer_ring_info.unmap();
|
||||
return{ prims, 0, 0, 0, 0, {} };
|
||||
return{ prims, 0, 0, 0, 0, 0, {} };
|
||||
}
|
||||
|
||||
if (emulate_restart)
|
||||
{
|
||||
index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX);
|
||||
if (index_type == rsx::index_array_type::u16)
|
||||
{
|
||||
index_count = rsx::remove_restart_index((u16*)buf, (u16*)tmp.data(), index_count, (u16)UINT16_MAX);
|
||||
}
|
||||
else
|
||||
{
|
||||
index_count = rsx::remove_restart_index((u32*)buf, (u32*)tmp.data(), index_count, (u32)UINT32_MAX);
|
||||
}
|
||||
}
|
||||
|
||||
m_index_buffer_ring_info.unmap();
|
||||
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
|
||||
std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT32);
|
||||
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
|
||||
|
||||
const auto index_offset = rsx::method_registers.vertex_data_base_index();
|
||||
|
||||
//check for vertex arrays with frequency modifiers
|
||||
for (auto &block : m_vertex_layout.interleaved_blocks)
|
||||
|
@ -198,11 +202,12 @@ namespace
|
|||
{
|
||||
//Ignore base offsets and return real results
|
||||
//The upload function will optimize the uploaded range anyway
|
||||
return{ prims, index_count, max_index, 0, 0, index_info };
|
||||
return{ prims, index_count, max_index, 0, 0, index_offset, index_info };
|
||||
}
|
||||
}
|
||||
|
||||
return {prims, index_count, (max_index - min_index + 1), min_index, min_index, index_info};
|
||||
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
|
||||
return {prims, index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, index_info};
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_inlined_array& command)
|
||||
|
@ -222,7 +227,7 @@ namespace
|
|||
u32 index_count;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info);
|
||||
return{ prims, index_count, vertex_count, 0, 0, index_info };
|
||||
return{ prims, index_count, vertex_count, 0, 0, 0, index_info };
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -256,7 +261,9 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
|||
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
|
||||
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
|
||||
{
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
||||
{
|
||||
verify(HERE), cached->local_address == storage_address;
|
||||
|
@ -349,5 +356,11 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
|||
}
|
||||
}
|
||||
|
||||
return{ result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, persistent_range_base, volatile_range_base, result.index_info };
|
||||
return{ result.native_primitive_type, // Primitive
|
||||
result.vertex_draw_count, // Vertex count
|
||||
result.allocated_vertex_count, // Allocated vertex count
|
||||
result.vertex_index_base, // Index of vertex at data location 0
|
||||
result.vertex_index_offset, // Index offset
|
||||
persistent_range_base, volatile_range_base, // Binding range
|
||||
result.index_info }; // Index buffer info
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
|||
OS << "layout(std140, set = 0, binding = 1) uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uvec4 input_attributes_blob[16 / 2];\n";
|
||||
OS << "};\n\n";
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <cereal/types/unordered_map.hpp>
|
||||
|
||||
extern u64 get_system_time();
|
||||
extern bool is_primitive_disjointed(rsx::primitive_type);
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
@ -312,20 +313,7 @@ namespace rsx
|
|||
draw_command_barriers.clear();
|
||||
inline_vertex_array.clear();
|
||||
|
||||
switch (primitive)
|
||||
{
|
||||
case rsx::primitive_type::line_loop:
|
||||
case rsx::primitive_type::line_strip:
|
||||
case rsx::primitive_type::polygon:
|
||||
case rsx::primitive_type::quad_strip:
|
||||
case rsx::primitive_type::triangle_fan:
|
||||
case rsx::primitive_type::triangle_strip:
|
||||
// Adjacency matters for these types
|
||||
is_disjoint_primitive = false;
|
||||
break;
|
||||
default:
|
||||
is_disjoint_primitive = true;
|
||||
}
|
||||
is_disjoint_primitive = is_primitive_disjointed(primitive);
|
||||
}
|
||||
|
||||
void begin()
|
||||
|
|
Loading…
Add table
Reference in a new issue