mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
rsx: Fix index vertex array range with modulo calculation
This commit is contained in:
parent
ee3c7f335f
commit
a26b8dff18
2 changed files with 102 additions and 5 deletions
|
@ -62,11 +62,12 @@ namespace rsx
|
|||
u32 real_offset_address = 0;
|
||||
u8 memory_location = 0;
|
||||
u8 attribute_stride = 0;
|
||||
std::pair<u32, u32> vertex_range{};
|
||||
|
||||
rsx::simple_array<interleaved_attribute_t> locations;
|
||||
|
||||
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
|
||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
|
||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count);
|
||||
};
|
||||
|
||||
enum attribute_buffer_placement : u8
|
||||
|
@ -100,6 +101,7 @@ namespace rsx
|
|||
result->single_vertex = false;
|
||||
result->locations.clear();
|
||||
result->interleaved = true;
|
||||
result->vertex_range.second = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -269,8 +269,14 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
|
||||
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count)
|
||||
{
|
||||
if (vertex_range.second)
|
||||
{
|
||||
// Cached result
|
||||
return vertex_range;
|
||||
}
|
||||
|
||||
if (single_vertex)
|
||||
{
|
||||
return { 0, 1 };
|
||||
|
@ -280,10 +286,15 @@ namespace rsx
|
|||
u32 _max_index = 0;
|
||||
u32 _min_index = first;
|
||||
|
||||
u32 frequencies[rsx::limits::vertex_count];
|
||||
u32 freq_count = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed ? 0 : u32{umax};
|
||||
u32 max_result_by_division = 0; // Guaranteed maximum
|
||||
|
||||
for (const auto &attrib : locations)
|
||||
{
|
||||
if (attrib.frequency <= 1) [[likely]]
|
||||
{
|
||||
freq_count = umax;
|
||||
_max_index = max_index;
|
||||
}
|
||||
else
|
||||
|
@ -294,12 +305,24 @@ namespace rsx
|
|||
{
|
||||
// Actually uses the modulo operator
|
||||
_min_index = 0;
|
||||
_max_index = attrib.frequency - 1;
|
||||
_max_index = std::max<u32>(_max_index, attrib.frequency - 1);
|
||||
|
||||
if (max_result_by_division < _max_index)
|
||||
{
|
||||
if (freq_count != umax)
|
||||
{
|
||||
if (std::find(frequencies, frequencies + freq_count, attrib.frequency) == frequencies + freq_count)
|
||||
{
|
||||
frequencies[freq_count++] = attrib.frequency;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Same as having no modulo
|
||||
_max_index = max_index;
|
||||
freq_count = umax;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -307,12 +330,84 @@ namespace rsx
|
|||
// Division operator
|
||||
_min_index = std::min(_min_index, first / attrib.frequency);
|
||||
_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
|
||||
|
||||
if (freq_count > 0 && freq_count != umax)
|
||||
{
|
||||
const u32 max = utils::aligned_div(max_index, attrib.frequency);
|
||||
max_result_by_division = std::max<u32>(max_result_by_division, max);
|
||||
|
||||
// Discard lower frequencies because it has been proven that there are indices higher than them
|
||||
freq_count -= frequencies + freq_count - std::remove_if(frequencies, frequencies + freq_count, [&max_result_by_division](u32 freq)
|
||||
{
|
||||
return freq <= max_result_by_division;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (freq_count > 0 && freq_count != umax)
|
||||
{
|
||||
const rsx::index_array_type index_type = rsx::method_registers.current_draw_clause.is_immediate_draw ?
|
||||
rsx::index_array_type::u32 :
|
||||
rsx::method_registers.index_type();
|
||||
|
||||
const u32 index_size = index_type == rsx::index_array_type::u32 ? 4 : 2;
|
||||
|
||||
// If we can access a bit a more memory than required - do it
|
||||
// The alternative would be re-iterating again over all of them
|
||||
if (get_location(real_offset_address) == CELL_GCM_LOCATION_LOCAL)
|
||||
{
|
||||
const auto render = rsx::get_current_renderer();
|
||||
|
||||
if (utils::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (real_offset_address % 0x100000 + (_max_index + 1) * attribute_stride <= 0x100000)//(vm::check_addr(real_offset_address, vm::page_readable, (_max_index + 1) * attribute_stride))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
_max_index = 0;
|
||||
|
||||
// Force aligned indices as realhw
|
||||
const u32 address = (0 - index_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
|
||||
|
||||
auto re_evaluate = [&](auto ptr)
|
||||
{
|
||||
for (u32 _index = first; _index < first + count; _index++)
|
||||
{
|
||||
const auto value = ptr[_index];
|
||||
|
||||
for (u32 freq_it = 0; freq_it < freq_count; freq_it++)
|
||||
{
|
||||
const auto res = value % frequencies[freq_it];
|
||||
|
||||
if (res > _max_index)
|
||||
{
|
||||
_max_index = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (index_size == 4)
|
||||
{
|
||||
re_evaluate(vm::get_super_ptr<u32>(address));
|
||||
}
|
||||
else
|
||||
{
|
||||
re_evaluate(vm::get_super_ptr<u16>(address));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
ensure(_max_index >= _min_index);
|
||||
return { _min_index, (_max_index - _min_index) + 1 };
|
||||
vertex_range = { _min_index, (_max_index - _min_index) + 1 };
|
||||
return vertex_range;
|
||||
}
|
||||
|
||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
|
||||
|
@ -2798,7 +2893,7 @@ namespace rsx
|
|||
|
||||
if (persistent != nullptr)
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
for (interleaved_range_info* block : layout.interleaved_blocks)
|
||||
{
|
||||
auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue