Mirror of https://github.com/RPCS3/rpcs3.git, synced 2025-04-20 19:45:20 +00:00
update
This commit is contained in:
parent 720dc9dd72
commit 1def3c49da
3 changed files with 1007 additions and 0 deletions
843 rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp (new file)
@@ -0,0 +1,843 @@
#include "stdafx.h"
|
||||
#include "RSXDrawCommands.h"
|
||||
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
#include "Emu/RSX/Common/buffer_stream.hpp"
|
||||
#include "Emu/RSX/Common/io_buffer.h"
|
||||
#include "Emu/RSX/Common/simple_array.hpp"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/RSX/Program/GLSLCommon.h"
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
|
||||
#include "Emu/Memory/vm.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
void draw_command_processor::analyse_inputs_interleaved(vertex_input_layout& result, const vertex_program_metadata_t& vp_metadata)
|
||||
{
|
||||
const rsx_state& state = *REGS(m_ctx);
|
||||
const u32 input_mask = state.vertex_attrib_input_mask() & vp_metadata.referenced_inputs_mask;
|
||||
|
||||
result.clear();
|
||||
result.attribute_mask = static_cast<u16>(input_mask);
|
||||
|
||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
interleaved_range_info& info = *result.alloc_interleaved_block();
|
||||
info.interleaved = true;
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
auto& vinfo = state.vertex_arrays_info[index];
|
||||
result.attribute_placement[index] = attribute_buffer_placement::none;
|
||||
|
||||
if (vinfo.size() > 0)
|
||||
{
|
||||
// Stride must be updated even if the stream is disabled
|
||||
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
|
||||
info.locations.push_back({ index, false, 1 });
|
||||
|
||||
if (input_mask & (1u << index))
|
||||
{
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
|
||||
if (info.attribute_stride)
|
||||
{
|
||||
// At least one array feed must be enabled for vertex input
|
||||
result.interleaved_blocks.push_back(&info);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 frequency_divider_mask = REGS(m_ctx)->frequency_divider_operation_mask();
|
||||
result.interleaved_blocks.reserve(16);
|
||||
result.referenced_registers.reserve(16);
|
||||
|
||||
        for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
        {
            ensure(index < rsx::limits::vertex_count);

            if (!(ref_mask & 1u))
            {
                // Nothing to do, uninitialized
                continue;
            }

            // Always reset attribute placement by default
            result.attribute_placement[index] = attribute_buffer_placement::none;

            // Check for interleaving
            if (REGS(m_ctx)->current_draw_clause.is_immediate_draw &&
                REGS(m_ctx)->current_draw_clause.command != rsx::draw_command::indexed)
            {
                // NOTE: In immediate rendering mode, all vertex setup is ignored
                // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
                if (m_vertex_push_buffers[index].vertex_count > 1)
                {
                    // Ensure consistent number of vertices per attribute.
                    m_vertex_push_buffers[index].pad_to(m_vertex_push_buffers[0].vertex_count, false);

                    // Read temp buffer (register array)
                    std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(m_vertex_push_buffers[index].data.size() * sizeof(u32)));
                    result.volatile_blocks.push_back(volatile_range_info);
                    result.attribute_placement[index] = attribute_buffer_placement::transient;
                }
                else if (state.register_vertex_info[index].size > 0)
                {
                    // Reads from register
                    result.referenced_registers.push_back(index);
                    result.attribute_placement[index] = attribute_buffer_placement::transient;
                }

                // Fall back to the default register value if no source is specified via register
                continue;
            }

            const auto& info = state.vertex_arrays_info[index];
            if (!info.size())
            {
                if (state.register_vertex_info[index].size > 0)
                {
                    // Reads from register
                    result.referenced_registers.push_back(index);
                    result.attribute_placement[index] = attribute_buffer_placement::transient;
                    continue;
                }
            }
            else
            {
                result.attribute_placement[index] = attribute_buffer_placement::persistent;
                const u32 base_address = info.offset() & 0x7fffffff;
                bool alloc_new_block = true;
                bool modulo = !!(frequency_divider_mask & (1 << index));

                for (auto& block : result.interleaved_blocks)
                {
                    if (block->single_vertex)
                    {
                        // Single vertex definition, continue
                        continue;
                    }

                    if (block->attribute_stride != info.stride())
                    {
                        // Stride does not match, continue
                        continue;
                    }

                    if (base_address > block->base_offset)
                    {
                        const u32 diff = base_address - block->base_offset;
                        if (diff > info.stride())
                        {
                            // Not interleaved, continue
                            continue;
                        }
                    }
                    else
                    {
                        const u32 diff = block->base_offset - base_address;
                        if (diff > info.stride())
                        {
                            // Not interleaved, continue
                            continue;
                        }

                        // Matches, and this address is lower than existing
                        block->base_offset = base_address;
                    }

                    alloc_new_block = false;
                    block->locations.push_back({ index, modulo, info.frequency() });
                    block->interleaved = true;
                    break;
                }

                if (alloc_new_block)
                {
                    interleaved_range_info& block = *result.alloc_interleaved_block();
                    block.base_offset = base_address;
                    block.attribute_stride = info.stride();
                    block.memory_location = info.offset() >> 31;
                    block.locations.reserve(16);
                    block.locations.push_back({ index, modulo, info.frequency() });

                    if (block.attribute_stride == 0)
                    {
                        block.single_vertex = true;
                        block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
                    }

                    result.interleaved_blocks.push_back(&block);
                }
            }
        }

        for (auto& info : result.interleaved_blocks)
        {
            // Calculate real data address to be used during upload
            info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
        }
    }

    std::span<const std::byte> draw_command_processor::get_raw_index_array(const draw_clause& draw_indexed_clause) const
    {
        if (!m_element_push_buffer.empty()) [[ unlikely ]]
        {
            // Indices provided via immediate mode
            return { reinterpret_cast<const std::byte*>(m_element_push_buffer.data()), ::narrow<u32>(m_element_push_buffer.size() * sizeof(u32)) };
        }

        const rsx::index_array_type type = REGS(m_ctx)->index_type();
        const u32 type_size = get_index_type_size(type);

        // Force aligned indices, matching real hardware behavior
        const u32 address = (0 - type_size) & get_address(REGS(m_ctx)->index_array_address(), REGS(m_ctx)->index_array_location());

        const u32 first = draw_indexed_clause.min_index();
        const u32 count = draw_indexed_clause.get_elements_count();

        const auto ptr = vm::_ptr<const std::byte>(address);
        return { ptr + first * type_size, count * type_size };
    }

    std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
    draw_command_processor::get_draw_command(const rsx::rsx_state& state) const
    {
        if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
        {
            return draw_indexed_array_command
            {
                get_raw_index_array(state.current_draw_clause)
            };
        }

        if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::array)
        {
            return draw_array_command{};
        }

        if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
        {
            return draw_inlined_array{};
        }

        fmt::throw_exception("ill-formed draw command");
    }

    void draw_command_processor::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
    {
        if (!(REGS(m_ctx)->vertex_attrib_input_mask() & (1 << attribute)))
        {
            return;
        }

        // Enforce ATTR0 as vertex attribute for push buffers.
        // This whole thing becomes a mess if we don't have a provoking attribute.
        const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id();
        m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
        RSX(m_ctx)->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
    }

    u32 draw_command_processor::get_push_buffer_vertex_count() const
    {
        // Enforce ATTR0 as vertex attribute for push buffers.
        // This whole thing becomes a mess if we don't have a provoking attribute.
        return m_vertex_push_buffers[0].vertex_count;
    }

    void draw_command_processor::append_array_element(u32 index)
    {
        // Endianness is swapped because common upload code expects input in BE
        // TODO: Implement fast upload path for LE inputs and do away with this
        m_element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
    }

    u32 draw_command_processor::get_push_buffer_index_count() const
    {
        return ::size32(m_element_push_buffer);
    }

    void draw_command_processor::clear_push_buffers()
    {
        auto& graphics_state = RSX(m_ctx)->m_graphics_state;
        if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
        {
            for (auto& push_buf : m_vertex_push_buffers)
            {
                // Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
                // REGS(m_ctx)->register_vertex_info[index].size = 0;

                push_buf.clear();
            }

            graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
        }

        m_element_push_buffer.clear();
    }

    void draw_command_processor::fill_vertex_layout_state(
        const vertex_input_layout& layout,
        const vertex_program_metadata_t& vp_metadata,
        u32 first_vertex,
        u32 vertex_count,
        s32* buffer,
        u32 persistent_offset_base,
        u32 volatile_offset_base) const
    {
        std::array<s32, 16> offset_in_block = {};
        u32 volatile_offset = volatile_offset_base;
        u32 persistent_offset = persistent_offset_base;

        // NOTE: Order is important! Transient layout is always push_buffers followed by register data
        if (REGS(m_ctx)->current_draw_clause.is_immediate_draw)
        {
            for (const auto& info : layout.volatile_blocks)
            {
                offset_in_block[info.first] = volatile_offset;
                volatile_offset += info.second;
            }
        }

        for (u8 index : layout.referenced_registers)
        {
            offset_in_block[index] = volatile_offset;
            volatile_offset += 16;
        }

        if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
        {
            const auto& block = layout.interleaved_blocks[0];
            u32 inline_data_offset = volatile_offset;
            for (const auto& attrib : block->locations)
            {
                auto& info = REGS(m_ctx)->vertex_arrays_info[attrib.index];

                offset_in_block[attrib.index] = inline_data_offset;
                inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
            }
        }
        else
        {
            for (const auto& block : layout.interleaved_blocks)
            {
                for (const auto& attrib : block->locations)
                {
                    const u32 local_address = (REGS(m_ctx)->vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
                    offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
                }

                const auto range = block->calculate_required_range(first_vertex, vertex_count);
                persistent_offset += block->attribute_stride * range.second;
            }
        }

        // Fill the data
        // Each descriptor field is 64 bits wide
        // [0-8] attribute stride
        // [8-24] attribute divisor
        // [24-27] attribute type
        // [27-30] attribute size
        // [30-31] reserved
        // [31-60] starting offset
        // [60-61] swap bytes flag
        // [61-62] volatile flag
        // [62-63] modulo enable flag

        const s32 default_frequency_mask = (1 << 8);
        const s32 swap_storage_mask = (1 << 29);
        const s32 volatile_storage_mask = (1 << 30);
        const s32 modulo_op_frequency_mask = smin;

        const u32 modulo_mask = REGS(m_ctx)->frequency_divider_operation_mask();
        const auto max_index = (first_vertex + vertex_count) - 1;

        for (u16 ref_mask = vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
        {
            if (!(ref_mask & 1u))
            {
                // Unused input, ignore this
                continue;
            }

            if (layout.attribute_placement[index] == attribute_buffer_placement::none)
            {
                static constexpr u64 zero = 0;
                std::memcpy(buffer + index * 2, &zero, sizeof(zero));
                continue;
            }

            rsx::vertex_base_type type = {};
            s32 size = 0;
            s32 attrib0 = 0;
            s32 attrib1 = 0;

            if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
            {
                if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
                {
                    const auto& info = REGS(m_ctx)->vertex_arrays_info[index];

                    if (!info.size())
                    {
                        // Register
                        const auto& reginfo = REGS(m_ctx)->register_vertex_info[index];
                        type = reginfo.type;
                        size = reginfo.size;

                        attrib0 = rsx::get_vertex_type_size_on_host(type, size);
                    }
                    else
                    {
                        // Array
                        type = info.type();
                        size = info.size();

                        attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
                    }
                }
                else
                {
                    // Data is either from an immediate render or register input
                    // Immediate data overrides register input

                    if (REGS(m_ctx)->current_draw_clause.is_immediate_draw &&
                        m_vertex_push_buffers[index].vertex_count > 1)
                    {
                        // Push buffer
                        const auto& info = m_vertex_push_buffers[index];
                        type = info.type;
                        size = info.size;

                        attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
                    }
                    else
                    {
                        // Register
                        const auto& info = REGS(m_ctx)->register_vertex_info[index];
                        type = info.type;
                        size = info.size;

                        attrib0 = rsx::get_vertex_type_size_on_host(type, size);
                    }
                }

                attrib1 |= volatile_storage_mask;
            }
            else
            {
                auto& info = REGS(m_ctx)->vertex_arrays_info[index];
                type = info.type();
                size = info.size();

                auto stride = info.stride();
                attrib0 = stride;

                if (stride > 0) // when stride is 0, input is not an array but a single element
                {
                    const u32 frequency = info.frequency();
                    switch (frequency)
                    {
                    case 0:
                    case 1:
                    {
                        attrib0 |= default_frequency_mask;
                        break;
                    }
                    default:
                    {
                        if (modulo_mask & (1 << index))
                        {
                            if (max_index >= frequency)
                            {
                                // Only set modulo mask if a modulo op is actually necessary!
                                // This requires that the uploaded range for this attr = [0, freq-1]
                                // Ignoring modulo op if the rendered range does not wrap allows for range optimization
                                attrib0 |= (frequency << 8);
                                attrib1 |= modulo_op_frequency_mask;
                            }
                            else
                            {
                                attrib0 |= default_frequency_mask;
                            }
                        }
                        else
                        {
                            // Division
                            attrib0 |= (frequency << 8);
                        }
                        break;
                    }
                    }
                }
            } // end attribute placement check

            // Special compressed 4 components into one 4-byte value. Decoded as one value.
            if (type == rsx::vertex_base_type::cmp)
            {
                size = 1;
            }

            // All data is passed in in PS3-native order (BE) so swap flag should be set
            attrib1 |= swap_storage_mask;
            attrib0 |= (static_cast<s32>(type) << 24);
            attrib0 |= (size << 27);
            attrib1 |= offset_in_block[index];

            buffer[index * 2 + 0] = attrib0;
            buffer[index * 2 + 1] = attrib1;
        }
    }

    void draw_command_processor::write_vertex_data_to_memory(
        const vertex_input_layout& layout,
        u32 first_vertex,
        u32 vertex_count,
        void* persistent_data,
        void* volatile_data) const
    {
        auto transient = static_cast<char*>(volatile_data);
        auto persistent = static_cast<char*>(persistent_data);

        auto& draw_call = REGS(m_ctx)->current_draw_clause;

        if (transient != nullptr)
        {
            if (draw_call.command == rsx::draw_command::inlined_array)
            {
                for (const u8 index : layout.referenced_registers)
                {
                    memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16);
                    transient += 16;
                }

                memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
                // Is it possible to reference data outside of the inlined array?
                return;
            }

            // NOTE: Order is important! Transient layout is always push_buffers followed by register data
            if (draw_call.is_immediate_draw)
            {
                // NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
                for (const auto& info : layout.volatile_blocks)
                {
                    memcpy(transient, m_vertex_push_buffers[info.first].data.data(), info.second);
                    transient += info.second;
                }
            }

            for (const u8 index : layout.referenced_registers)
            {
                memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16);
                transient += 16;
            }
        }

        if (persistent != nullptr)
        {
            for (interleaved_range_info* block : layout.interleaved_blocks)
            {
                auto range = block->calculate_required_range(first_vertex, vertex_count);

                const u32 data_size = range.second * block->attribute_stride;
                const u32 vertex_base = range.first * block->attribute_stride;

                g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
                persistent += data_size;
            }
        }
    }

    void draw_command_processor::fill_scale_offset_data(void* buffer, bool flip_y) const
    {
        const int clip_w = REGS(m_ctx)->surface_clip_width();
        const int clip_h = REGS(m_ctx)->surface_clip_height();

        const float scale_x = REGS(m_ctx)->viewport_scale_x() / (clip_w / 2.f);
        float offset_x = REGS(m_ctx)->viewport_offset_x() - (clip_w / 2.f);
        offset_x /= clip_w / 2.f;

        float scale_y = REGS(m_ctx)->viewport_scale_y() / (clip_h / 2.f);
        float offset_y = (REGS(m_ctx)->viewport_offset_y() - (clip_h / 2.f));
        offset_y /= clip_h / 2.f;
        if (flip_y) scale_y *= -1;
        if (flip_y) offset_y *= -1;

        const float scale_z = REGS(m_ctx)->viewport_scale_z();
        const float offset_z = REGS(m_ctx)->viewport_offset_z();
        const float one = 1.f;

        utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
        utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
        utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
        utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
    }

    void draw_command_processor::fill_user_clip_data(void* buffer) const
    {
        const rsx::user_clip_plane_op clip_plane_control[6] =
        {
            REGS(m_ctx)->clip_plane_0_enabled(),
            REGS(m_ctx)->clip_plane_1_enabled(),
            REGS(m_ctx)->clip_plane_2_enabled(),
            REGS(m_ctx)->clip_plane_3_enabled(),
            REGS(m_ctx)->clip_plane_4_enabled(),
            REGS(m_ctx)->clip_plane_5_enabled(),
        };

        u8 data_block[64];
        s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
        f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);

        for (int index = 0; index < 6; ++index)
        {
            switch (clip_plane_control[index])
            {
            default:
                rsx_log.error("bad clip plane control (0x%x)", static_cast<u8>(clip_plane_control[index]));
                [[fallthrough]];

            case rsx::user_clip_plane_op::disable:
                clip_enabled_flags[index] = 0;
                clip_distance_factors[index] = 0.f;
                break;

            case rsx::user_clip_plane_op::greater_or_equal:
                clip_enabled_flags[index] = 1;
                clip_distance_factors[index] = 1.f;
                break;

            case rsx::user_clip_plane_op::less_than:
                clip_enabled_flags[index] = 1;
                clip_distance_factors[index] = -1.f;
                break;
            }
        }

        memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
    }

    /**
     * Fill buffer with vertex program constants.
     * Buffer must be at least 512 float4 wide.
     */
    void draw_command_processor::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const
    {
        if (!reloc_table.empty()) [[ likely ]]
        {
            char* dst = reinterpret_cast<char*>(buffer);
            for (const auto& index : reloc_table)
            {
                utils::stream_vector_from_memory(dst, &REGS(m_ctx)->transform_constants[index]);
                dst += 16;
            }
        }
        else
        {
            memcpy(buffer, REGS(m_ctx)->transform_constants.data(), 468 * 4 * sizeof(float));
        }
    }

    void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) const
    {
        ROP_control_t rop_control{};

        if (REGS(m_ctx)->alpha_test_enabled())
        {
            const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func());
            rop_control.set_alpha_test_func(alpha_func);
            rop_control.enable_alpha_test();
        }

        if (REGS(m_ctx)->polygon_stipple_enabled())
        {
            rop_control.enable_polygon_stipple();
        }

        if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c)
        {
            // TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
            // Alpha values generate a coverage mask for order independent blending
            // Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
            // Simulated using combined alpha blend and alpha test
            rop_control.enable_alpha_to_coverage();
            if (REGS(m_ctx)->msaa_sample_mask())
            {
                rop_control.enable_MSAA_writes();
            }

            // Sample configuration bits
            switch (REGS(m_ctx)->surface_antialias())
            {
            case rsx::surface_antialiasing::center_1_sample:
                break;
            case rsx::surface_antialiasing::diagonal_centered_2_samples:
                rop_control.set_msaa_control(1u);
                break;
            default:
                rop_control.set_msaa_control(3u);
                break;
            }
        }

        const f32 fog0 = REGS(m_ctx)->fog_params_0();
        const f32 fog1 = REGS(m_ctx)->fog_params_1();
        const u32 fog_mode = static_cast<u32>(REGS(m_ctx)->fog_equation());

        // Check if framebuffer is actually an XRGB format and not a WZYX format
        switch (REGS(m_ctx)->surface_color())
        {
        case rsx::surface_color_format::w16z16y16x16:
        case rsx::surface_color_format::w32z32y32x32:
        case rsx::surface_color_format::x32:
            // These behave very differently from "normal" formats.
            break;
        default:
            // Integer framebuffer formats.
            rop_control.enable_framebuffer_INT();

            // Check if we want sRGB conversion.
            if (REGS(m_ctx)->framebuffer_srgb_enabled())
            {
                rop_control.enable_framebuffer_sRGB();
            }
            break;
        }

        // Generate wpos coefficients
        // wpos equation is now as follows:
        // wpos.y = (frag_coord / resolution_scale) * ((window_origin != top) ? -1. : 1.) + ((window_origin != top) ? window_height : 0)
        // wpos.x = (frag_coord / resolution_scale)
        // wpos.zw = frag_coord.zw

        const auto window_origin = REGS(m_ctx)->shader_window_origin();
        const u32 window_height = REGS(m_ctx)->shader_window_height();
        const f32 resolution_scale = (window_height <= static_cast<u32>(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale();
        const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale);
        const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height;
        const f32 alpha_ref = REGS(m_ctx)->alpha_ref();

        u32* dst = static_cast<u32*>(buffer);
        utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
        utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
    }

    void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const
    {
        auto& draw_call = REGS(m_ctx)->current_draw_clause;

        // Only call this for instanced draws!
        ensure(draw_call.is_trivial_instanced_draw);

        // Temp indirection table. Used to track "running" updates.
        rsx::simple_array<u32> instancing_indirection_table;
        // indirection table size
        const auto reloc_table = prog.has_indexed_constants ? decltype(prog.constant_ids){} : prog.constant_ids;
        const auto redirection_table_size = prog.has_indexed_constants ? 468u : ::size32(prog.constant_ids);
        instancing_indirection_table.resize(redirection_table_size);

        // Temp constants data
        rsx::simple_array<u128> constants_data;
        constants_data.reserve(redirection_table_size * draw_call.pass_count());

        // Allocate indirection buffer on GPU stream
        indirection_table_buf.reserve(instancing_indirection_table.size_bytes() * draw_call.pass_count());
        auto indirection_out = indirection_table_buf.data<u32>();

        rsx::instanced_draw_config_t instance_config;
        u32 indirection_table_offset = 0;

        // We now replay the draw call here to pack the data.
        draw_call.begin();

        // Write initial draw data.
        std::iota(instancing_indirection_table.begin(), instancing_indirection_table.end(), 0);

        constants_data.resize(redirection_table_size);
        fill_vertex_program_constants_data(constants_data.data(), reloc_table);

        // Next draw. We're guaranteed more than one draw call by the caller.
        draw_call.next();

        do
        {
            // Write previous state
            std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes());
            indirection_table_offset += redirection_table_size;

            // Decode next draw state
            instance_config = {};
            draw_call.execute_pipeline_dependencies(m_ctx, &instance_config);

            if (!instance_config.transform_constants_data_changed)
            {
                continue;
            }

            const int translated_offset = prog.has_indexed_constants
                ? instance_config.patch_load_offset
                : prog.TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count);

            if (translated_offset >= 0)
            {
                // Trivially patchable in bulk
                const u32 redirection_loc = ::size32(constants_data);
                constants_data.resize(::size32(constants_data) + instance_config.patch_load_count);
                std::memcpy(constants_data.data() + redirection_loc, &REGS(m_ctx)->transform_constants[instance_config.patch_load_offset], instance_config.patch_load_count * sizeof(u128));

                // Update indirection table
                for (auto i = translated_offset, count = 0;
                     static_cast<u32>(count) < instance_config.patch_load_count;
                     ++i, ++count)
                {
                    instancing_indirection_table[i] = redirection_loc + count;
                }

                continue;
            }

            ensure(!prog.has_indexed_constants);

            // Sparse update. Update records individually instead of bulk
            // FIXME: Range batching optimization
            const auto load_end = instance_config.patch_load_offset + instance_config.patch_load_count;
            for (u32 i = 0; i < redirection_table_size; ++i)
            {
                const auto read_index = prog.constant_ids[i];
                if (read_index < instance_config.patch_load_offset || read_index >= load_end)
                {
                    // Reading outside "hot" range.
                    continue;
                }

                const u32 redirection_loc = ::size32(constants_data);
                constants_data.resize(::size32(constants_data) + 1);
                std::memcpy(constants_data.data() + redirection_loc, &REGS(m_ctx)->transform_constants[read_index], sizeof(u128));

                instancing_indirection_table[i] = redirection_loc;
            }

        } while (draw_call.next());

        // Tail
        ensure(indirection_table_offset < (instancing_indirection_table.size() * draw_call.pass_count()));
        std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes());

        // Now write the constants to the GPU buffer
        constants_data_array_buffer.reserve(constants_data.size_bytes());
        std::memcpy(constants_data_array_buffer.data(), constants_data.data(), constants_data.size_bytes());
    }
}
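The packed attribute descriptor written by fill_vertex_layout_state above (two s32 words per attribute, following the bit-layout comment and the mask constants in that function) can be illustrated with a small standalone sketch. The helper name pack_attrib_desc and the standalone form are assumptions made for illustration and are not part of this commit:

#include <cstdint>
#include <utility>

// Illustrative only (not from the commit): pack one vertex attribute descriptor.
// word0: [0-7] stride, [8-23] divisor, [24-26] type, [27-29] size
// word1: offset into the upload block, bit 29 = swap bytes, bit 30 = volatile, bit 31 = modulo
static std::pair<int32_t, int32_t> pack_attrib_desc(uint32_t stride, uint32_t divisor,
    uint32_t type, uint32_t size, uint32_t offset,
    bool swap_bytes, bool is_volatile, bool modulo)
{
    int32_t word0 = static_cast<int32_t>((stride & 0xff) |
        ((divisor & 0xffff) << 8) | ((type & 0x7) << 24) | ((size & 0x7) << 27));
    int32_t word1 = static_cast<int32_t>(offset);
    if (swap_bytes)  word1 |= (1 << 29); // corresponds to swap_storage_mask
    if (is_volatile) word1 |= (1 << 30); // corresponds to volatile_storage_mask
    if (modulo)      word1 |= INT32_MIN; // corresponds to modulo_op_frequency_mask (smin)
    return { word0, word1 };
}

For instance, a big-endian float4 array attribute with a 16-byte stride and per-vertex frequency would be packed roughly as pack_attrib_desc(16, 1, type, 4, offset, true, false, false), mirroring attrib0 = stride | default_frequency_mask with the swap flag set in attrib1.
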
110 rpcs3/Emu/RSX/Core/RSXDrawCommands.h (new file)
@@ -0,0 +1,110 @@
#pragma once

#include <util/types.hpp>

#include "Emu/RSX/Core/RSXVertexTypes.h"
#include "Emu/RSX/NV47/FW/draw_call.hpp"
#include "Emu/RSX/Program/ProgramStateCache.h"
#include "Emu/RSX/rsx_vertex_data.h"

#include <span>
#include <variant>

namespace rsx
{
    struct rsx_state;
    struct context;
    class io_buffer;

    class draw_command_processor
    {
        using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata;

        context* m_ctx = nullptr;

    protected:
        friend class thread;

        std::array<push_buffer_vertex_info, 16> m_vertex_push_buffers;
        rsx::simple_array<u32> m_element_push_buffer;

    public:
        draw_command_processor() = default;

        void init(context* ctx)
        {
            m_ctx = ctx;
        }

        // Analyze vertex inputs and group all interleaved blocks
        void analyse_inputs_interleaved(vertex_input_layout& layout, const vertex_program_metadata_t& vp_metadata);

        // Retrieve raw bytes for the index array (untyped)
        std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;

        // Get compiled draw command for backend rendering
        std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
        get_draw_command(const rsx::rsx_state& state) const;

        // Push-buffers for immediate rendering (begin-end scopes)
        void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);

        u32 get_push_buffer_vertex_count() const;

        void append_array_element(u32 index);

        u32 get_push_buffer_index_count() const;

        void clear_push_buffers();

        const std::span<const u32> element_push_buffer() const
        {
            return m_element_push_buffer;
        }

        // Host driver helpers
        void fill_vertex_layout_state(
            const vertex_input_layout& layout,
            const vertex_program_metadata_t& vp_metadata,
            u32 first_vertex,
            u32 vertex_count,
            s32* buffer,
            u32 persistent_offset_base,
            u32 volatile_offset_base) const;

        void write_vertex_data_to_memory(
            const vertex_input_layout& layout,
            u32 first_vertex,
            u32 vertex_count,
            void* persistent_data,
            void* volatile_data) const;

        /**
         * Fill buffer with 4x4 scale offset matrix.
         * Vertex shader's position is to be multiplied by this matrix.
         * If flip_y is set, the matrix is modified to use the D3D convention.
         */
        void fill_scale_offset_data(void* buffer, bool flip_y) const;

        /**
         * Fill buffer with user clip information
         */
        void fill_user_clip_data(void* buffer) const;

        /**
         * Fill buffer with vertex program constants.
         * The relocation table allows a partial fill with only selected registers.
         */
        void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const;

        /**
         * Fill buffer with fragment rasterization state.
         * Fills current fog values, alpha test parameters and texture scaling parameters
         */
        void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program) const;

        // Fill instancing buffers. A single iobuf is used for both. 256-byte alignment enforced to allow global bind
        // Returns offsets to the index redirection lookup table and constants field array
        void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const;
    };
}
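As a rough orientation for how this interface is driven, the sketch below walks the per-draw call order a backend might use: bind the context once, analyse the vertex layout, then dispatch on the compiled draw command variant. The free function emit_draw and the way ctx, state and vp_metadata are obtained are assumptions for illustration; only the draw_command_processor calls come from the header above:

#include "Emu/RSX/Core/RSXDrawCommands.h"

#include <variant>

// Illustrative sketch only: call order for a hypothetical backend helper.
void emit_draw(rsx::context* ctx, const rsx::rsx_state& state,
    const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata)
{
    rsx::draw_command_processor draw_cmd;
    draw_cmd.init(ctx); // bind the RSX context (normally done once at thread init)

    // Group attributes into interleaved blocks for upload
    rsx::vertex_input_layout layout;
    draw_cmd.analyse_inputs_interleaved(layout, vp_metadata);

    // Dispatch on the compiled draw command (indexed, array or inlined array)
    std::visit([&](const auto& cmd)
    {
        // Backend-specific vertex fetch setup and submission would go here
    }, draw_cmd.get_draw_command(state));
}
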
54 rpcs3/Emu/RSX/Core/RSXDriverState.h (new file)
@@ -0,0 +1,54 @@
#pragma once

#include <util/types.hpp>

namespace rsx
{
    enum pipeline_state : u32
    {
        fragment_program_ucode_dirty = (1 << 0),   // Fragment program ucode changed
        vertex_program_ucode_dirty = (1 << 1),     // Vertex program ucode changed
        fragment_program_state_dirty = (1 << 2),   // Fragment program state changed
        vertex_program_state_dirty = (1 << 3),     // Vertex program state changed
        fragment_state_dirty = (1 << 4),           // Fragment state changed (alpha test, etc)
        vertex_state_dirty = (1 << 5),             // Vertex state changed (scale_offset, clip planes, etc)
        transform_constants_dirty = (1 << 6),      // Transform constants changed
        fragment_constants_dirty = (1 << 7),       // Fragment constants changed
        framebuffer_reads_dirty = (1 << 8),        // Framebuffer contents changed
        fragment_texture_state_dirty = (1 << 9),   // Fragment texture parameters changed
        vertex_texture_state_dirty = (1 << 10),    // Vertex texture parameters changed
        scissor_config_state_dirty = (1 << 11),    // Scissor region changed
        zclip_config_state_dirty = (1 << 12),      // Viewport Z clip changed

        scissor_setup_invalid = (1 << 13),         // Scissor configuration is broken
        scissor_setup_clipped = (1 << 14),         // Scissor region is cropped by viewport constraint

        polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
        line_stipple_pattern_dirty = (1 << 16),    // Line stippling pattern changed

        push_buffer_arrays_dirty = (1 << 17),      // Push buffers have data written to them (immediate mode vertex buffers)

        polygon_offset_state_dirty = (1 << 18),    // Polygon offset config was changed
        depth_bounds_state_dirty = (1 << 19),      // Depth bounds configuration changed

        pipeline_config_dirty = (1 << 20),         // Generic pipeline configuration changes. Shader peek hint.

        rtt_config_dirty = (1 << 21),              // Render target configuration changed
        rtt_config_contested = (1 << 22),          // Render target configuration is indeterminate
        rtt_config_valid = (1 << 23),              // Render target configuration is valid
        rtt_cache_state_dirty = (1 << 24),         // Texture cache state is indeterminate

        xform_instancing_state_dirty = (1 << 25),  // Transform instancing state has changed

        fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
        vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
        invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty | xform_instancing_state_dirty,
        invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
        memory_barrier_bits = framebuffer_reads_dirty,

        // Vulkan-specific signals
        invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,

        all_dirty = ~0u
    };
}
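The dirty-bit groups defined here are meant to be OR-ed into a flags field and consumed in bulk. A minimal sketch of that pattern follows; the plain u32 field and the pipeline_tracker/consume names are simplifications for illustration only (in the emulator these flags live in m_graphics_state behind an atomic flags helper):

#include "Emu/RSX/Core/RSXDriverState.h"

#include <util/types.hpp>

// Sketch only: accumulate dirty bits, then test-and-clear whole groups of them.
struct pipeline_tracker
{
    u32 state = rsx::pipeline_state::all_dirty; // everything is dirty on startup

    void on_transform_constants_upload()
    {
        state |= rsx::pipeline_state::transform_constants_dirty;
    }

    // Returns true if any of 'bits' were pending, clearing them in the process
    bool consume(u32 bits)
    {
        if (!(state & bits))
        {
            return false;
        }

        state &= ~bits;
        return true;
    }
};

// Usage: if (tracker.consume(rsx::pipeline_state::invalidate_pipeline_bits)) { /* rebuild the pipeline */ }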