rsx: Implement skip draw. Also, start working on MT vertex upload

This commit is contained in:
kd-11 2017-06-25 23:14:56 +03:00
parent ab97a0a5a3
commit b95ffaf4dd
12 changed files with 730 additions and 232 deletions

View file

@ -212,7 +212,7 @@ void GLGSRender::begin()
__glcheck glDepthBoundsEXT(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
__glcheck glDepthRange(rsx::method_registers.clip_min(), rsx::method_registers.clip_max());
//__glcheck glDepthRange(rsx::method_registers.clip_min(), rsx::method_registers.clip_max());
__glcheck enable(rsx::method_registers.dither_enabled(), GL_DITHER);
if (__glcheck enable(rsx::method_registers.blend_enabled(), GL_BLEND))
@ -373,8 +373,13 @@ void GLGSRender::end()
int location;
if (!rsx::method_registers.fragment_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, 0);
if (m_textures_dirty[i])
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, 0);
m_textures_dirty[i] = false;
}
continue;
}
@ -392,12 +397,12 @@ void GLGSRender::end()
int texture_index = i + rsx::limits::fragment_textures_count;
int location;
if (!rsx::method_registers.vertex_textures[i].enabled())
/* if (!rsx::method_registers.vertex_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + texture_index);
glBindTexture(GL_TEXTURE_2D, 0);
continue;
}
} */
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
{
@ -409,10 +414,20 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
u32 vertex_draw_count;
u32 vertex_draw_count = m_last_vertex_count;
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
m_vao.bind();
bool skip_upload = false;
if (!is_probable_instanced_draw())
{
std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
m_last_vertex_count = vertex_draw_count;
}
else
{
//LOG_ERROR(RSX, "No work is needed for this draw call! Muhahahahahahaha");
skip_upload = true;
}
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
@ -427,19 +442,28 @@ void GLGSRender::end()
m_index_ring_buffer->unmap();
}
if (indexed_draw_info)
if (indexed_draw_info || (skip_upload && m_last_draw_indexed == true))
{
if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{
GLenum index_type = std::get<0>(indexed_draw_info.value());
GLenum index_type = (skip_upload)? m_last_ib_type: std::get<0>(indexed_draw_info.value());
__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
}
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, std::get<0>(indexed_draw_info.value()), (GLvoid *)(std::ptrdiff_t)std::get<1>(indexed_draw_info.value()));
m_last_draw_indexed = true;
if (!skip_upload)
{
m_last_ib_type = std::get<0>(indexed_draw_info.value());
m_last_index_offset = std::get<1>(indexed_draw_info.value());
}
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, m_last_ib_type, (GLvoid *)(std::ptrdiff_t)m_last_index_offset);
}
else
{
draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
m_last_draw_indexed = false;
}
m_attrib_ring_buffer->notify();

View file

@ -75,6 +75,11 @@ private:
bool flush_draw_buffers = false;
bool m_last_draw_indexed;
GLenum m_last_ib_type;
size_t m_last_index_offset;
u32 m_last_vertex_count;
public:
gl::fbo draw_fbo;

View file

@ -307,6 +307,32 @@ namespace rsx
return (u32)element_push_buffer.size();
}
bool thread::is_probable_instanced_draw()
{
	//Heuristic: detect back-to-back identical draws that only vary transform params.
	//gcm lib sets the vertex registers once, then repeats the same draw command over
	//and over to achieve instancing - if nothing relevant changed, skip the re-upload.
	if (!g_cfg.video.batch_instanced_geometry)
		return false;

	//Any touch of the vertex attribute registers or the index array (via notify or
	//register writes) disqualifies this draw from being treated as an instance
	if (m_vertex_attribs_changed || m_index_buffer_changed)
		return false;

	const auto& draw_clause = rsx::method_registers.current_draw_clause;
	if (m_last_command != draw_clause.command)
		return false;

	if (draw_clause.command == rsx::draw_command::inlined_array)
		return m_last_first_count.second == draw_clause.inline_vertex_array.size();

	//Compare the first index of the first range and the count of the last range
	//against the values captured at the end of the previous draw
	return m_last_first_count.first == draw_clause.first_count_commands.front().first &&
		m_last_first_count.second == draw_clause.first_count_commands.back().second;
}
void thread::end()
{
rsx::method_registers.transform_constants.clear();
@ -327,6 +353,17 @@ namespace rsx
u32 element_count = rsx::method_registers.current_draw_clause.get_elements_count();
capture_frame("Draw " + rsx::to_string(rsx::method_registers.current_draw_clause.primitive) + std::to_string(element_count));
}
auto& clause = rsx::method_registers.current_draw_clause;
m_last_command = clause.command;
if (m_last_command == rsx::draw_command::inlined_array)
m_last_first_count = std::make_pair(0, (u32)clause.inline_vertex_array.size());
else
m_last_first_count = std::make_pair(clause.first_count_commands.front().first, clause.first_count_commands.back().second);
m_index_buffer_changed = false;
m_vertex_attribs_changed = false;
}
void thread::on_task()
@ -499,6 +536,17 @@ namespace rsx
m_vblank_thread->join();
m_vblank_thread.reset();
}
if (m_vertex_streaming_task.processing_threads.size() > 0)
{
for (auto &thr : m_vertex_streaming_task.processing_threads)
{
thr->join();
thr.reset();
}
m_vertex_streaming_task.processing_threads.resize(0);
}
}
std::string thread::get_name() const
@ -1072,4 +1120,112 @@ namespace rsx
fmt::throw_exception("%s(addr=0x%x): RSXIO memory not mapped" HERE, __FUNCTION__, addr);
}
}
void thread::post_vertex_stream_to_upload(gsl::span<const gsl::byte> src, gsl::span<gsl::byte> dst, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, std::function<void(void *, rsx::vertex_base_type, u8, u32)> callback)
{
	//Queue one attribute stream for the worker pool; processing begins when
	//start_vertex_upload_task() is called for the batch.
	//NOTE(review): packets is accessed without m_mtx_task here - assumes only the
	//RSX thread posts work while the pool is idle. TODO confirm against callers.
	upload_stream_packet packet;
	packet.dst_span = dst;
	packet.src_span = src;
	packet.src_stride = attribute_src_stride;
	packet.type = type;
	packet.dst_stride = dst_stride;
	packet.vector_width = vector_element_count;
	//Move instead of copy: a std::function copy may heap-allocate its captured state
	packet.post_upload_func = std::move(callback);

	m_vertex_streaming_task.packets.push_back(std::move(packet));
}
//Kick off MT processing of all packets queued via post_vertex_stream_to_upload().
//Lazily spawns the worker pool on first use (size from vertex_upload_threads config),
//waits for any previous batch to finish, then publishes the new batch by writing
//remaining_packets, which the workers poll.
void thread::start_vertex_upload_task(u32 vertex_count)
{
if (m_vertex_streaming_task.processing_threads.size() == 0)
{
const u32 streaming_thread_count = (u32)g_cfg.video.vertex_upload_threads;
m_vertex_streaming_task.processing_threads.resize(streaming_thread_count);
for (u32 n = 0; n < streaming_thread_count; ++n)
{
thread_ctrl::spawn(m_vertex_streaming_task.processing_threads[n], "Vertex Stream " + std::to_string(n), [this, n]()
{
auto &task = m_vertex_streaming_task;
const u32 index = n;
//Worker loop: poll for a published batch until emulator shutdown
while (!Emu.IsStopped())
{
if (task.remaining_packets != 0)
{
//Wait for me!
//NOTE(review): ready_threads is decremented on pickup and incremented when the
//batch is drained, so it goes negative while workers are busy - confirm the
//intended polarity against the ready_threads checks below and in vertex_upload_task_ready()
task.ready_threads--;
const size_t step = task.processing_threads.size();
const size_t job_count = task.packets.size();
//Process every nth packet
//Static round-robin partitioning: worker `index` handles packets index, index+step, ...
size_t current_job = index;
while (true)
{
if (current_job >= job_count)
break;
auto &packet = task.packets[current_job];
write_vertex_array_data_to_buffer(packet.dst_span, packet.src_span, task.vertex_count, packet.type, packet.vector_width, packet.src_stride, packet.dst_stride);
if (packet.post_upload_func)
packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, task.vertex_count);
//Make the written vertex data visible before signalling completion of this packet
_mm_sfence();
task.remaining_packets--;
current_job += step;
}
_mm_mfence();
//Spin until every worker has drained its share of the batch
while (task.remaining_packets > 0 && !Emu.IsStopped())
{
_mm_lfence();
std::this_thread::sleep_for(0us);
}
_mm_sfence();
task.ready_threads++;
}
else
std::this_thread::sleep_for(0us);
//thread_ctrl::wait();
//busy_wait();
}
});
}
}
//Wait for the previous batch to fully retire before publishing the next one.
//NOTE(review): this spins while ready_threads != 0, then resets it to 0 below -
//given the decrement/increment pattern above the steady-state value is unclear; verify
while (m_vertex_streaming_task.ready_threads != 0 && !Emu.IsStopped())
{
_mm_lfence();
busy_wait();
}
m_vertex_streaming_task.vertex_count = vertex_count;
m_vertex_streaming_task.ready_threads = 0;
//Publishing a non-zero remaining_packets releases the workers on the new batch
m_vertex_streaming_task.remaining_packets = (int)m_vertex_streaming_task.packets.size();
}
void thread::wait_for_vertex_upload_task()
{
	//Block the caller until the worker pool has consumed every posted packet
	auto &task = m_vertex_streaming_task;
	while (task.remaining_packets > 0 && !Emu.IsStopped())
	{
		_mm_lfence();
		busy_wait();
	}

	//Batch complete - discard the packet list ready for the next set of posts
	task.packets.resize(0);
}
bool thread::vertex_upload_task_ready()
{
	//MT streaming only makes sense with at least two worker threads configured
	const bool mt_enabled = g_cfg.video.vertex_upload_threads >= 2;
	if (!mt_enabled)
		return false;

	//Idle means no packets pending and no worker still inside the processing loop
	const auto &task = m_vertex_streaming_task;
	return task.remaining_packets == 0 && task.ready_threads == 0;
}
}

View file

@ -4,6 +4,7 @@
#include <deque>
#include <set>
#include <mutex>
#include <atomic>
#include "GCM.h"
#include "rsx_cache.h"
#include "RSXTexture.h"
@ -148,6 +149,8 @@ namespace rsx
bool m_rtts_dirty;
bool m_transform_constants_dirty;
bool m_textures_dirty[16];
bool m_vertex_attribs_changed;
bool m_index_buffer_changed;
protected:
std::array<u32, 4> get_color_surface_addresses() const;
u32 get_zeta_surface_address() const;
@ -221,6 +224,44 @@ namespace rsx
void append_array_element(u32 index);
u32 get_push_buffer_index_count() const;
protected:
//Save draw call parameters to detect instanced renders
std::pair<u32, u32> m_last_first_count;
rsx::draw_command m_last_command;
bool is_probable_instanced_draw();
public:
//MT vertex streaming
//One unit of work for the vertex streaming worker pool: copy a single vertex
//attribute stream from guest memory into the destination buffer.
struct upload_stream_packet
{
//Optional hook run after the copy: (dst ptr, base type, vector width, vertex count)
std::function<void(void *, rsx::vertex_base_type, u8, u32)> post_upload_func;
gsl::span<const gsl::byte> src_span; //source attribute data view
gsl::span<gsl::byte> dst_span; //destination region in the upload buffer
rsx::vertex_base_type type; //component base type of the attribute
u32 vector_width; //components per vertex element
u32 src_stride; //byte stride between vertices in src
u8 dst_stride; //byte stride between vertices in dst
};
//Shared state for one MT vertex-upload batch, polled lock-free by the worker pool.
struct upload_stream_task
{
std::vector<upload_stream_packet> packets; //work queue, filled by post_vertex_stream_to_upload()
std::atomic<int> remaining_packets = { 0 }; //decremented by workers; non-zero publishes a batch
//NOTE(review): decremented when a worker picks up work, incremented when it finishes -
//goes negative while workers are busy; confirm intended polarity with the spin-waits
std::atomic<int> ready_threads = { 0 };
std::atomic<u32> vertex_count; //vertex count shared by all packets of the current batch
std::vector<std::shared_ptr<thread_ctrl>> processing_threads; //worker pool (size = vertex_upload_threads)
};
upload_stream_task m_vertex_streaming_task;
void post_vertex_stream_to_upload(gsl::span<const gsl::byte> src, gsl::span<gsl::byte> dst, rsx::vertex_base_type type,
u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride,
std::function<void(void *, rsx::vertex_base_type, u8, u32)> callback);
void start_vertex_upload_task(u32 vertex_count);
void wait_for_vertex_upload_task();
bool vertex_upload_task_ready();
private:
std::mutex m_mtx_task;

View file

@ -637,6 +637,7 @@ VKGSRender::~VKGSRender()
}
//Close recording and wait for all to finish
close_render_pass();
CHECK_RESULT(vkEndCommandBuffer(*m_current_command_buffer));
for (auto &cb : m_primary_cb_list)
@ -753,12 +754,18 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
}
//This is awful!
while (m_flush_commands) _mm_pause();
while (m_flush_commands)
{
_mm_lfence();
_mm_pause();
}
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
m_queued_threads--;
_mm_sfence();
return status;
}
else
@ -800,6 +807,7 @@ void VKGSRender::begin()
flush_command_queue(true);
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_last_descriptor_set = VK_NULL_HANDLE;
m_used_descriptors = 0;
m_uniform_buffer_ring_info.reset_allocation_stats();
@ -811,8 +819,6 @@ void VKGSRender::begin()
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
}
std::chrono::time_point<steady_clock> start = steady_clock::now();
VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorSetCount = 1;
@ -823,6 +829,9 @@ void VKGSRender::begin()
CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set));
descriptor_sets = new_descriptor_set;
m_used_descriptors++;
std::chrono::time_point<steady_clock> start = steady_clock::now();
init_buffers();
@ -834,26 +843,116 @@ void VKGSRender::begin()
std::chrono::time_point<steady_clock> stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
m_used_descriptors++;
}
void VKGSRender::end()
void VKGSRender::emit_geometry_instance(u32/* instance_count*/)
{
	begin_render_pass();
	m_instanced_draws++;

	//Re-issue the previous draw call using the cached geometry state
	if (m_last_draw_indexed)
	{
		vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, m_last_ib_offset, m_last_ib_type);
		vkCmdDrawIndexed(*m_current_command_buffer, m_last_vertex_count, 1, 0, 0, 0);
	}
	else
	{
		vkCmdDraw(*m_current_command_buffer, m_last_vertex_count, 1, 0, 0);
	}
}
void VKGSRender::begin_render_pass()
{
if (render_pass_open)
return;
size_t idx = vk::get_render_pass_location(
vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first,
vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()),
(u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size());
VkRenderPass current_render_pass = m_render_passes[idx];
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
rp_begin.framebuffer = m_framebuffer_to_clean.back()->value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width();
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
render_pass_open = true;
}
void VKGSRender::close_render_pass()
{
	//Only end the pass if one is actually recording
	if (render_pass_open)
	{
		vkCmdEndRenderPass(*m_current_command_buffer);
		render_pass_open = false;
	}
}
void VKGSRender::end()
{
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
const bool is_instanced = is_probable_instanced_draw() && m_last_descriptor_set != VK_NULL_HANDLE && m_program != nullptr;
if (is_instanced)
{
//Copy descriptor set
VkCopyDescriptorSet copy_info[39];
u8 descriptors_count = 0;
for (u8 i = 0; i < 39; ++i)
{
if ((m_program->attribute_location_mask & (1ull << i)) == 0)
continue;
const u8 n = descriptors_count;
copy_info[n] = {};
copy_info[n].sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
copy_info[n].srcSet = m_last_descriptor_set;
copy_info[n].dstSet = descriptor_sets;
copy_info[n].srcBinding = i;
copy_info[n].dstBinding = i;
copy_info[n].srcArrayElement = 0;
copy_info[n].dstArrayElement = 0;
copy_info[n].descriptorCount = 1;
descriptors_count++;
}
vkUpdateDescriptorSets(*m_device, 0, nullptr, descriptors_count, (const VkCopyDescriptorSet*)&copy_info);
}
//Load program here since it is dependent on vertex state
load_program();
load_program(is_instanced);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
if (is_instanced)
{
//Only the program constants descriptors should have changed
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
emit_geometry_instance(1);
m_last_instanced_cb_index = m_current_cb_index;
rsx::thread::end();
return;
}
close_render_pass(); //Texture upload stuff conflicts active RPs
std::chrono::time_point<steady_clock> vertex_start0 = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end0 = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end0 - vertex_start0).count();
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
@ -948,21 +1047,7 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
rp_begin.framebuffer = m_framebuffer_to_clean.back()->value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width();
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
begin_render_pass();
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
@ -984,8 +1069,20 @@ void VKGSRender::end()
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);
if (m_attrib_ring_info.mapped)
{
wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
if (!index_info)
{
vkCmdDraw(*m_current_command_buffer, std::get<1>(upload_info), 1, 0, 0);
m_last_draw_indexed = false;
}
else
{
VkIndexType index_type;
@ -996,9 +1093,15 @@ void VKGSRender::end()
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
m_last_draw_indexed = false;
m_last_ib_type = index_type;
m_last_ib_offset = offset;
m_last_vertex_count = index_count;
}
vkCmdEndRenderPass(*m_current_command_buffer);
m_last_instanced_cb_index = ~0;
m_last_descriptor_set = descriptor_sets;
vk::leave_uninterruptible();
@ -1139,27 +1242,9 @@ void VKGSRender::clear_surface(u32 mask)
clear_regions.push_back(region);
}
size_t idx = vk::get_render_pass_location(
vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first,
vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, surface_depth_format),
(u8)targets.size());
VkRenderPass current_render_pass = m_render_passes[idx];
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
rp_begin.framebuffer = m_framebuffer_to_clean.back()->value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width();
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
begin_render_pass();
vkCmdClearAttachments(*m_current_command_buffer, (u32)clear_descriptors.size(), clear_descriptors.data(), (u32)clear_regions.size(), clear_regions.data());
vkCmdEndRenderPass(*m_current_command_buffer);
if (mask & 0x3)
{
if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
@ -1217,6 +1302,7 @@ void VKGSRender::copy_render_targets_to_dma_location()
void VKGSRender::flush_command_queue(bool hard_sync)
{
close_render_pass();
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
if (hard_sync)
@ -1325,7 +1411,11 @@ void VKGSRender::do_local_task()
flush_command_queue();
m_flush_commands = false;
while (m_queued_threads) _mm_pause();
while (m_queued_threads)
{
_mm_lfence();
_mm_pause();
}
}
}
@ -1345,177 +1435,185 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
}
}
bool VKGSRender::load_program()
bool VKGSRender::load_program(bool fast_update)
{
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
RSXVertexProgram vertex_program;
RSXFragmentProgram fragment_program;
if (!fast_update)
{
vk::render_target *surface = nullptr;
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface) return std::make_tuple(false, 0);
return std::make_tuple(true, surface->native_pitch);
};
vertex_program = get_current_vertex_program();
fragment_program = get_current_fragment_program(rtt_lookup_func);
vk::pipeline_props properties = {};
properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
bool unused;
properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused);
if (rsx::method_registers.restart_index_enabled())
{
properties.ia.primitiveRestartEnable = VK_TRUE;
}
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface) return std::make_tuple(false, 0);
return std::make_tuple(true, surface->native_pitch);
};
RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);
vk::pipeline_props properties = {};
properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
bool unused;
properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused);
if (rsx::method_registers.restart_index_enabled())
{
properties.ia.primitiveRestartEnable = VK_TRUE;
}
else
properties.ia.primitiveRestartEnable = VK_FALSE;
properties.ia.primitiveRestartEnable = VK_FALSE;
for (int i = 0; i < 4; ++i)
{
properties.att_state[i].colorWriteMask = 0xf;
properties.att_state[i].blendEnable = VK_FALSE;
}
for (int i = 0; i < 4; ++i)
{
properties.att_state[i].colorWriteMask = 0xf;
properties.att_state[i].blendEnable = VK_FALSE;
}
VkColorComponentFlags mask = 0;
if (rsx::method_registers.color_mask_a()) mask |= VK_COLOR_COMPONENT_A_BIT;
if (rsx::method_registers.color_mask_b()) mask |= VK_COLOR_COMPONENT_B_BIT;
if (rsx::method_registers.color_mask_g()) mask |= VK_COLOR_COMPONENT_G_BIT;
if (rsx::method_registers.color_mask_r()) mask |= VK_COLOR_COMPONENT_R_BIT;
VkColorComponentFlags mask = 0;
if (rsx::method_registers.color_mask_a()) mask |= VK_COLOR_COMPONENT_A_BIT;
if (rsx::method_registers.color_mask_b()) mask |= VK_COLOR_COMPONENT_B_BIT;
if (rsx::method_registers.color_mask_g()) mask |= VK_COLOR_COMPONENT_G_BIT;
if (rsx::method_registers.color_mask_r()) mask |= VK_COLOR_COMPONENT_R_BIT;
VkColorComponentFlags color_masks[4] = { mask };
VkColorComponentFlags color_masks[4] = { mask };
u8 render_targets[] = { 0, 1, 2, 3 };
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx)
{
properties.att_state[render_targets[idx]].colorWriteMask = mask;
}
if (rsx::method_registers.blend_enabled())
{
VkBlendFactor sfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_rgb());
VkBlendFactor sfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_a());
VkBlendFactor dfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_rgb());
VkBlendFactor dfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_a());
VkBlendOp equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb());
VkBlendOp equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a());
u8 render_targets[] = { 0, 1, 2, 3 };
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx)
{
properties.att_state[render_targets[idx]].blendEnable = VK_TRUE;
properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb;
properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb;
properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a;
properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a;
properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb;
properties.att_state[render_targets[idx]].alphaBlendOp = equation_a;
properties.att_state[render_targets[idx]].colorWriteMask = mask;
}
auto blend_colors = rsx::get_constant_blend_colors();
properties.cs.blendConstants[0] = blend_colors[0];
properties.cs.blendConstants[1] = blend_colors[1];
properties.cs.blendConstants[2] = blend_colors[2];
properties.cs.blendConstants[3] = blend_colors[3];
}
else
{
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx)
if (rsx::method_registers.blend_enabled())
{
properties.att_state[render_targets[idx]].blendEnable = VK_FALSE;
}
}
properties.cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
properties.cs.attachmentCount = m_draw_buffers_count;
properties.cs.pAttachments = properties.att_state;
if (rsx::method_registers.logic_op_enabled())
{
properties.cs.logicOpEnable = true;
properties.cs.logicOp = vk::get_logic_op(rsx::method_registers.logic_operation());
}
VkBlendFactor sfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_rgb());
VkBlendFactor sfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_a());
VkBlendFactor dfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_rgb());
VkBlendFactor dfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_a());
properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
properties.ds.depthWriteEnable = rsx::method_registers.depth_write_enabled() ? VK_TRUE : VK_FALSE;
VkBlendOp equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb());
VkBlendOp equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a());
if (rsx::method_registers.depth_bounds_test_enabled())
{
properties.ds.depthBoundsTestEnable = VK_TRUE;
properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min();
properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max();
}
else
properties.ds.depthBoundsTestEnable = VK_FALSE;
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx)
{
properties.att_state[render_targets[idx]].blendEnable = VK_TRUE;
properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb;
properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb;
properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a;
properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a;
properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb;
properties.att_state[render_targets[idx]].alphaBlendOp = equation_a;
}
if (rsx::method_registers.stencil_test_enabled())
{
properties.ds.stencilTestEnable = VK_TRUE;
properties.ds.front.writeMask = rsx::method_registers.stencil_mask();
properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask();
properties.ds.front.reference = rsx::method_registers.stencil_func_ref();
properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail());
properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass());
properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail());
properties.ds.front.compareOp = vk::get_compare_func(rsx::method_registers.stencil_func());
if (rsx::method_registers.two_sided_stencil_test_enabled())
{
properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask();
properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask();
properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref();
properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail());
properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass());
properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail());
properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func());
auto blend_colors = rsx::get_constant_blend_colors();
properties.cs.blendConstants[0] = blend_colors[0];
properties.cs.blendConstants[1] = blend_colors[1];
properties.cs.blendConstants[2] = blend_colors[2];
properties.cs.blendConstants[3] = blend_colors[3];
}
else
properties.ds.back = properties.ds.front;
{
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx)
{
properties.att_state[render_targets[idx]].blendEnable = VK_FALSE;
}
}
properties.cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
properties.cs.attachmentCount = m_draw_buffers_count;
properties.cs.pAttachments = properties.att_state;
if (rsx::method_registers.logic_op_enabled())
{
properties.cs.logicOpEnable = true;
properties.cs.logicOp = vk::get_logic_op(rsx::method_registers.logic_operation());
}
properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
properties.ds.depthWriteEnable = rsx::method_registers.depth_write_enabled() ? VK_TRUE : VK_FALSE;
if (rsx::method_registers.depth_bounds_test_enabled())
{
properties.ds.depthBoundsTestEnable = VK_TRUE;
properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min();
properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max();
}
else
properties.ds.depthBoundsTestEnable = VK_FALSE;
if (rsx::method_registers.stencil_test_enabled())
{
properties.ds.stencilTestEnable = VK_TRUE;
properties.ds.front.writeMask = rsx::method_registers.stencil_mask();
properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask();
properties.ds.front.reference = rsx::method_registers.stencil_func_ref();
properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail());
properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass());
properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail());
properties.ds.front.compareOp = vk::get_compare_func(rsx::method_registers.stencil_func());
if (rsx::method_registers.two_sided_stencil_test_enabled())
{
properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask();
properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask();
properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref();
properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail());
properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass());
properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail());
properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func());
}
else
properties.ds.back = properties.ds.front;
}
else
properties.ds.stencilTestEnable = VK_FALSE;
if (rsx::method_registers.depth_test_enabled())
{
properties.ds.depthTestEnable = VK_TRUE;
properties.ds.depthCompareOp = vk::get_compare_func(rsx::method_registers.depth_func());
}
else
properties.ds.depthTestEnable = VK_FALSE;
properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
properties.rs.polygonMode = VK_POLYGON_MODE_FILL;
properties.rs.depthClampEnable = VK_FALSE;
properties.rs.rasterizerDiscardEnable = VK_FALSE;
properties.rs.depthBiasEnable = VK_FALSE;
if (rsx::method_registers.cull_face_enabled())
properties.rs.cullMode = vk::get_cull_face(rsx::method_registers.cull_face_mode());
else
properties.rs.cullMode = VK_CULL_MODE_NONE;
properties.rs.frontFace = vk::get_front_face(rsx::method_registers.front_face_mode());
size_t idx = vk::get_render_pass_location(
vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first,
vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()),
(u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size());
properties.render_pass = m_render_passes[idx];
properties.num_targets = m_draw_buffers_count;
vk::enter_uninterruptible();
//Load current program from buffer
m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
}
else
properties.ds.stencilTestEnable = VK_FALSE;
if (rsx::method_registers.depth_test_enabled())
{
properties.ds.depthTestEnable = VK_TRUE;
properties.ds.depthCompareOp = vk::get_compare_func(rsx::method_registers.depth_func());
}
else
properties.ds.depthTestEnable = VK_FALSE;
properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
properties.rs.polygonMode = VK_POLYGON_MODE_FILL;
properties.rs.depthClampEnable = VK_FALSE;
properties.rs.rasterizerDiscardEnable = VK_FALSE;
properties.rs.depthBiasEnable = VK_FALSE;
if (rsx::method_registers.cull_face_enabled())
properties.rs.cullMode = vk::get_cull_face(rsx::method_registers.cull_face_mode());
else
properties.rs.cullMode = VK_CULL_MODE_NONE;
properties.rs.frontFace = vk::get_front_face(rsx::method_registers.front_face_mode());
size_t idx = vk::get_render_pass_location(
vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first,
vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()),
(u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size());
properties.render_pass = m_render_passes[idx];
properties.num_targets = m_draw_buffers_count;
vk::enter_uninterruptible();
//Load current program from buffer
m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
vk::enter_uninterruptible();
//TODO: Update constant buffers..
//1. Update scale-offset matrix
@ -1534,26 +1632,35 @@ bool VKGSRender::load_program()
m_uniform_buffer_ring_info.unmap();
const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float));
buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float));
fill_vertex_program_constants_data(buf);
*(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits();
m_uniform_buffer_ring_info.unmap();
const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float));
const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz);
buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz);
if (fragment_constants_sz)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);
fill_fragment_state_buffer(buf+fragment_constants_sz, fragment_program);
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
if (!fast_update || m_transform_constants_dirty)
{
const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float));
buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float));
fill_vertex_program_constants_data(buf);
*(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits();
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
m_transform_constants_dirty = false;
}
if (!fast_update)
{
const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float));
const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz);
buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz);
if (fragment_constants_sz)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
}
vk::leave_uninterruptible();
@ -1642,6 +1749,7 @@ void VKGSRender::prepare_rtts()
if (!m_rtts_dirty)
return;
close_render_pass();
copy_render_targets_to_dma_location();
m_rtts_dirty = false;
@ -1773,6 +1881,7 @@ void VKGSRender::flip(int buffer)
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();
close_render_pass();
process_swap_request();
if (!resize_screen)
@ -1859,7 +1968,7 @@ void VKGSRender::flip(int buffer)
swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres));
direct_fbo.reset(new vk::framebuffer(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view)));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls) + ", instanced repeats: " + std::to_string(m_instanced_draws));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us");
@ -1956,6 +2065,7 @@ void VKGSRender::flip(int buffer)
//Resource destruction is handled within the real swap handler
m_draw_calls = 0;
m_instanced_draws = 0;
m_draw_time = 0;
m_setup_time = 0;
m_vertex_upload_time = 0;

View file

@ -156,6 +156,8 @@ private:
u32 m_client_height = 0;
u32 m_draw_calls = 0;
u32 m_instanced_draws = 0;
s64 m_setup_time = 0;
s64 m_vertex_upload_time = 0;
s64 m_textures_upload_time = 0;
@ -176,6 +178,16 @@ private:
std::atomic<int> m_queued_threads = { 0 };
std::thread::id rsx_thread;
VkPrimitiveTopology m_last_primititve_type;
VkIndexType m_last_ib_type;
VkDescriptorSet m_last_descriptor_set;
size_t m_last_ib_offset;
u32 m_last_vertex_count;
bool m_last_draw_indexed;
u32 m_last_instanced_cb_index;
bool render_pass_open = false;
#ifdef __linux__
Display *m_display_handle = nullptr;
@ -197,10 +209,15 @@ private:
void queue_swap_request();
void process_swap_request();
void begin_render_pass();
void close_render_pass();
void emit_geometry_instance(u32 instance_count);
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
public:
bool load_program();
bool load_program(bool fast_update = false);
void init_buffers(bool skip_reading = false);
void read_buffers();
void write_buffers();

View file

@ -18,7 +18,7 @@
#include "../Common/TextureUtils.h"
#include "../Common/ring_buffer_helper.h"
#define DESCRIPTOR_MAX_DRAW_CALLS 1024
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
@ -1391,6 +1391,7 @@ namespace vk
VkDevice m_device;
public:
VkPipeline pipeline;
u64 attribute_location_mask;
program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs);
program(const program&) = delete;
@ -1409,14 +1410,17 @@ namespace vk
// Ring-buffer style data heap backed by a host-visible Vulkan buffer.
// Tracks whether the backing buffer is currently mapped so callers
// (e.g. the threaded vertex-upload path) can detect an outstanding
// mapping and unmap it before re-mapping a different range.
struct vk_data_heap : public data_heap
{
std::unique_ptr<vk::buffer> heap;
// True while a map() has not yet been balanced by unmap().
bool mapped = false;
// Maps [offset, offset + size) of the backing buffer into host memory
// and records the mapped state. Returns the host pointer to the range.
void* map(size_t offset, size_t size)
{
mapped = true;
return heap->map(offset, size);
}
// Releases the current host mapping and clears the mapped flag.
void unmap()
{
mapped = false;
heap->unmap();
}
};

View file

@ -10,6 +10,7 @@ namespace vk
{
load_uniforms(glsl::program_domain::glsl_vertex_program, vertex_input);
load_uniforms(glsl::program_domain::glsl_vertex_program, fragment_inputs);
attribute_location_mask = 0;
}
program::~program()
@ -60,6 +61,7 @@ namespace vk
descriptor_writer.dstBinding = uniform.location + TEXTURES_FIRST_BIND_SLOT;
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << (uniform.location + TEXTURES_FIRST_BIND_SLOT));
return;
}
}
@ -79,6 +81,7 @@ namespace vk
descriptor_writer.dstBinding = binding_point;
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << binding_point);
}
void program::bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set)
@ -97,6 +100,7 @@ namespace vk
descriptor_writer.dstBinding = uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT;
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << (uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT));
return;
}
}

View file

@ -347,11 +347,13 @@ namespace
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
std::function<attribute_storage(
const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
get_vertex_buffers_f)
get_vertex_buffers_f,
VKGSRender *thread)
: m_device(device), m_index_buffer_ring_info(index_buffer_ring_info),
m_attrib_ring_info(attrib_ring_info), m_program(program),
m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean),
get_vertex_buffers(get_vertex_buffers_f)
get_vertex_buffers(get_vertex_buffers_f),
rsxthr(thread)
{
}
@ -450,14 +452,129 @@ namespace
std::function<attribute_storage(
const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
get_vertex_buffers;
VKGSRender* rsxthr;
void upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
{
vertex_buffer_visitor visitor(vertex_max_index - min_index + 1, m_device,
const u32 vertex_count = vertex_max_index - min_index + 1;
vertex_buffer_visitor visitor(vertex_count, m_device,
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean);
const auto& vertex_buffers = get_vertex_buffers(
rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
//1. Check if we can get all these allocations at once
std::vector<size_t> memory_allocations(16);
std::vector<u32> allocated_sizes(16);
std::vector<int> upload_jobs(16);
memory_allocations.resize(0);
allocated_sizes.resize(0);
upload_jobs.resize(0);
for (int i = 0; i < vertex_buffers.size(); ++i)
{
const auto &vbo = vertex_buffers[i];
if (vbo.which() == 0 && vertex_count > 128 && vertex_buffers.size() > 2 && rsxthr->vertex_upload_task_ready())
{
//vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>();
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
u32 upload_size = real_element_size * vertex_count;
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
memory_allocations.push_back(offset);
allocated_sizes.push_back(upload_size);
upload_jobs.push_back(i);
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
}
else
std::apply_visitor(visitor, vbo);
}
if (memory_allocations.size() > 0)
{
if (memory_allocations.size() > 1)
{
//2 sets in case the blocks dont fit
u8 available_jobs[2] = {};
u32 allocated_block[2] = {};
size_t last_offset = memory_allocations[0];
u8 current_index = 0;
for (int n = 0; n < memory_allocations.size(); ++n)
{
if (memory_allocations[n] < last_offset)
{
//queue went around
current_index = 1;
}
available_jobs[current_index] ++;
allocated_block[current_index] += allocated_sizes[n];
}
int n = 0;
for (int task = 0; task < 2; ++task)
{
if (available_jobs[task])
{
if (m_attrib_ring_info.mapped)
{
rsxthr->wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
size_t space_remaining = allocated_block[task];
size_t offset_base = memory_allocations[n];
gsl::byte* dst = (gsl::byte*)m_attrib_ring_info.map(memory_allocations[n], space_remaining);
while (true)
{
if (space_remaining == 0)
break;
const auto& vertex_array = vertex_buffers[upload_jobs[n]].get<rsx::vertex_array_buffer>();
const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
gsl::span<gsl::byte> dest_span(dst + (memory_allocations[n] - offset_base), allocated_sizes[n]);
rsxthr->post_vertex_stream_to_upload(vertex_array.data, dest_span, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size, vk::prepare_buffer_for_writing);
space_remaining -= allocated_sizes[n];
n++;
}
rsxthr->start_vertex_upload_task(vertex_count);
}
}
}
else
{
const size_t offset_in_attrib_buffer = memory_allocations[0];
const u32 upload_size = allocated_sizes[0];
const auto& vertex_array = vertex_buffers[upload_jobs[0]].get<rsx::vertex_array_buffer>();
const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);
gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size);
vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
m_attrib_ring_info.unmap();
}
}
}
u32 upload_inlined_array()
@ -551,6 +668,6 @@ VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
descriptor_sets, m_buffer_view_to_clean,
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range); });
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range);}, this);
return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
}

View file

@ -385,6 +385,20 @@ namespace rsx
rsx->m_textures_dirty[index] = true;
}
};
// RSX method handler: flags the vertex attribute state as changed when a
// vertex data array offset register is written. The template index exists
// to satisfy bind_range<> (one instantiation per attribute slot); the
// handler itself sets a single shared dirty flag regardless of slot.
template<u32 index>
struct set_vertex_array_dirty_bit
{
static void impl(thread* rsx, u32, u32)
{
rsx->m_vertex_attribs_changed = true;
}
};
// RSX method handler: marks the index buffer as changed when the index
// array address register is written, so the next draw re-uploads indices
// instead of reusing the cached ones (used by the instanced-draw fast path).
void set_idbuf_dirty_bit(thread* rsx, u32, u32)
{
rsx->m_index_buffer_changed = true;
}
}
namespace nv308a
@ -1428,6 +1442,8 @@ namespace rsx
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 1, 16, nv4097::set_vertex_array_dirty_bit>();
bind<NV4097_SET_INDEX_ARRAY_ADDRESS, nv4097::set_idbuf_dirty_bit>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();

View file

@ -117,6 +117,7 @@ struct push_buffer_vertex_info
case vertex_base_type::f:
*(u32*)dst = se_storage<u32>::swap(arg);
break;
case vertex_base_type::s1:
case vertex_base_type::ub:
case vertex_base_type::ub256:
*(u32*)dst = arg;

View file

@ -314,6 +314,9 @@ struct cfg_root : cfg::node
cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true};
cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"};
cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false};
cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 };
struct node_d3d12 : cfg::node
{
node_d3d12(cfg::node* _this) : cfg::node(_this, "D3D12") {}