vk/rsx: Bug fixes and improvements

- Improvements to framebuffer usage; Avoid creating new resources every frame
- Handle null fragment program properly
- Collect vertex upload statistics

- vk: Pre-initialize 'unused' varying registers in the vertex shader in case it gets matched with a fragment shader that consumes them
 -- Fixes a crash about fog_c not being declared

gl/dx12/vk: Handle null fragment program

- cleanup - use yield semantics instead of sleep(0), as yield is more cross-platform
 -- sleep(0) is a Windows-specific scheduler hint
This commit is contained in:
kd-11 2017-07-13 00:49:50 +03:00
parent 72e13ddeb2
commit 05ffb50037
11 changed files with 317 additions and 97 deletions

View file

@ -324,6 +324,17 @@ void D3D12GSRender::end()
{
std::chrono::time_point<steady_clock> start_duration = steady_clock::now();
std::chrono::time_point<steady_clock> program_load_start = steady_clock::now();
load_program();
std::chrono::time_point<steady_clock> program_load_end = steady_clock::now();
m_timers.program_load_duration += std::chrono::duration_cast<std::chrono::microseconds>(program_load_end - program_load_start).count();
if (!m_fragment_program.valid)
{
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> rtt_duration_start = steady_clock::now();
prepare_render_targets(get_current_resource_storage().command_list.Get());
@ -344,11 +355,6 @@ void D3D12GSRender::end()
std::chrono::time_point<steady_clock> vertex_index_duration_end = steady_clock::now();
m_timers.vertex_index_duration += std::chrono::duration_cast<std::chrono::microseconds>(vertex_index_duration_end - vertex_index_duration_start).count();
std::chrono::time_point<steady_clock> program_load_start = steady_clock::now();
load_program();
std::chrono::time_point<steady_clock> program_load_end = steady_clock::now();
m_timers.program_load_duration += std::chrono::duration_cast<std::chrono::microseconds>(program_load_end - program_load_start).count();
get_current_resource_storage().command_list->SetGraphicsRootSignature(m_shared_root_signature.Get());
get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers.stencil_func_ref());

View file

@ -56,6 +56,9 @@ void D3D12GSRender::load_program()
m_vertex_program = get_current_vertex_program();
m_fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!m_fragment_program.valid)
return;
D3D12PipelineProperties prop = {};
prop.Topology = get_primitive_topology_type(rsx::method_registers.current_draw_clause.primitive);

View file

@ -322,17 +322,15 @@ namespace
void GLGSRender::end()
{
if (skip_frame || !framebuffer_status_valid)
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (skip_frame || !framebuffer_status_valid || !load_program())
{
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
load_program();
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -841,8 +839,10 @@ bool GLGSRender::load_program()
return std::make_tuple(true, surface->get_native_pitch());
};
RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
RSXVertexProgram vertex_program = get_current_vertex_program();
u32 unnormalized_rtts = 0;

View file

@ -239,6 +239,8 @@ struct RSXFragmentProgram
u8 textures_alpha_kill[16];
u32 textures_zfunc[16];
bool valid;
rsx::texture_dimension_extended get_texture_dimension(u8 id) const
{
return (rsx::texture_dimension_extended)((texture_dimensions >> (id * 2)) & 0x3);
@ -263,6 +265,7 @@ struct RSXFragmentProgram
, ctrl(0)
, unnormalized_coords(0)
, texture_dimensions(0)
, valid(false)
{
}
};

View file

@ -974,9 +974,17 @@ namespace rsx
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const
{
RSXFragmentProgram result = {};
u32 shader_program = rsx::method_registers.shader_program_address();
result.offset = shader_program & ~0x3;
result.addr = vm::base(rsx::get_address(result.offset, (shader_program & 0x3) - 1));
const u32 shader_program = rsx::method_registers.shader_program_address();
if (shader_program == 0)
return result;
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);
result.offset = program_offset;
result.addr = vm::base(rsx::get_address(program_offset, program_location));
result.valid = true;
result.ctrl = rsx::method_registers.shader_control();
result.unnormalized_coords = 0;
result.front_back_color_enabled = !rsx::method_registers.two_side_light_en();
@ -1174,26 +1182,26 @@ namespace rsx
if (packet.post_upload_func)
packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, task.vertex_count);
_mm_sfence();
task.remaining_packets--;
current_job += step;
_mm_sfence();
}
_mm_mfence();
while (task.remaining_packets > 0 && !Emu.IsStopped())
{
std::this_thread::yield();
_mm_lfence();
std::this_thread::sleep_for(0us);
}
_mm_sfence();
task.ready_threads++;
_mm_sfence();
}
else
std::this_thread::sleep_for(0us);
//thread_ctrl::wait();
//busy_wait();
{
std::this_thread::yield();
}
}
});
}
@ -1201,8 +1209,7 @@ namespace rsx
while (m_vertex_streaming_task.ready_threads != 0 && !Emu.IsStopped())
{
_mm_lfence();
busy_wait();
_mm_pause();
}
m_vertex_streaming_task.vertex_count = vertex_count;
@ -1214,8 +1221,7 @@ namespace rsx
{
while (m_vertex_streaming_task.remaining_packets > 0 && !Emu.IsStopped())
{
_mm_lfence();
busy_wait();
_mm_pause();
}
m_vertex_streaming_task.packets.resize(0);

View file

@ -677,6 +677,7 @@ VKGSRender::~VKGSRender()
m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
m_draw_fbo.reset();
//Render passes
for (auto &render_pass : m_render_passes)
@ -880,11 +881,11 @@ void VKGSRender::begin_render_pass()
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
rp_begin.framebuffer = m_framebuffer_to_clean.back()->value;
rp_begin.framebuffer = m_draw_fbo->value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width();
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
rp_begin.renderArea.extent.width = m_draw_fbo->width();
rp_begin.renderArea.extent.height = m_draw_fbo->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
render_pass_open = true;
@ -941,10 +942,15 @@ void VKGSRender::end()
}
//Load program here since it is dependent on vertex state
load_program(is_instanced);
if (!load_program(is_instanced))
{
LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw");
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
if (is_instanced)
{
@ -1123,7 +1129,7 @@ void VKGSRender::end()
depth_clear_value.depthStencil.depth = 1.f;
depth_clear_value.depthStencil.stencil = 255;
VkClearRect clear_rect = { 0, 0, m_framebuffer_to_clean.back()->width(), m_framebuffer_to_clean.back()->height(), 0, 1 };
VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 };
VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value };
vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect);
@ -1133,18 +1139,15 @@ void VKGSRender::end()
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);
if (m_attrib_ring_info.mapped)
{
wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
if (!index_info)
{
vkCmdDraw(*m_current_command_buffer, std::get<1>(upload_info), 1, 0, 0);
const auto vertex_count = std::get<1>(upload_info);
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
m_last_vertex_count = vertex_count;
m_last_draw_indexed = false;
}
else
@ -1175,6 +1178,22 @@ void VKGSRender::end()
copy_render_targets_to_dma_location();
m_draw_calls++;
if (g_cfg.video.overlay)
{
if (m_last_vertex_count < 1024)
m_uploads_small++;
else if (m_last_vertex_count < 2048)
m_uploads_1k++;
else if (m_last_vertex_count < 4096)
m_uploads_2k++;
else if (m_last_vertex_count < 8192)
m_uploads_4k++;
else if (m_last_vertex_count < 16384)
m_uploads_8k++;
else
m_uploads_16k++;
}
rsx::thread::end();
}
@ -1260,8 +1279,8 @@ void VKGSRender::clear_surface(u32 mask)
u16 scissor_y = rsx::method_registers.scissor_origin_y();
u16 scissor_h = rsx::method_registers.scissor_height();
const u32 fb_width = m_framebuffer_to_clean.back()->width();
const u32 fb_height = m_framebuffer_to_clean.back()->height();
const u32 fb_width = m_draw_fbo->width();
const u32 fb_height = m_draw_fbo->height();
//clip region
std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true);
@ -1392,6 +1411,12 @@ void VKGSRender::copy_render_targets_to_dma_location()
void VKGSRender::flush_command_queue(bool hard_sync)
{
if (m_attrib_ring_info.mapped)
{
wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
close_render_pass();
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
@ -1480,7 +1505,13 @@ void VKGSRender::process_swap_request()
m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
m_framebuffer_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
if (g_cfg.video.overlay)
{
@ -1545,8 +1576,10 @@ bool VKGSRender::load_program(bool fast_update)
return std::make_tuple(true, surface->native_pitch);
};
vertex_program = get_current_vertex_program();
fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
vertex_program = get_current_vertex_program();
vk::pipeline_props properties = {};
@ -1864,6 +1897,35 @@ void VKGSRender::prepare_rtts()
const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(),
rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() };
if (m_draw_fbo)
{
const u32 fb_width = m_draw_fbo->width();
const u32 fb_height = m_draw_fbo->height();
bool really_changed = false;
if (fb_width == clip_width && fb_height == clip_height)
{
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (m_surface_info[i].address != surface_addresses[i])
{
really_changed = true;
break;
}
}
if (!really_changed)
{
if (zeta_address == m_depth_surface_info.address)
{
//Nothing has changed, we're still using the same framebuffer
return;
}
}
}
}
m_rtts.prepare_render_target(&*m_current_command_buffer,
rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(),
clip_width, clip_height,
@ -1887,20 +1949,16 @@ void VKGSRender::prepare_rtts()
//Bind created rtts as current fbo...
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
std::vector<std::unique_ptr<vk::image_view>> fbo_images;
//Search old framebuffers for this same configuration
bool framebuffer_found = false;
std::vector<vk::image*> bound_images;
bound_images.reserve(5);
for (u8 index : draw_buffers)
{
vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]);
VkImageSubresourceRange subres = {};
subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
bound_images.push_back(std::get<1>(m_rtts.m_bound_render_targets[index]));
m_surface_info[index].address = surface_addresses[index];
m_surface_info[index].pitch = surface_pitchs[index];
@ -1913,20 +1971,9 @@ void VKGSRender::prepare_rtts()
}
}
m_draw_buffers_count = static_cast<u32>(fbo_images.size());
if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr)
if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
{
vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil));
VkImageSubresourceRange subres = {};
subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
bound_images.push_back(std::get<1>(m_rtts.m_bound_depth_stencil));
m_depth_surface_info.address = zeta_address;
m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch();
@ -1935,6 +1982,8 @@ void VKGSRender::prepare_rtts()
m_depth_surface_info.pitch = 0;
}
m_draw_buffers_count = static_cast<u32>(bound_images.size());
if (g_cfg.video.write_color_buffers)
{
for (u8 index : draw_buffers)
@ -1943,7 +1992,7 @@ void VKGSRender::prepare_rtts()
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height);
m_surface_info[index].width, m_surface_info[index].height);
}
}
@ -1960,10 +2009,59 @@ void VKGSRender::prepare_rtts()
}
}
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
VkRenderPass current_render_pass = m_render_passes[idx];
for (auto &fbo : m_framebuffer_to_clean)
{
if (fbo->matches(bound_images, clip_width, clip_height))
{
m_draw_fbo.swap(fbo);
m_draw_fbo->reset_refs();
framebuffer_found = true;
//LOG_ERROR(RSX, "Matching framebuffer exists, using that instead");
break;
}
}
m_framebuffer_to_clean.push_back(std::make_unique<vk::framebuffer>(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
if (!framebuffer_found)
{
std::vector<std::unique_ptr<vk::image_view>> fbo_images;
fbo_images.reserve(5);
for (u8 index : draw_buffers)
{
vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]);
VkImageSubresourceRange subres = {};
subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
}
if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr)
{
vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil));
VkImageSubresourceRange subres = {};
subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
}
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
VkRenderPass current_render_pass = m_render_passes[idx];
if (m_draw_fbo)
m_framebuffer_to_clean.push_back(std::move(m_draw_fbo));
m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
}
}
@ -1982,6 +2080,13 @@ void VKGSRender::flip(int buffer)
m_setup_time = 0;
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
m_uploads_small = 0;
m_uploads_1k = 0;
m_uploads_2k = 0;
m_uploads_4k = 0;
m_uploads_8k = 0;
m_uploads_16k = 0;
}
return;
@ -2061,7 +2166,7 @@ void VKGSRender::flip(int buffer)
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
}
std::unique_ptr<vk::framebuffer> direct_fbo;
std::unique_ptr<vk::framebuffer_holder> direct_fbo;
std::vector<std::unique_ptr<vk::image_view>> swap_image_view;
if (g_cfg.video.overlay)
{
@ -2083,9 +2188,24 @@ void VKGSRender::flip(int buffer)
size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
VkRenderPass single_target_pass = m_render_passes[idx];
swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres));
direct_fbo.reset(new vk::framebuffer(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view)));
for (auto &It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++)
{
auto &fbo = *It;
if (fbo->attachments[0]->info.image == target_image)
{
direct_fbo.swap(fbo);
direct_fbo->reset_refs();
m_framebuffer_to_clean.erase(It);
break;
}
}
if (!direct_fbo)
{
swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres));
direct_fbo.reset(new vk::framebuffer_holder(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view)));
}
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls) + ", instanced repeats: " + std::to_string(m_instanced_draws));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
@ -2093,10 +2213,29 @@ void VKGSRender::flip(int buffer)
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
//Vertex upload statistics
u32 _small, _1k, _2k, _4k, _8k, _16k;
if (m_draw_calls > 0)
{
_small = m_uploads_small * 100 / m_draw_calls;
_1k = m_uploads_1k * 100 / m_draw_calls;
_2k = m_uploads_2k * 100 / m_draw_calls;
_4k = m_uploads_4k * 100 / m_draw_calls;
_8k = m_uploads_8k * 100 / m_draw_calls;
_16k = m_uploads_16k * 100 / m_draw_calls;
}
else
{
_small = _1k = _2k = _4k = _8k = _16k = 0;
}
std::string message = fmt::format("Vertex sizes: < 1k: %d%%, 1k+: %d%%, 2k+: %d%%, 4k+: %d%%, 8k+: %d%%, 16k+: %d%%", _small, _1k, _2k, _4k, _8k, _16k);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
}
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
queue_swap_request();
}
else
@ -2194,4 +2333,11 @@ void VKGSRender::flip(int buffer)
m_setup_time = 0;
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
m_uploads_small = 0;
m_uploads_1k = 0;
m_uploads_2k = 0;
m_uploads_4k = 0;
m_uploads_8k = 0;
m_uploads_16k = 0;
}

View file

@ -149,15 +149,26 @@ private:
vk::descriptor_pool descriptor_pool;
std::vector<std::unique_ptr<vk::buffer_view> > m_buffer_view_to_clean;
std::vector<std::unique_ptr<vk::framebuffer> > m_framebuffer_to_clean;
std::vector<std::unique_ptr<vk::sampler> > m_sampler_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder> > m_framebuffer_to_clean;
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;
u32 m_client_width = 0;
u32 m_client_height = 0;
// Draw call stats
u32 m_draw_calls = 0;
u32 m_instanced_draws = 0;
// Vertex buffer usage stats
u32 m_uploads_small = 0;
u32 m_uploads_1k = 0;
u32 m_uploads_2k = 0;
u32 m_uploads_4k = 0;
u32 m_uploads_8k = 0;
u32 m_uploads_16k = 0;
// Timers
s64 m_setup_time = 0;
s64 m_vertex_upload_time = 0;
s64 m_textures_upload_time = 0;

View file

@ -650,17 +650,17 @@ namespace vk
{
VkFramebuffer value;
VkFramebufferCreateInfo info = {};
std::vector<std::unique_ptr<vk::image_view>> attachements;
std::vector<std::unique_ptr<vk::image_view>> attachments;
u32 m_width = 0;
u32 m_height = 0;
public:
framebuffer(VkDevice dev, VkRenderPass pass, u32 width, u32 height, std::vector<std::unique_ptr<vk::image_view>> &&atts)
: m_device(dev), attachements(std::move(atts))
: m_device(dev), attachments(std::move(atts))
{
std::vector<VkImageView> image_view_array(attachements.size());
std::vector<VkImageView> image_view_array(attachments.size());
size_t i = 0;
for (const auto &att : attachements)
for (const auto &att : attachments)
{
image_view_array[i++] = att->value;
}
@ -694,6 +694,24 @@ namespace vk
return m_height;
}
bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
{
if (m_width != width || m_height != height)
return false;
if (fbo_images.size() != attachments.size())
return false;
for (int n = 0; n < fbo_images.size(); ++n)
{
if (attachments[n]->info.image != fbo_images[n]->value ||
attachments[n]->info.format != fbo_images[n]->info.format)
return false;
}
return true;
}
framebuffer(const framebuffer&) = delete;
framebuffer(framebuffer&&) = delete;

View file

@ -7,12 +7,17 @@
#include "../Common/TextureUtils.h"
#include "VKFormats.h"
//Mixin carrying a small counter for deferred destruction.
//Cleanup passes elsewhere in the renderer bump deref_count on each visit and
//drop the object once the count reaches their threshold.
struct ref_counted
{
	//Starts at zero for a freshly created object
	u8 deref_count{};

	//Puts the counter back to zero, e.g. when the object is picked up for reuse
	void reset_refs()
	{
		deref_count = 0;
	}
};
namespace vk
{
struct render_target : public image
struct render_target : public image, public ref_counted
{
u8 deref_count = 0;
bool dirty = false;
u16 native_pitch = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
@ -36,6 +41,17 @@ namespace vk
mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
{}
};
//A vk::framebuffer that also carries the ref_counted counter.
//Adds no state of its own; every constructor argument goes straight to the
//vk::framebuffer base.
struct framebuffer_holder : public vk::framebuffer, public ref_counted
{
	framebuffer_holder(VkDevice dev, VkRenderPass pass, u32 width, u32 height,
		std::vector<std::unique_ptr<vk::image_view>> &&atts)
		: vk::framebuffer(dev, pass, width, height, std::move(atts))
	{
	}
};
}
namespace rsx
@ -270,9 +286,9 @@ namespace rsx
void free_invalidated()
{
invalidated_resources.remove_if([](std::unique_ptr<vk::render_target>& rtt)
invalidated_resources.remove_if([](std::unique_ptr<vk::render_target> &rtt)
{
if (rtt->deref_count > 1) return true;
if (rtt->deref_count >= 2) return true;
rtt->deref_count++;
return false;

View file

@ -477,7 +477,7 @@ namespace
{
const auto &vbo = vertex_buffers[i];
if (vbo.which() == 0 && vertex_count >= g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
{
//vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>();

View file

@ -146,8 +146,9 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
static const vertex_reg_info reg_table[] =
{
{ "gl_Position", false, "dst_reg0", "", false },
{ "back_diff_color", true, "dst_reg1", "", false },
{ "back_spec_color", true, "dst_reg2", "", false },
//Technically these two are for both back and front
{ "back_diff_color", true, "dst_reg1", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE },
{ "back_spec_color", true, "dst_reg2", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR },
{ "front_diff_color", true, "dst_reg3", "", false },
{ "front_spec_color", true, "dst_reg4", "", false },
{ "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG },
@ -159,15 +160,15 @@ static const vertex_reg_info reg_table[] =
{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
{ "tc0", true, "dst_reg7", "", false },
{ "tc1", true, "dst_reg8", "", false },
{ "tc2", true, "dst_reg9", "", false },
{ "tc3", true, "dst_reg10", "", false },
{ "tc4", true, "dst_reg11", "", false },
{ "tc5", true, "dst_reg12", "", false },
{ "tc6", true, "dst_reg13", "", false },
{ "tc7", true, "dst_reg14", "", false },
{ "tc8", true, "dst_reg15", "", false },
{ "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 },
{ "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 },
{ "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 },
{ "tc3", true, "dst_reg10", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX3 },
{ "tc4", true, "dst_reg11", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX4 },
{ "tc5", true, "dst_reg12", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX5 },
{ "tc6", true, "dst_reg13", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX6 },
{ "tc7", true, "dst_reg14", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX7 },
{ "tc8", true, "dst_reg15", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX8 },
{ "tc9", true, "dst_reg6", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX9 } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15.
};
@ -195,6 +196,16 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
const vk::varying_register_t &reg = vk::get_varying_register(i.name);
OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n";
}
else
{
//Force some outputs to be declared even if unused so we can set default values
//NOTE: Registers that can be skipped will not have their check_mask_value set
if (i.need_declare && (rsx_vertex_program.output_mask & i.check_mask_value) > 0)
{
const vk::varying_register_t &reg = vk::get_varying_register(i.name);
OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n";
}
}
}
if (insert_back_diffuse && insert_front_diffuse)