mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
vk: Emulate conditional rendering for AMD
This commit is contained in:
parent
93895838c7
commit
cdd9c12132
6 changed files with 109 additions and 20 deletions
|
@ -319,7 +319,7 @@ namespace rsx
|
|||
else
|
||||
{
|
||||
zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, 4, reports::sync_no_notify);
|
||||
cond_render_ctrl.eval_result(this);
|
||||
verify(HERE), !cond_render_ctrl.eval_pending();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2959,8 +2959,6 @@ namespace rsx
|
|||
{
|
||||
if (hint || ptimer->async_tasks_pending >= max_safe_queue_depth)
|
||||
{
|
||||
verify(HERE), !active || !hint;
|
||||
|
||||
// Prepare the whole queue for reading. This happens when zcull activity is disabled or queue is too long
|
||||
for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It)
|
||||
{
|
||||
|
|
|
@ -286,6 +286,13 @@ namespace
|
|||
|
||||
idx++;
|
||||
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[idx].descriptorCount = 1;
|
||||
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[idx].binding = CONDITIONAL_RENDER_PREDICATE_SLOT;
|
||||
|
||||
idx++;
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; i++)
|
||||
{
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
|
@ -311,6 +318,12 @@ namespace
|
|||
push_constants[0].size = 16;
|
||||
push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
|
||||
if (vk::emulate_conditional_rendering())
|
||||
{
|
||||
// Conditional render toggle
|
||||
push_constants[0].size = 20;
|
||||
}
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo infos = {};
|
||||
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
||||
infos.pBindings = bindings.data();
|
||||
|
@ -439,11 +452,13 @@ VKGSRender::VKGSRender() : GSRender()
|
|||
m_occlusion_query_data[n].driver_handle = n;
|
||||
|
||||
//Generate frame contexts
|
||||
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS };
|
||||
VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
|
||||
VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS };
|
||||
std::vector<VkDescriptorPoolSize> sizes;
|
||||
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS });
|
||||
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS });
|
||||
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS });
|
||||
|
||||
std::vector<VkDescriptorPoolSize> sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool };
|
||||
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
|
||||
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * DESCRIPTOR_MAX_DRAW_CALLS });
|
||||
|
||||
VkSemaphoreCreateInfo semaphore_info = {};
|
||||
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
|
@ -1153,7 +1168,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
|||
update_draw_state();
|
||||
begin_render_pass();
|
||||
|
||||
if (cond_render_ctrl.hw_cond_active)
|
||||
if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support())
|
||||
{
|
||||
// It is inconvenient that conditional rendering breaks other things like compute dispatch
|
||||
// TODO: If this is heavy, refactor the resources into globals and add checks around compute dispatch
|
||||
|
@ -2802,6 +2817,12 @@ void VKGSRender::load_program_env()
|
|||
m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
if (vk::emulate_conditional_rendering())
|
||||
{
|
||||
auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer()->value;
|
||||
m_program->bind_buffer({ predicate, 0, 4 }, CONDITIONAL_RENDER_PREDICATE_SLOT, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
//Clear flags
|
||||
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
m_graphics_state &= ~handled_flags;
|
||||
|
@ -2826,13 +2847,21 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
|
|||
base_offset = 0;
|
||||
}
|
||||
|
||||
u32 draw_info[4];
|
||||
u8 data_size = 16;
|
||||
u32 draw_info[5];
|
||||
|
||||
draw_info[0] = vertex_info.vertex_index_base;
|
||||
draw_info[1] = vertex_info.vertex_index_offset;
|
||||
draw_info[2] = id;
|
||||
draw_info[3] = (id * 16) + (base_offset / 8);
|
||||
|
||||
vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 16, draw_info);
|
||||
if (vk::emulate_conditional_rendering())
|
||||
{
|
||||
draw_info[4] = cond_render_ctrl.hw_cond_active ? 1 : 0;
|
||||
data_size = 20;
|
||||
}
|
||||
|
||||
vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info);
|
||||
|
||||
const size_t data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
|
||||
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
|
||||
|
@ -3792,10 +3821,31 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
|||
if (!m_cond_render_buffer)
|
||||
{
|
||||
auto& memory_props = m_device->get_memory_mapping();
|
||||
auto usage_flags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
|
||||
if (m_device->get_conditional_render_support())
|
||||
{
|
||||
usage_flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
|
||||
}
|
||||
|
||||
m_cond_render_buffer = std::make_unique<vk::buffer>(
|
||||
*m_device, 4,
|
||||
memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
||||
usage_flags, 0);
|
||||
}
|
||||
|
||||
VkPipelineStageFlags dst_stage;
|
||||
VkAccessFlags dst_access;
|
||||
|
||||
if (m_device->get_conditional_render_support())
|
||||
{
|
||||
dst_stage = VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT;
|
||||
dst_access = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
|
||||
dst_access = VK_ACCESS_SHADER_READ_BIT;
|
||||
}
|
||||
|
||||
if (sources.size() == 1)
|
||||
|
@ -3809,8 +3859,8 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
|||
m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
|
||||
|
||||
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, dst_access);
|
||||
|
||||
rsx::thread::begin_conditional_rendering(sources);
|
||||
return;
|
||||
|
@ -3863,8 +3913,8 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
|||
vk::get_compute_task<vk::cs_aggregator>()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4);
|
||||
|
||||
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dst_stage,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, dst_access);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -90,6 +90,7 @@ namespace vk
|
|||
bool g_drv_no_primitive_restart_flag = false;
|
||||
bool g_drv_sanitize_fp_values = false;
|
||||
bool g_drv_disable_fence_reset = false;
|
||||
bool g_drv_emulate_cond_render = false;
|
||||
|
||||
u64 g_num_processed_frames = 0;
|
||||
u64 g_num_total_frames = 0;
|
||||
|
@ -425,6 +426,7 @@ namespace vk
|
|||
g_drv_no_primitive_restart_flag = false;
|
||||
g_drv_sanitize_fp_values = false;
|
||||
g_drv_disable_fence_reset = false;
|
||||
g_drv_emulate_cond_render = (g_cfg.video.relaxed_zcull_sync && !g_current_renderer->get_conditional_render_support());
|
||||
g_num_processed_frames = 0;
|
||||
g_num_total_frames = 0;
|
||||
g_heap_compatible_buffer_types = 0;
|
||||
|
@ -533,6 +535,11 @@ namespace vk
|
|||
return g_drv_disable_fence_reset;
|
||||
}
|
||||
|
||||
bool emulate_conditional_rendering()
|
||||
{
|
||||
return g_drv_emulate_cond_render;
|
||||
}
|
||||
|
||||
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
|
||||
{
|
||||
VkBufferMemoryBarrier barrier = {};
|
||||
|
|
|
@ -39,8 +39,9 @@
|
|||
#define FRAGMENT_STATE_BIND_SLOT 3
|
||||
#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 4
|
||||
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 5
|
||||
#define TEXTURES_FIRST_BIND_SLOT 8
|
||||
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16
|
||||
#define CONDITIONAL_RENDER_PREDICATE_SLOT 8
|
||||
#define TEXTURES_FIRST_BIND_SLOT 9
|
||||
#define VERTEX_TEXTURES_FIRST_BIND_SLOT (TEXTURES_FIRST_BIND_SLOT + 16)
|
||||
|
||||
#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4)
|
||||
|
||||
|
@ -138,6 +139,7 @@ namespace vk
|
|||
bool emulate_primitive_restart(rsx::primitive_type type);
|
||||
bool sanitize_fp_values();
|
||||
bool fence_reset_disabled();
|
||||
bool emulate_conditional_rendering();
|
||||
VkFlags get_heap_compatible_buffer_types();
|
||||
driver_vendor get_driver_vendor();
|
||||
chip_class get_chip_family(uint32_t vendor_id, uint32_t device_id);
|
||||
|
|
|
@ -43,12 +43,26 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
|||
OS << " float z_far;\n";
|
||||
OS << "};\n\n";
|
||||
|
||||
if (m_device_props.emulate_conditional_rendering)
|
||||
{
|
||||
OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n";
|
||||
OS << "{\n";
|
||||
OS << " uint conditional_rendering_predicate;\n";
|
||||
OS << "};\n\n";
|
||||
}
|
||||
|
||||
OS << "layout(push_constant) uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uint draw_id;\n";
|
||||
OS << " uint layout_ptr_offset;\n";
|
||||
|
||||
if (m_device_props.emulate_conditional_rendering)
|
||||
{
|
||||
OS << " uint conditional_rendering_enabled;\n";
|
||||
}
|
||||
|
||||
OS << "};\n\n";
|
||||
|
||||
vk::glsl::program_input in;
|
||||
|
@ -238,9 +252,18 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
|
|||
OS << "}\n\n";
|
||||
|
||||
OS << "void main ()\n";
|
||||
OS << "{\n";
|
||||
OS << "{\n\n";
|
||||
|
||||
OS << "\n" << " vs_main();\n\n";
|
||||
if (m_device_props.emulate_conditional_rendering)
|
||||
{
|
||||
OS << " if (conditional_rendering_enabled != 0 && conditional_rendering_predicate == 0)\n";
|
||||
OS << " {\n";
|
||||
OS << " gl_Position = vec4(0.);\n";
|
||||
OS << " return;\n";
|
||||
OS << "}\n\n";
|
||||
}
|
||||
|
||||
OS << " vs_main();\n\n";
|
||||
|
||||
for (auto &i : reg_table)
|
||||
{
|
||||
|
@ -286,6 +309,8 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
|
|||
|
||||
void VKVertexDecompilerThread::Task()
|
||||
{
|
||||
m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering();
|
||||
|
||||
m_shader = Decompile();
|
||||
vk_prog->SetInputs(inputs);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#pragma once
|
||||
#pragma once
|
||||
#include "../Common/VertexProgramDecompiler.h"
|
||||
#include "Emu/RSX/RSXVertexProgram.h"
|
||||
#include "Utilities/Thread.h"
|
||||
|
@ -10,6 +10,13 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler
|
|||
std::string &m_shader;
|
||||
std::vector<vk::glsl::program_input> inputs;
|
||||
class VKVertexProgram *vk_prog;
|
||||
|
||||
// Device-dependent switches consulted while the vertex shader source is emitted.
struct
{
	// When true, the generated shader declares and tests the conditional rendering predicate.
	// Defaults to false so the flag is never read uninitialized; Task() assigns the real value.
	bool emulate_conditional_rendering = false;
}
m_device_props;
|
||||
|
||||
protected:
|
||||
std::string getFloatTypeName(size_t elementCount) override;
|
||||
std::string getIntTypeName(size_t elementCount) override;
|
||||
|
|
Loading…
Add table
Reference in a new issue