From b4bf48c33b411906a8adafd1fbb15cbc2f7db6a8 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 9 Apr 2020 20:50:27 +0300 Subject: [PATCH] vk: Integrate shader interpreter --- .../Interpreter/FragmentInterpreter.glsl | 12 +- .../Common/Interpreter/VertexInterpreter.glsl | 52 +- rpcs3/Emu/RSX/Common/ShaderInterpreter.h | 4 +- rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp | 8 +- rpcs3/Emu/RSX/VK/VKCompute.h | 4 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 121 ++++- rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 7 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 179 +++++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 14 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 6 +- rpcs3/Emu/RSX/VK/VKOverlays.h | 2 +- rpcs3/Emu/RSX/VK/VKPresent.cpp | 7 + rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 2 +- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 8 +- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 488 +++++++++++++++++- rpcs3/Emu/RSX/VK/VKShaderInterpreter.h | 55 +- rpcs3/Emu/RSX/VK/VKTextOut.h | 2 +- rpcs3/Emu/RSX/VK/VKVertexProgram.h | 7 + 18 files changed, 891 insertions(+), 87 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl index 0603a21c8c..f7550cc811 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl +++ b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl @@ -4,6 +4,8 @@ layout(location=1) out vec4 ocol1; layout(location=2) out vec4 ocol2; layout(location=3) out vec4 ocol3; +layout(location=0) in vec4 in_regs[16]; + #define RSX_FP_OPCODE_NOP 0x00 // No-Operation #define RSX_FP_OPCODE_MOV 0x01 // Move #define RSX_FP_OPCODE_MUL 0x02 // Multiply @@ -174,15 +176,17 @@ vec4 read_src(const in int index) // TODO: wpos value = vec4(0.); break; case 1: - value = gl_FrontFacing? in_regs[0] : in_regs[2]; break; - case 2: value = gl_FrontFacing? in_regs[1] : in_regs[3]; break; + case 2: + value = gl_FrontFacing? in_regs[2] : in_regs[4]; break; case 3: - value = fetch_fog_value(fog_mode, in_regs[4]); break; + value = fetch_fog_value(fog_mode, in_regs[5]); break; + case 13: + value = in_regs[6]; break; case 14: value = gl_FrontFacing? vec4(1.) : vec4(-1.); break; default: - value = in_regs[i + 1]; break; + value = in_regs[i + 3]; break; } break; diff --git a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl index fd57d630f0..5580c65cd0 100644 --- a/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl +++ b/rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl @@ -1,5 +1,8 @@ R"( +// Program outputs +layout(location=0) out vec4 dest[16]; + #define RSX_SCA_OPCODE_NOP 0x00 // No-Operation #define RSX_SCA_OPCODE_MOV 0x01 // Move (copy) #define RSX_SCA_OPCODE_RCP 0x02 // Reciprocal @@ -193,7 +196,6 @@ uvec4 instr; vec4 temp[32]; ivec4 a[2] = { ivec4(0), ivec4(0) }; vec4 cc[2] = { vec4(0), vec4(0) }; -vec4 dest[16]; D0 d0; D1 d1; @@ -248,15 +250,11 @@ void write_vec(in vec4 value) } } -vec4 write_output(const in int oid, const in int mask_bit) +void write_output(const in int oid, const in int mask_bit) { - if (attribute_enabled(1 << mask_bit)) + if (!attribute_enabled(1 << mask_bit)) { - return dest[oid]; - } - else - { - return vec4(0., 0., 0., 1.); + dest[oid] = vec4(0., 0., 0., 1.); } } @@ -527,21 +525,19 @@ void main() } // TODO: 2-sided lighting - if (attribute_enabled(1 << 0 | 1 << 2)) + if (!attribute_enabled(1 << 0 | 1 << 2)) { - diff_color = dest[1]; - diff_color1 = dest[1]; + dest[1] = dest[3] = vec4(0, 0, 0, 1); } - if (attribute_enabled(1 << 1 | 1 << 3)) + if (!attribute_enabled(1 << 1 | 1 << 3)) { - spec_color = dest[2]; - spec_color1 = dest[2]; + dest[2] = dest[4] = vec4(0, 0, 0, 1); } - if (attribute_enabled(1 << 4)) + if (!attribute_enabled(1 << 4)) { - fog_c = dest[5].xxxx; + dest[5].x = 0; } if (attribute_enabled(1 << 5)) @@ -567,19 +563,23 @@ void main() gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f; } - tc8 = write_output(15, 12); - tc9 = write_output(6, 13); - tc0 = write_output(7, 14); - tc1 = write_output(8, 15); - tc2 = write_output(9, 16); - tc3 = write_output(10, 17); - tc4 = write_output(11, 18); - tc5 = write_output(12, 19); - tc6 = write_output(13, 20); - tc7 = write_output(14, 21); + write_output(15, 12); + write_output(6, 13); + write_output(7, 14); + write_output(8, 15); + write_output(9, 16); + write_output(10, 17); + write_output(11, 18); + write_output(12, 19); + write_output(13, 20); + write_output(14, 21); vec4 pos = dest[0] * scale_offset_mat; + +#ifdef Z_NEGATIVE_ONE_TO_ONE pos.z = (pos.z + pos.z) - pos.w; +#endif + gl_Position = pos; } diff --git a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h index e9f00dc770..7ceedf224c 100644 --- a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h @@ -5,7 +5,7 @@ namespace program_common { namespace interpreter { - std::string get_vertex_interpreter() + static std::string get_vertex_interpreter() { const char* s = #include "Interpreter/VertexInterpreter.glsl" @@ -13,7 +13,7 @@ namespace program_common return s; } - std::string get_fragment_interpreter() + static std::string get_fragment_interpreter() { const char* s = #include "Interpreter/FragmentInterpreter.glsl" diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index b6eb5d9271..f4d82c268d 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -101,9 +101,11 @@ namespace gl std::stringstream builder; comp.insertHeader(builder); + + builder << "#define Z_NEGATIVE_ONE_TO_ONE\n\n"; + comp.insertConstants(builder, {}); comp.insertInputs(builder, {}); - comp.insertOutputs(builder, {}); // Insert vp stream input builder << "\n" @@ -180,10 +182,6 @@ namespace gl ::glsl::insert_subheader_block(builder); comp.insertConstants(builder); - // Declare custom inputs - builder << - "layout(location=1) in vec4 in_regs[15];\n\n"; - const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" }; for (int i = 0; i < 4; ++i) { diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 2ea4b37765..6ab1515aa5 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -60,7 +60,7 @@ namespace vk } // Reserve descriptor pools - m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes.data(), ::size32(descriptor_pool_sizes), VK_MAX_COMPUTE_TASKS, 2); + m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes.data(), ::size32(descriptor_pool_sizes), VK_MAX_COMPUTE_TASKS, 3); VkDescriptorSetLayoutCreateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -179,7 +179,7 @@ namespace vk VkPipeline pipeline; vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &info, nullptr, &pipeline); - m_program = std::make_unique(*get_current_renderer(), pipeline); + m_program = std::make_unique(*get_current_renderer(), pipeline, m_pipeline_layout); declare_inputs(); } diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 69fd749916..3689b9734a 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -571,6 +571,115 @@ void VKGSRender::bind_texture_env() } } +void VKGSRender::bind_interpreter_texture_env() +{ + std::array texture_env; + VkDescriptorImageInfo fallback = { vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, VK_IMAGE_VIEW_TYPE_1D)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + + auto start = texture_env.begin(); + auto end = start; + + // Fill default values + // 1D + std::advance(end, 16); + std::fill(start, end, fallback); + // 2D + start = end; + fallback.imageView = vk::null_image_view(*m_current_command_buffer, VK_IMAGE_VIEW_TYPE_2D)->value; + std::advance(end, 16); + std::fill(start, end, fallback); + // 3D + start = end; + fallback.imageView = vk::null_image_view(*m_current_command_buffer, VK_IMAGE_VIEW_TYPE_3D)->value; + std::advance(end, 16); + std::fill(start, end, fallback); + // CUBE + start = end; + fallback.imageView = vk::null_image_view(*m_current_command_buffer, VK_IMAGE_VIEW_TYPE_CUBE)->value; + std::advance(end, 16); + std::fill(start, end, fallback); + + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + { + if (current_fp_metadata.referenced_textures_mask & (1 << i)) + { + vk::image_view* view = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) + { + if (view = sampler_state->image_handle; !view) + { + //Requires update, copy subresource + view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); + } + else + { + switch (auto raw = view->image(); raw->current_layout) + { + default: + //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + case VK_IMAGE_LAYOUT_GENERAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; + if (!sampler_state->is_cyclic_reference) + { + // This was used in a cyclic ref before, but is missing a barrier + // No need for a full stall, use a custom barrier instead + VkPipelineStageFlags src_stage; + VkAccessFlags src_access; + if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) + { + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + else + { + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + vk::insert_image_memory_barrier( + *m_current_command_buffer, + raw->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, + { raw->aspect(), 0, 1, 0, 1 }); + + raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; + raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + } + } + } + + if (view) + { + const int offsets[] = { 0, 16, 48, 32 }; + auto& sampled_image_info = texture_env[offsets[static_cast(sampler_state->image_type)] + i]; + sampled_image_info = { fs_sampler_handles[i]->value, view->value, view->image()->current_layout }; + } + } + } + + m_shader_interpreter.update_fragment_textures(texture_env, m_current_frame->descriptor_set); +} + void VKGSRender::emit_geometry(u32 sub_index) { auto &draw_call = rsx::method_registers.current_draw_clause; @@ -701,7 +810,7 @@ void VKGSRender::emit_geometry(u32 sub_index) } // Bind the new set of descriptors for use with this draw call - vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); + vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); m_frame_stats.setup_time += m_profiler.duration(); @@ -860,7 +969,15 @@ void VKGSRender::end() load_program_env(); m_frame_stats.setup_time += m_profiler.duration(); - bind_texture_env(); + if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]] + { + bind_texture_env(); + } + else + { + bind_interpreter_texture_env(); + } + m_texture_cache.release_uncached_temporary_subresources(); m_frame_stats.textures_upload_time += m_profiler.duration(); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index a4fbb28352..f544371c8b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -5,8 +5,15 @@ #include "VulkanAPI.h" #include "VKHelpers.h" +namespace vk +{ + class shader_interpreter; +} + struct VKFragmentDecompilerThread : public FragmentProgramDecompiler { + friend class vk::shader_interpreter; + std::string& m_shader; ParamArray& m_parrDummy; std::vector inputs; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ce21e08813..cc5a125b84 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -465,6 +465,12 @@ VKGSRender::VKGSRender() : GSRender() m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); + if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) + { + m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "vertex instructions buffer", 512 * 16); + m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "fragment instructions buffer", 2048); + } + const auto limits = m_device->gpu().get_limits(); m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u); @@ -544,6 +550,11 @@ VKGSRender::VKGSRender() : GSRender() m_occlusion_query_pool.initialize(*m_current_command_buffer); + if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) + { + m_shader_interpreter.init(*m_device); + } + backend_config.supports_multidraw = true; // NOTE: We do not actually need multiple sample support for A2C to work @@ -582,6 +593,7 @@ VKGSRender::~VKGSRender() //Shaders vk::finalize_compiler_context(); m_prog_buffer->clear(); + m_shader_interpreter.destroy(); m_persistent_attribute_storage.reset(); m_volatile_attribute_storage.reset(); @@ -600,6 +612,8 @@ VKGSRender::~VKGSRender() m_transform_constants_ring_info.destroy(); m_index_buffer_ring_info.destroy(); m_texture_upload_buffer_ring_info.destroy(); + m_vertex_instructions_buffer.destroy(); + m_fragment_instructions_buffer.destroy(); //Fallback bindables null_buffer.reset(); @@ -937,19 +951,26 @@ void VKGSRender::check_descriptors() VkDescriptorSet VKGSRender::allocate_descriptor_set() { - verify(HERE), m_current_frame->used_descriptors < DESCRIPTOR_MAX_DRAW_CALLS; + if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]] + { + verify(HERE), m_current_frame->used_descriptors < DESCRIPTOR_MAX_DRAW_CALLS; - VkDescriptorSetAllocateInfo alloc_info = {}; - alloc_info.descriptorPool = m_current_frame->descriptor_pool; - alloc_info.descriptorSetCount = 1; - alloc_info.pSetLayouts = &descriptor_layouts; - alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + VkDescriptorSetAllocateInfo alloc_info = {}; + alloc_info.descriptorPool = m_current_frame->descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &descriptor_layouts; + alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - VkDescriptorSet new_descriptor_set; - CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set)); - m_current_frame->used_descriptors++; + VkDescriptorSet new_descriptor_set; + CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set)); + m_current_frame->used_descriptors++; - return new_descriptor_set; + return new_descriptor_set; + } + else + { + return m_shader_interpreter.allocate_descriptor_set(); + } } void VKGSRender::set_viewport() @@ -1408,7 +1429,7 @@ void VKGSRender::do_local_task(rsx::FIFO_state state) bool VKGSRender::load_program() { - if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) + if (m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; @@ -1552,38 +1573,82 @@ bool VKGSRender::load_program() } properties.renderpass_key = m_current_renderpass_key; + vk::glsl::program* active_interpreter = nullptr; - vk::enter_uninterruptible(); + if (!m_interpreter_state && m_pipeline_properties == properties) [[likely]] + { + // Nothing changed + if (m_shader_interpreter.is_interpreter(m_program)) + { + if (g_cfg.video.shader_interpreter_mode == shader_interpreter_mode::forced) + { + return true; + } - //Load current program from buffer - vertex_program.skip_vertex_input_check = true; - fragment_program.unnormalized_coords = 0; - m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, + active_interpreter = m_program; + } + else + { + return true; + } + } + + if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::forced) [[likely]] + { + vk::enter_uninterruptible(); + + //Load current program from buffer + vertex_program.skip_vertex_input_check = true; + fragment_program.unnormalized_coords = 0; + m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, !g_cfg.video.disable_asynchronous_shader_compiler, true, *m_device, pipeline_layout).get(); - vk::leave_uninterruptible(); + vk::leave_uninterruptible(); - if (m_prog_buffer->check_cache_missed()) - { - // Notify the user with HUD notification - if (g_cfg.misc.show_shader_compilation_hint) + if (m_prog_buffer->check_cache_missed()) { - if (m_overlay_manager) + // Notify the user with HUD notification + if (g_cfg.misc.show_shader_compilation_hint) { - if (auto dlg = m_overlay_manager->get()) + if (m_overlay_manager) { - // Extend duration - dlg->touch(); - } - else - { - // Create dialog but do not show immediately - m_overlay_manager->create(); + if (auto dlg = m_overlay_manager->get()) + { + // Extend duration + dlg->touch(); + } + else + { + // Create dialog but do not show immediately + m_overlay_manager->create(); + } } } } } + else + { + m_program = nullptr; + } + if (!m_program && g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) + { + if (!m_shader_interpreter.is_interpreter(old_program)) + { + m_interpreter_state = rsx::invalidate_pipeline_bits; + } + + if (active_interpreter) [[likely]] + { + m_program = active_interpreter; + } + else + { + m_program = m_shader_interpreter.get(properties); + } + } + + m_pipeline_properties = properties; return m_program != nullptr; } @@ -1601,6 +1666,7 @@ void VKGSRender::load_program_env() const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); + const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program)); if (update_vertex_env) { @@ -1634,7 +1700,7 @@ void VKGSRender::load_program_env() m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 }; } - if (update_fragment_constants) + if (update_fragment_constants && !update_instruction_buffers) { check_heap_status(VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE); @@ -1680,14 +1746,63 @@ void VKGSRender::load_program_env() m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 }; } + if (update_instruction_buffers) + { + if (m_interpreter_state & rsx::vertex_program_dirty) + { + // Attach vertex buffer data + const auto vp_block_length = current_vp_metadata.ucode_length + 16; + auto vp_mapping = m_vertex_instructions_buffer.alloc<256>(vp_block_length); + auto vp_buf = static_cast(m_vertex_instructions_buffer.map(vp_mapping, vp_block_length)); + + auto vp_config = reinterpret_cast(vp_buf); + vp_config[0] = current_vertex_program.base_address; + vp_config[1] = current_vertex_program.entry; + vp_config[2] = current_vertex_program.output_mask; + + std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); + m_vertex_instructions_buffer.unmap(); + + m_vertex_instructions_buffer_info = { m_vertex_instructions_buffer.heap->value, vp_mapping, vp_block_length }; + } + + if (m_interpreter_state & rsx::fragment_program_dirty) + { + // Attach fragment buffer data + const auto fp_block_length = current_fp_metadata.program_ucode_length + 16; + auto fp_mapping = m_fragment_instructions_buffer.alloc<256>(fp_block_length); + auto fp_buf = static_cast(m_fragment_instructions_buffer.map(fp_mapping, fp_block_length)); + + // Control mask + const auto control_masks = reinterpret_cast(fp_buf); + control_masks[0] = rsx::method_registers.shader_control(); + control_masks[1] = current_fragment_program.texture_dimensions; + + const auto fp_data = static_cast(current_fragment_program.addr) + current_fp_metadata.program_start_offset; + std::memcpy(fp_buf + 16, fp_data, current_fp_metadata.program_ucode_length); + m_fragment_instructions_buffer.unmap(); + + m_fragment_instructions_buffer_info = { m_fragment_instructions_buffer.heap->value, fp_mapping, fp_block_length }; + } + } + const auto& binding_table = m_device->get_pipeline_binding_table(); m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); m_program->bind_uniform(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); + if (!m_shader_interpreter.is_interpreter(m_program)) + { + m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); + } + else + { + m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + } + if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer()->value; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 2db9ec9b7a..96c9a1d17c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -8,6 +8,7 @@ #include "VKOverlays.h" #include "VKProgramBuffer.h" #include "VKFramebuffer.h" +#include "VKShaderInterpreter.h" #include "../GCM.h" #include @@ -349,7 +350,8 @@ private: private: VKFragmentProgram m_fragment_prog; VKVertexProgram m_vertex_prog; - vk::glsl::program *m_program; + vk::glsl::program *m_program = nullptr; + vk::pipeline_props m_pipeline_properties; vk::texture_cache m_texture_cache; rsx::vk_render_targets m_rtts; @@ -428,6 +430,9 @@ private: vk::data_heap m_index_buffer_ring_info; // Index data vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap + vk::data_heap m_fragment_instructions_buffer; + vk::data_heap m_vertex_instructions_buffer; + VkDescriptorBufferInfo m_vertex_env_buffer_info; VkDescriptorBufferInfo m_fragment_env_buffer_info; VkDescriptorBufferInfo m_vertex_layout_stream_info; @@ -435,6 +440,9 @@ private: VkDescriptorBufferInfo m_fragment_constants_buffer_info; VkDescriptorBufferInfo m_fragment_texture_params_buffer_info; + VkDescriptorBufferInfo m_vertex_instructions_buffer_info; + VkDescriptorBufferInfo m_fragment_instructions_buffer_info; + std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage vk::frame_context_t m_aux_frame_context; @@ -464,6 +472,9 @@ private: //Vertex layout rsx::vertex_input_layout m_vertex_layout; + vk::shader_interpreter m_shader_interpreter; + u32 m_interpreter_state; + #if defined(HAVE_X11) && defined(HAVE_VULKAN) Display *m_display_handle = nullptr; #endif @@ -512,6 +523,7 @@ private: void load_texture_env(); void bind_texture_env(); + void bind_interpreter_texture_env(); public: void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index afac71b90e..e049174bb7 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -3629,11 +3629,12 @@ public: public: VkPipeline pipeline; + VkPipelineLayout pipeline_layout; u64 attribute_location_mask; u64 vertex_attributes_mask; - program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs); - program(VkDevice dev, VkPipeline p); + program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs); + program(VkDevice dev, VkPipeline p, VkPipelineLayout layout); program(const program&) = delete; program(program&& other) = delete; ~program(); @@ -3649,6 +3650,7 @@ public: void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set); void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set); + void bind_descriptor_set(const VkCommandBuffer cmd, VkDescriptorSet descriptor_set); }; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 92b9a1b8af..72534f3690 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -250,7 +250,7 @@ namespace vk CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, nullptr, 1, &info, NULL, &pipeline)); - auto program = std::make_unique(*m_device, pipeline, get_vertex_inputs(), get_fragment_inputs()); + auto program = std::make_unique(*m_device, pipeline, m_pipeline_layout, get_vertex_inputs(), get_fragment_inputs()); auto result = program.get(); m_program_cache[storage_key] = std::move(program); diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 899b10d931..5c56870751 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -217,6 +217,13 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou ctx->buffer_views_to_clean.clear(); + if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) + { + // TODO: This is jank AF + m_vertex_instructions_buffer.reset_allocation_stats(); + m_fragment_instructions_buffer.reset_allocation_stats(); + } + if (ctx->last_frame_sync_time > m_last_heap_sync_time) { m_last_heap_sync_time = ctx->last_frame_sync_time; diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 5f76045b9e..f2159d47dd 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -178,7 +178,7 @@ struct VKTraits CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); - pipeline_storage_type result = std::make_unique(dev, pipeline, vertexProgramData.uniforms, fragmentProgramData.uniforms); + pipeline_storage_type result = std::make_unique(dev, pipeline, common_pipeline_layout, vertexProgramData.uniforms, fragmentProgramData.uniforms); result->link(); return result; } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index e396348476..7bdcb55d76 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -20,16 +20,16 @@ namespace vk vs_texture_bindings.fill(~0u); } - program::program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs) - : m_device(dev), pipeline(p) + program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs) + : m_device(dev), pipeline(p), pipeline_layout(layout) { create_impl(); load_uniforms(vertex_input); load_uniforms(fragment_inputs); } - program::program(VkDevice dev, VkPipeline p) - : m_device(dev), pipeline(p) + program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout) + : m_device(dev), pipeline(p), pipeline_layout(layout) { create_impl(); } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index d2e86656e2..a78e48c788 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -1,7 +1,493 @@ -#include "stdafx.h" +#include "stdafx.h" #include "VKShaderInterpreter.h" +#include "VKVertexProgram.h" +#include "VKFragmentProgram.h" +#include "VKGSRender.h" +#include "../Common/GLSLCommon.h" +#include "../Common/ShaderInterpreter.h" namespace vk { + void shader_interpreter::build_vs() + { + ::glsl::shader_properties properties{}; + properties.domain = ::glsl::program_domain::glsl_vertex_program; + properties.require_lit_emulation = true; + // TODO: Extend decompiler thread + // TODO: Rename decompiler thread, it no longer spawns a thread + RSXVertexProgram null_prog; + std::string shader_str; + ParamArray arr; + VKVertexProgram vk_prog; + VKVertexDecompilerThread comp(null_prog, shader_str, arr, vk_prog); + + std::stringstream builder; + comp.insertHeader(builder); + comp.insertConstants(builder, {}); + comp.insertInputs(builder, {}); + + // Insert vp stream input + builder << "\n" + "layout(std140, set=0, binding=" << m_vertex_instruction_start << ") readonly restrict buffer VertexInstructionBlock\n" + "{\n" + " uint base_address;\n" + " uint entry;\n" + " uint output_mask;\n" + " uint reserved;\n" + " uvec4 vp_instructions[];\n" + "};\n\n"; + + ::glsl::insert_glsl_legacy_function(builder, properties); + ::glsl::insert_vertex_input_fetch(builder, ::glsl::glsl_rules::glsl_rules_spirv); + + builder << program_common::interpreter::get_vertex_interpreter(); + const std::string s = builder.str(); + + m_vs.create(::glsl::program_domain::glsl_vertex_program, s); + m_vs.compile(); + + // Prepare input table + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + vk::glsl::program_input in; + + in.location = binding_table.vertex_params_bind_slot;; + in.domain = ::glsl::glsl_vertex_program; + in.name = "VertexContextBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_buffers_first_bind_slot; + in.name = "persistent_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_buffers_first_bind_slot + 1; + in.name = "volatile_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_buffers_first_bind_slot + 2; + in.name = "vertex_layout_stream"; + in.type = vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_constant_buffers_bind_slot; + in.name = "VertexConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_vs_inputs.push_back(in); + + // TODO: Bind textures if needed + } + + void shader_interpreter::build_fs() + { + ::glsl::shader_properties properties{}; + properties.domain = ::glsl::program_domain::glsl_fragment_program; + properties.require_depth_conversion = true; + properties.require_wpos = true; + + u32 len; + ParamArray arr; + std::string shader_str; + RSXFragmentProgram frag; + VKFragmentProgram vk_prog; + VKFragmentDecompilerThread comp(shader_str, arr, frag, len, vk_prog); + + std::stringstream builder; + builder << + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; + + ::glsl::insert_subheader_block(builder); + comp.insertConstants(builder); + + const char* type_names[] = { "sampler1D", "sampler2D", "sampler3D", "samplerCube" }; + for (int i = 0, bind_location = m_fragment_textures_start; i < 4; ++i) + { + builder << "layout(set=0, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; + } + + builder << "\n" + "#define IS_TEXTURE_RESIDENT(index) true\n" + "#define SAMPLER1D(index) sampler1D_array[index]\n" + "#define SAMPLER2D(index) sampler2D_array[index]\n" + "#define SAMPLER3D(index) sampler3D_array[index]\n" + "#define SAMPLERCUBE(index) samplerCube_array[index]\n\n"; + + builder << + "layout(std430, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" + "{\n" + " uint shader_control;\n" + " uint texture_control;\n" + " uint reserved1;\n" + " uint reserved2;\n" + " uvec4 fp_instructions[];\n" + "};\n\n"; + + ::program_common::insert_fog_declaration(builder, "vec4", "fogc", true); + + builder << program_common::interpreter::get_fragment_interpreter(); + const std::string s = builder.str(); + + m_fs.create(::glsl::program_domain::glsl_fragment_program, s); + m_fs.compile(); + + // Prepare input table + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + vk::glsl::program_input in; + in.location = binding_table.fragment_constant_buffers_bind_slot; + in.domain = ::glsl::glsl_fragment_program; + in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_fs_inputs.push_back(in); + + in.location = binding_table.fragment_state_bind_slot; + in.name = "FragmentStateBuffer"; + m_fs_inputs.push_back(in); + + in.location = binding_table.fragment_texture_params_bind_slot; + in.name = "TextureParametersBuffer"; + m_fs_inputs.push_back(in); + + for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) + { + in.location = location; + in.name = std::string(type_names[i]) + "_array[16]"; + m_fs_inputs.push_back(in); + } + } + + std::pair shader_interpreter::create_layout(VkDevice dev) + { + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + std::vector bindings(binding_table.total_descriptor_bindings); + + uint32_t idx = 0; + + // Vertex stream, one stream for cacheable data, one stream for transient data. Third stream contains vertex layout info + for (int i = 0; i < 3; i++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i; + idx++; + } + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_state_bind_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_texture_params_bind_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + bindings[idx].binding = binding_table.vertex_params_bind_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.conditional_render_predicate_slot; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot; + + m_fragment_textures_start = bindings[idx].binding; + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 1; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 2; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 3; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 4; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 4; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 5; + + m_vertex_instruction_start = bindings[idx].binding; + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 6; + + m_fragment_instruction_start = bindings[idx].binding; + idx++; + bindings.resize(idx); + + std::array push_constants; + push_constants[0].offset = 0; + push_constants[0].size = 16; + push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + if (vk::emulate_conditional_rendering()) + { + // Conditional render toggle + push_constants[0].size = 20; + } + + VkDescriptorSetLayoutCreateInfo infos = {}; + infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + infos.pBindings = bindings.data(); + infos.bindingCount = static_cast(bindings.size()); + + VkDescriptorSetLayout set_layout; + CHECK_RESULT(vkCreateDescriptorSetLayout(dev, &infos, nullptr, &set_layout)); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &set_layout; + layout_info.pushConstantRangeCount = 1; + layout_info.pPushConstantRanges = push_constants.data(); + + VkPipelineLayout result; + CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); + return { set_layout, result }; + } + + void shader_interpreter::create_descriptor_pools(const vk::render_device& dev) + { + std::vector sizes; + sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS }); + sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }); + sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 68 * DESCRIPTOR_MAX_DRAW_CALLS }); + sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 * DESCRIPTOR_MAX_DRAW_CALLS }); + + m_descriptor_pool.create(dev, sizes.data(), ::size32(sizes), DESCRIPTOR_MAX_DRAW_CALLS, 2); + } + + void shader_interpreter::init(const vk::render_device& dev) + { + m_device = dev; + std::tie(m_shared_descriptor_layout, m_shared_pipeline_layout) = create_layout(dev); + create_descriptor_pools(dev); + + build_vs(); + build_fs(); + + // TODO: Seed the cache + } + + void shader_interpreter::destroy() + { + m_vs.destroy(); + m_fs.destroy(); + + m_program_cache.clear(); + m_descriptor_pool.destroy(); + + if (m_shared_pipeline_layout) + { + vkDestroyPipelineLayout(m_device, m_shared_pipeline_layout, nullptr); + m_shared_pipeline_layout = VK_NULL_HANDLE; + } + + if (m_shared_descriptor_layout) + { + vkDestroyDescriptorSetLayout(m_device, m_shared_descriptor_layout, nullptr); + m_shared_descriptor_layout = VK_NULL_HANDLE; + } + } + + glsl::program* shader_interpreter::link(const vk::pipeline_props& properties) + { + VkPipelineShaderStageCreateInfo shader_stages[2] = {}; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stages[0].module = m_vs.get_handle(); + shader_stages[0].pName = "main"; + + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = m_fs.get_handle(); + shader_stages[1].pName = "main"; + + VkDynamicState dynamic_state_descriptors[VK_DYNAMIC_STATE_RANGE_SIZE] = {}; + VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; + dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BIAS; + + if (vk::get_current_renderer()->get_depth_bounds_support()) + { + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS; + } + + dynamic_state_info.pDynamicStates = dynamic_state_descriptors; + + VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.viewportCount = 1; + vp.scissorCount = 1; + + VkPipelineMultisampleStateCreateInfo ms = properties.state.ms; + verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((properties.renderpass_key >> 16) & 0xF); + if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) + { + // Update the sample mask pointer + ms.pSampleMask = &properties.state.temp_storage.msaa_sample_mask; + } + + // Rebase pointers from pipeline structure in case it is moved/copied + VkPipelineColorBlendStateCreateInfo cs = properties.state.cs; + cs.pAttachments = properties.state.att_state; + + VkPipeline pipeline; + VkGraphicsPipelineCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + info.pVertexInputState = &vi; + info.pInputAssemblyState = &properties.state.ia; + info.pRasterizationState = &properties.state.rs; + info.pColorBlendState = &cs; + info.pMultisampleState = &ms; + info.pViewportState = &vp; + info.pDepthStencilState = &properties.state.ds; + info.stageCount = 2; + info.pStages = shader_stages; + info.pDynamicState = &dynamic_state_info; + info.layout = m_shared_pipeline_layout; + info.basePipelineIndex = -1; + info.basePipelineHandle = VK_NULL_HANDLE; + info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); + + CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &info, NULL, &pipeline)); + return new vk::glsl::program(m_device, pipeline, m_shared_pipeline_layout, m_vs_inputs, m_fs_inputs); + } + + void shader_interpreter::update_fragment_textures(const std::array& sampled_images, VkDescriptorSet descriptor_set) + { + const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); + for (uint32_t i = 0, binding = m_fragment_textures_start; i < 4; ++i, ++binding, texture_ptr += 16) + { + const VkWriteDescriptorSet descriptor_writer = + { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType + nullptr, // pNext + descriptor_set, // dstSet + binding, // dstBinding + 0, // dstArrayElement + 16, // descriptorCount + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType + texture_ptr, // pImageInfo + nullptr, // pBufferInfo + nullptr // pTexelBufferView + }; + + vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); + } + } + + VkDescriptorSet shader_interpreter::allocate_descriptor_set() + { + if (m_used_descriptors == DESCRIPTOR_MAX_DRAW_CALLS) + { + m_descriptor_pool.reset(0); + m_used_descriptors = 0; + } + + VkDescriptorSetAllocateInfo alloc_info = {}; + alloc_info.descriptorPool = m_descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &m_shared_descriptor_layout; + alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + + VkDescriptorSet new_descriptor_set; + CHECK_RESULT(vkAllocateDescriptorSets(m_device, &alloc_info, &new_descriptor_set)); + + m_used_descriptors++; + return new_descriptor_set; + } + + glsl::program* shader_interpreter::get(const vk::pipeline_props& properties) + { + auto found = m_program_cache.find(properties); + if (found != m_program_cache.end()) [[likely]] + { + m_current_interpreter = found->second.get(); + return m_current_interpreter; + } + + m_current_interpreter = link(properties); + m_program_cache[properties].reset(m_current_interpreter); + return m_current_interpreter; + } + + bool shader_interpreter::is_interpreter(const glsl::program* prog) const + { + return prog == m_current_interpreter; + } + + uint32_t shader_interpreter::get_vertex_instruction_location() const + { + return m_vertex_instruction_start; + } + + uint32_t shader_interpreter::get_fragment_instruction_location() const + { + return m_fragment_instruction_start; + } }; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h index c820cddec9..1dc9d8edd9 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -1,9 +1,58 @@ -#pragma once -#include "VKGSRender.h" +#pragma once +#include "VKProgramBuffer.h" namespace vk { - class shader_interpreter : glsl::program + using ::program_hash_util::fragment_program_utils; + using ::program_hash_util::vertex_program_utils; + + class shader_interpreter { + glsl::shader m_vs; + glsl::shader m_fs; + + std::vector m_vs_inputs; + std::vector m_fs_inputs; + + VkDevice m_device = VK_NULL_HANDLE; + VkDescriptorSetLayout m_shared_descriptor_layout = VK_NULL_HANDLE; + VkPipelineLayout m_shared_pipeline_layout = VK_NULL_HANDLE; + glsl::program* m_current_interpreter = nullptr; + + struct key_hasher + { + size_t operator()(const vk::pipeline_props& props) const + { + return rpcs3::hash_struct(props); + } + }; + + std::unordered_map, key_hasher> m_program_cache; + vk::descriptor_pool m_descriptor_pool; + size_t m_used_descriptors = 0; + + uint32_t m_vertex_instruction_start = 0; + uint32_t m_fragment_instruction_start = 0; + uint32_t m_fragment_textures_start = 0; + + std::pair create_layout(VkDevice dev); + void create_descriptor_pools(const vk::render_device& dev); + + void build_vs(); + void build_fs(); + glsl::program* link(const vk::pipeline_props& properties); + + public: + void init(const vk::render_device& dev); + void destroy(); + + glsl::program* get(const vk::pipeline_props& properties); + bool is_interpreter(const glsl::program* prog) const; + + uint32_t get_vertex_instruction_location() const; + uint32_t get_fragment_instruction_location() const; + + void update_fragment_textures(const std::array& sampled_images, VkDescriptorSet descriptor_set); + VkDescriptorSet allocate_descriptor_set(); }; } diff --git a/rpcs3/Emu/RSX/VK/VKTextOut.h b/rpcs3/Emu/RSX/VK/VKTextOut.h index d0ab782bf6..537e7dfe43 100644 --- a/rpcs3/Emu/RSX/VK/VKTextOut.h +++ b/rpcs3/Emu/RSX/VK/VKTextOut.h @@ -196,7 +196,7 @@ namespace vk CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); const std::vector unused; - m_program = std::make_unique(static_cast(dev), pipeline, unused, unused); + m_program = std::make_unique(static_cast(dev), pipeline, m_pipeline_layout, unused, unused); } void load_program(vk::command_buffer &cmd, float scale_x, float scale_y, const float *offsets, size_t nb_offsets, std::array color) diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index f3256acb99..7a4ce12931 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -5,8 +5,15 @@ #include "VulkanAPI.h" #include "../VK/VKHelpers.h" +namespace vk +{ + class shader_interpreter; +} + struct VKVertexDecompilerThread : public VertexProgramDecompiler { + friend class vk::shader_interpreter; + std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog;