vk: Refactor some uber-headers

- VKHelpers was the rug everything was swept under for a long time.
  This commit essentially deprecates its usage across most of the backend.
This commit is contained in:
kd-11 2021-01-09 21:46:50 +03:00 committed by kd-11
parent 9d74d42f2a
commit c2cbc62be6
74 changed files with 3932 additions and 3760 deletions

View file

@ -435,14 +435,17 @@ target_sources(rpcs3_emu PRIVATE
if(TARGET 3rdparty_vulkan)
target_sources(rpcs3_emu PRIVATE
RSX/VK/vkutils/buffer_view.cpp
RSX/VK/vkutils/barriers.cpp
RSX/VK/vkutils/buffer_object.cpp
RSX/VK/vkutils/chip_class.cpp
RSX/VK/vkutils/command_pool.cpp
RSX/VK/vkutils/fence.cpp
RSX/VK/vkutils/mem_allocator.cpp
RSX/VK/vkutils/memory_block.cpp
RSX/VK/vkutils/physical_device.cpp
RSX/VK/vkutils/render_device.cpp
RSX/VK/vkutils/commands.cpp
RSX/VK/vkutils/data_heap.cpp
RSX/VK/vkutils/image.cpp
RSX/VK/vkutils/image_helpers.cpp
RSX/VK/vkutils/scratch.cpp
RSX/VK/vkutils/sync.cpp
RSX/VK/vkutils/memory.cpp
RSX/VK/vkutils/device.cpp
RSX/VK/vkutils/sampler.cpp
RSX/VK/vkutils/shared.cpp
RSX/VK/VKCommandStream.cpp

View file

@ -1,6 +1,6 @@
#include "stdafx.h"
#include "VKCommandStream.h"
#include "vkutils/fence.h"
#include "vkutils/sync.h"
#include "Emu/IdManager.h"
#include "Emu/system_config.h"
#include "Emu/RSX/RSXOffload.h"

View file

@ -1,10 +1,14 @@
#pragma once
#include "VKHelpers.h"
#include "VKPipelineCompiler.h"
#include "VKRenderPass.h"
#include "vkutils/descriptors.hpp"
#include "Utilities/StrUtil.h"
#include "Emu/IdManager.h"
#include "VKPipelineCompiler.h"
#include "VKRenderPass.h"
#include "VKHelpers.h"
#include "vkutils/buffer_object.h"
#include "vkutils/device.h"
#include "util/asm.hpp"
#include <unordered_map>
@ -65,14 +69,14 @@ namespace vk
}
// Reserve descriptor pools
m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes.data(), ::size32(descriptor_pool_sizes), VK_MAX_COMPUTE_TASKS, 3);
m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes.data(), ::size32(descriptor_pool_sizes), VK_MAX_COMPUTE_TASKS, 3);
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings.data();
infos.bindingCount = ::size32(bindings);
CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));
CHECK_RESULT(vkCreateDescriptorSetLayout(*g_render_device, &infos, nullptr, &m_descriptor_layout));
VkPipelineLayoutCreateInfo layout_info = {};
layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
@ -89,7 +93,7 @@ namespace vk
layout_info.pPushConstantRanges = &push_constants;
}
CHECK_RESULT(vkCreatePipelineLayout(*get_current_renderer(), &layout_info, nullptr, &m_pipeline_layout));
CHECK_RESULT(vkCreatePipelineLayout(*g_render_device, &layout_info, nullptr, &m_pipeline_layout));
}
void create()
@ -123,7 +127,7 @@ namespace vk
break;
}
const auto& gpu = vk::get_current_renderer()->gpu();
const auto& gpu = vk::g_render_device->gpu();
max_invocations_x = gpu.get_limits().maxComputeWorkGroupCount[0];
initialized = true;
@ -138,8 +142,8 @@ namespace vk
m_program.reset();
m_param_buffer.reset();
vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*g_render_device, m_descriptor_layout, nullptr);
vkDestroyPipelineLayout(*g_render_device, m_pipeline_layout, nullptr);
m_descriptor_pool.destroy();
initialized = false;
@ -194,7 +198,7 @@ namespace vk
alloc_info.pSetLayouts = &m_descriptor_layout;
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
CHECK_RESULT(vkAllocateDescriptorSets(*get_current_renderer(), &alloc_info, &m_descriptor_set));
CHECK_RESULT(vkAllocateDescriptorSets(*g_render_device, &alloc_info, &m_descriptor_set));
m_used_descriptors++;
bind_resources();

View file

@ -1,7 +1,9 @@
#include "stdafx.h"
#include "VKHelpers.h"
#include "VKResourceManager.h"
#include "VKDMA.h"
#include "vkutils/device.h"
#include "Emu/Memory/vm.h"
#include "util/asm.hpp"
#include <unordered_map>
@ -257,7 +259,7 @@ namespace vk
}
auto &block_info = g_dma_pool[first_block];
block_info.init(*vk::get_current_renderer(), first_block, s_dma_block_length);
block_info.init(*g_render_device, first_block, s_dma_block_length);
return block_info.get(map_range);
}
@ -291,13 +293,13 @@ namespace vk
if (entry->end() < limit)
{
auto new_length = block_end - block_head->start();
block_head->extend(cmd, *vk::get_current_renderer(), new_length);
block_head->extend(cmd, *g_render_device, new_length);
}
}
else
{
auto required_size = (block_end - block);
block_head->init(*vk::get_current_renderer(), block, required_size);
block_head->init(*g_render_device, block, required_size);
}
}
else

View file

@ -1,16 +1,9 @@
#pragma once
namespace utils
{
class address_range;
}
#include "vkutils/buffer_object.h"
#include "vkutils/commands.h"
namespace vk
{
struct buffer;
class command_buffer;
class render_device;
std::pair<u32, vk::buffer*> map_dma(command_buffer& cmd, u32 local_address, u32 length);
void load_dma(u32 local_address, u32 length);
void flush_dma(u32 local_address, u32 length);

View file

@ -2,7 +2,7 @@
#include "../Common/BufferUtils.h"
#include "../rsx_methods.h"
#include "VKGSRender.h"
#include "vkutils/buffer_view.h"
#include "vkutils/buffer_object.h"
namespace vk
{

View file

@ -1,6 +1,7 @@
#include "stdafx.h"
#include "VKFormats.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include "vkutils/image.h"
namespace vk
{

View file

@ -2,6 +2,8 @@
#include "VKFragmentProgram.h"
#include "VKCommonDecompiler.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include "Emu/system_config.h"
#include "../Common/GLSLCommon.h"
#include "../GCM.h"
@ -369,7 +371,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
void VKFragmentDecompilerThread::Task()
{
m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
m_binding_table = vk::g_render_device->get_pipeline_binding_table();
m_shader = Decompile();
vk_prog->SetInputs(inputs);
}

View file

@ -1,6 +1,9 @@
#include "stdafx.h"
#include "VKFramebuffer.h"
#include "vkutils/image.h"
#include "vkutils/image_helpers.h"
#include <unordered_map>
namespace vk
@ -26,7 +29,7 @@ namespace vk
for (auto &e : image_list)
{
const VkImageSubresourceRange subres = { e->aspect(), 0, 1, 0, 1 };
image_views.push_back(std::make_unique<vk::image_view>(dev, e, VK_IMAGE_VIEW_TYPE_2D, vk::default_component_map(), subres));
image_views.push_back(std::make_unique<vk::image_view>(dev, e, VK_IMAGE_VIEW_TYPE_2D, vk::default_component_map, subres));
}
auto value = std::make_unique<vk::framebuffer_holder>(dev, renderpass, width, height, std::move(image_views));
@ -52,7 +55,7 @@ namespace vk
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
std::vector<std::unique_ptr<vk::image_view>> views;
views.push_back(std::make_unique<vk::image_view>(dev, attachment, VK_IMAGE_VIEW_TYPE_2D, format, vk::default_component_map(), range));
views.push_back(std::make_unique<vk::image_view>(dev, attachment, VK_IMAGE_VIEW_TYPE_2D, format, vk::default_component_map, range));
auto value = std::make_unique<vk::framebuffer_holder>(dev, renderpass, width, height, std::move(views));
auto ret = value.get();

View file

@ -1,6 +1,6 @@
#pragma once
#include "VKHelpers.h"
#include "vkutils/framebuffer_object.hpp"
namespace vk
{

View file

@ -2,12 +2,16 @@
#include "../Overlays/overlay_shader_compile_notification.h"
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
#include "VKGSRender.h"
#include "VKHelpers.h"
#include "VKCommonDecompiler.h"
#include "VKCompute.h"
#include "VKRenderPass.h"
#include "VKResourceManager.h"
#include "VKCommandStream.h"
#include "vkutils/buffer_view.h"
#include "vkutils/buffer_object.h"
#include "vkutils/scratch.h"
#include "Emu/RSX/rsx_methods.h"
#include "Emu/Memory/vm_locking.h"
@ -27,13 +31,13 @@ namespace vk
switch (color_format)
{
case rsx::surface_color_format::r5g6b5:
return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map());
return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map);
case rsx::surface_color_format::a8r8g8b8:
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map());
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map);
case rsx::surface_color_format::a8b8g8r8:
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, vk::default_component_map());
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, vk::default_component_map);
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, o_rgb);
@ -48,10 +52,10 @@ namespace vk
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, o_rgb);
case rsx::surface_color_format::w16z16y16x16:
return std::make_pair(VK_FORMAT_R16G16B16A16_SFLOAT, vk::default_component_map());
return std::make_pair(VK_FORMAT_R16G16B16A16_SFLOAT, vk::default_component_map);
case rsx::surface_color_format::w32z32y32x32:
return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map());
return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map);
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, o_rgb);
@ -79,7 +83,7 @@ namespace vk
default:
rsx_log.error("Surface color buffer: Unsupported surface color format (0x%x)", static_cast<u32>(color_format));
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map());
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map);
}
}
@ -323,9 +327,9 @@ u64 VKGSRender::get_cycles()
VKGSRender::VKGSRender() : GSRender()
{
if (m_thread_context.createInstance("RPCS3"))
if (m_instance.create("RPCS3"))
{
m_thread_context.makeCurrentInstance();
m_instance.bind();
}
else
{
@ -334,7 +338,7 @@ VKGSRender::VKGSRender() : GSRender()
return;
}
std::vector<vk::physical_device>& gpus = m_thread_context.enumerateDevices();
std::vector<vk::physical_device>& gpus = m_instance.enumerate_devices();
//Actually confirm that the loader found at least one compatible device
//This should not happen unless something is wrong with the driver setup on the target system
@ -365,7 +369,7 @@ VKGSRender::VKGSRender() : GSRender()
{
if (gpu.get_name() == adapter_name)
{
m_swapchain.reset(m_thread_context.createSwapChain(display, gpu));
m_swapchain.reset(m_instance.create_swapchain(display, gpu));
gpu_found = true;
break;
}
@ -373,7 +377,7 @@ VKGSRender::VKGSRender() : GSRender()
if (!gpu_found || adapter_name.empty())
{
m_swapchain.reset(m_thread_context.createSwapChain(display, gpus[0]));
m_swapchain.reset(m_instance.create_swapchain(display, gpus[0]));
}
if (!m_swapchain)
@ -384,8 +388,6 @@ VKGSRender::VKGSRender() : GSRender()
}
m_device = const_cast<vk::render_device*>(&m_swapchain->get_device());
vk::set_current_thread_ctx(m_thread_context);
vk::set_current_renderer(m_swapchain->get_device());
m_swapchain_dims.width = m_frame->client_width();
@ -640,7 +642,7 @@ VKGSRender::~VKGSRender()
//Device handles/contexts
m_swapchain->destroy();
m_thread_context.close();
m_instance.destroy();
#if defined(HAVE_X11) && defined(HAVE_VULKAN)
if (m_display_handle)
@ -1923,16 +1925,16 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
if (vk::test_status_interrupt(vk::heap_dirty))
{
if (m_attrib_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() ||
m_vertex_env_ring_info.dirty() ||
m_fragment_texture_params_ring_info.dirty() ||
m_vertex_layout_ring_info.dirty() ||
m_fragment_constants_ring_info.dirty() ||
m_index_buffer_ring_info.dirty() ||
m_transform_constants_ring_info.dirty() ||
m_texture_upload_buffer_ring_info.dirty() ||
m_raster_env_ring_info.dirty())
if (m_attrib_ring_info.is_dirty() ||
m_fragment_env_ring_info.is_dirty() ||
m_vertex_env_ring_info.is_dirty() ||
m_fragment_texture_params_ring_info.is_dirty() ||
m_vertex_layout_ring_info.is_dirty() ||
m_fragment_constants_ring_info.is_dirty() ||
m_index_buffer_ring_info.is_dirty() ||
m_transform_constants_ring_info.is_dirty() ||
m_texture_upload_buffer_ring_info.is_dirty() ||
m_raster_env_ring_info.is_dirty())
{
std::lock_guard lock(m_secondary_cb_guard);
m_secondary_command_buffer.begin();

View file

@ -1,6 +1,12 @@
#pragma once
#include "Emu/RSX/GSRender.h"
#include "VKHelpers.h"
#include "vkutils/descriptors.hpp"
#include "vkutils/data_heap.h"
#include "vkutils/instance.hpp"
#include "vkutils/sync.h"
#include "vkutils/swapchain.hpp"
#include "VKTextureCache.h"
#include "VKRenderTargets.h"
#include "VKFormats.h"
@ -112,7 +118,7 @@ namespace vk
if (pending)
{
vk::reset_fence(submit_fence);
submit_fence->reset();
vk::on_event_completed(eid_tag);
pending = false;
@ -136,7 +142,7 @@ namespace vk
if (pending)
{
vk::reset_fence(submit_fence);
submit_fence->reset();
vk::on_event_completed(eid_tag);
pending = false;
@ -397,7 +403,7 @@ private:
std::unique_ptr<vk::program_cache> m_prog_buffer;
std::unique_ptr<vk::swapchain_base> m_swapchain;
vk::context m_thread_context;
vk::instance m_instance;
vk::render_device *m_device;
//Vulkan internals

View file

@ -10,6 +10,8 @@
#include "VKCommandStream.h"
#include "VKRenderPass.h"
#include "vkutils/scratch.h"
#include "vkutils/device.h"
#include "Emu/RSX/rsx_methods.h"
#include "Utilities/mutex.h"
#include "Utilities/lockless.h"
#include <new>
@ -19,21 +21,9 @@ namespace vk
{
extern chip_class g_chip_class;
const context* g_current_vulkan_ctx = nullptr;
const render_device* g_current_renderer;
std::unique_ptr<buffer> g_scratch_buffer;
std::unordered_map<VkImageViewType, std::unique_ptr<viewable_image>> g_null_image_views;
std::unordered_map<u32, std::unique_ptr<image>> g_typeless_textures;
std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;
std::unordered_map<u32, std::unique_ptr<vk::overlay_pass>> g_overlay_passes;
// General purpose upload heap
// TODO: Clean this up and integrate cleanly with VKGSRender
data_heap g_upload_heap;
VkSampler g_null_sampler = nullptr;
rsx::atomic_bitmask_t<runtime_state, u64> g_runtime_state;
// Driver compatibility workarounds
@ -47,244 +37,6 @@ namespace vk
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;
// Host reallocation hook with the Vulkan allocator callback signature.
// pUserData and allocationScope are accepted for signature compatibility and are not used.
// Only Windows toolchains are supported; every other platform aborts.
VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, usz size, usz alignment, VkSystemAllocationScope allocationScope)
{
#if defined(_MSC_VER)
	void* const resized = _aligned_realloc(pOriginal, size, alignment);
	return resized;
#elif defined(_WIN32)
	void* const resized = __mingw_aligned_realloc(pOriginal, size, alignment);
	return resized;
#else
	// No aligned realloc implementation is wired up for this platform
	std::abort();
#endif
}
// Host allocation hook with the Vulkan allocator callback signature.
// pUserData and allocationScope are accepted for signature compatibility and are not used.
// Only Windows toolchains are supported; every other platform aborts.
VKAPI_ATTR void* VKAPI_CALL mem_alloc(void* pUserData, usz size, usz alignment, VkSystemAllocationScope allocationScope)
{
#if defined(_MSC_VER)
	void* const block = _aligned_malloc(size, alignment);
	return block;
#elif defined(_WIN32)
	void* const block = __mingw_aligned_malloc(size, alignment);
	return block;
#else
	// No aligned allocation implementation is wired up for this platform
	std::abort();
#endif
}
// Host free hook with the Vulkan allocator callback signature.
// Releases memory through the aligned-free routine of the active Windows toolchain.
// pUserData is not used. Every non-Windows platform aborts.
VKAPI_ATTR void VKAPI_CALL mem_free(void* pUserData, void* pMemory)
{
#if defined(_MSC_VER)
	_aligned_free(pMemory);
	return;
#elif defined(_WIN32)
	__mingw_aligned_free(pMemory);
	return;
#else
	// No aligned free implementation is wired up for this platform
	std::abort();
#endif
}
// Replaces the backing buffer of this heap with a larger one.
// size: number of additional bytes requested on top of the current m_size.
// Returns false when the aligned new size reaches the 1GiB limit or when the heap is shadowed;
// returns true after a new host-visible, host-coherent buffer has been created.
// The previous buffer is handed to the resource manager instead of being destroyed here.
bool data_heap::grow(usz size)
{
// Create new heap. All sizes are aligned up by 64M, up to 1GiB
const usz size_limit = 1024 * 0x100000;
const usz aligned_new_size = utils::align(m_size + size, 64 * 0x100000);
// NOTE: the comparison is inclusive, so a heap of exactly 1GiB is also rejected
if (aligned_new_size >= size_limit)
{
// Too large
return false;
}
if (shadow)
{
// Shadowed. Growing this can be messy as it requires double allocation (macOS only)
return false;
}
// Wait for DMA activity to end
g_fxo->get<rsx::dma_manager>()->sync();
if (mapped)
{
// Force reset mapping
unmap(true);
}
// Carry the usage flags of the old buffer over to its replacement
VkBufferUsageFlags usage = heap->info.usage;
const auto device = get_current_renderer();
const auto& memory_map = device->get_memory_mapping();
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
auto memory_index = memory_map.host_visible_coherent;
// Update heap information and reset the allocator
::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
get_resource_manager()->dispose(heap);
heap = std::make_unique<buffer>(*device, aligned_new_size, memory_index, memory_flags, usage, 0);
if (notify_on_grow)
{
// Signal the heap_changed runtime status to whoever polls it
raise_status_interrupt(vk::heap_changed);
}
return true;
}
// Builds the allocation callback table that routes Vulkan host allocations
// through vk::mem_alloc / vk::mem_realloc / vk::mem_free.
// Returns the table by value.
VkAllocationCallbacks default_callbacks()
{
	// Value-initialize the struct: pUserData and the optional internal allocation
	// notification callbacks are not set below and must be null rather than indeterminate.
	VkAllocationCallbacks callbacks = {};
	callbacks.pfnAllocation = vk::mem_alloc;
	callbacks.pfnFree = vk::mem_free;
	callbacks.pfnReallocation = vk::mem_realloc;
	return callbacks;
}
// Returns the shared placeholder sampler, creating it on first use.
// The sampler uses nearest filtering, repeat addressing, no anisotropy and no depth compare.
// The handle is cached in g_null_sampler and reused by later calls.
VkSampler null_sampler()
{
	if (g_null_sampler)
		return g_null_sampler;

	VkSamplerCreateInfo sampler_info = {};
	sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
	sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
	sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
	sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
	sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
	sampler_info.anisotropyEnable = VK_FALSE;
	sampler_info.compareEnable = VK_FALSE;
	sampler_info.unnormalizedCoordinates = VK_FALSE;
	sampler_info.mipLodBias = 0;
	sampler_info.maxAnisotropy = 0;
	sampler_info.magFilter = VK_FILTER_NEAREST;
	sampler_info.minFilter = VK_FILTER_NEAREST;
	sampler_info.compareOp = VK_COMPARE_OP_NEVER;
	sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;

	// Check the result like the other Vulkan object creation calls in this file,
	// instead of silently handing a null handle back to the caller on failure.
	CHECK_RESULT(vkCreateSampler(*g_current_renderer, &sampler_info, nullptr, &g_null_sampler));
	return g_null_sampler;
}
// Returns a view of a small placeholder image for the requested view type, creating the image on first use.
// cmd:  command buffer that receives the layout transitions and the clear for a newly created image.
// type: view type to provide a placeholder for (1D, 2D, 3D, cube or 2D array).
// Returns nullptr (after logging a fatal error) for any other view type.
// Created images are cached in g_null_image_views, keyed by view type.
vk::image_view* null_image_view(vk::command_buffer &cmd, VkImageViewType type)
{
	if (auto found = g_null_image_views.find(type);
		found != g_null_image_views.end())
	{
		return found->second->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
	}

	VkImageType image_type;
	u32 num_layers = 1;
	u32 flags = 0;
	u16 size = 4;

	switch (type)
	{
	case VK_IMAGE_VIEW_TYPE_1D:
		image_type = VK_IMAGE_TYPE_1D;
		size = 1;
		break;
	case VK_IMAGE_VIEW_TYPE_2D:
		image_type = VK_IMAGE_TYPE_2D;
		break;
	case VK_IMAGE_VIEW_TYPE_3D:
		image_type = VK_IMAGE_TYPE_3D;
		break;
	case VK_IMAGE_VIEW_TYPE_CUBE:
		image_type = VK_IMAGE_TYPE_2D;
		flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
		num_layers = 6;
		break;
	case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
		image_type = VK_IMAGE_TYPE_2D;
		num_layers = 2;
		break;
	default:
		rsx_log.fatal("Unexpected image view type 0x%x", static_cast<u32>(type));
		return nullptr;
	}

	auto& tex = g_null_image_views[type];
	tex = std::make_unique<viewable_image>(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		image_type, VK_FORMAT_B8G8R8A8_UNORM, size, size, 1, 1, num_layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
		VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags);

	// Initialize memory to transparent black
	tex->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

	VkClearColorValue clear_color = {};
	// Cover every array layer: cube and 2D-array placeholders have more than one,
	// and a range limited to layer 0 would leave the remaining layers uninitialized.
	VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, num_layers };
	vkCmdClearColorImage(cmd, tex->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);

	// Prep for shader access
	tex->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

	// Return view
	return tex->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
}
// Returns a cached transfer-only helper image for the given format / format class pair
// that is at least requested_width x requested_height in size.
// A cached image that is too small is handed to the resource manager and replaced by one
// that covers both the old and the requested dimensions, rounded up to multiples of 1024.
vk::image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height)
{
	const u32 key = (format_class << 24u) | format;
	auto& slot = g_typeless_textures[key];

	// Fast path: the cached image already satisfies the request
	const bool reusable = slot && slot->width() >= requested_width && slot->height() >= requested_height;
	if (reusable)
	{
		return slot.get();
	}

	if (slot)
	{
		// Never shrink in either dimension when replacing an existing helper
		requested_width = std::max(requested_width, slot->width());
		requested_height = std::max(requested_height, slot->height());
		get_resource_manager()->dispose(slot);
	}

	const u32 padded_width = utils::align(requested_width, 1024u);
	const u32 padded_height = utils::align(requested_height, 1024u);

	slot.reset(new vk::image(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		VK_IMAGE_TYPE_2D, format, padded_width, padded_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
		VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0));

	return slot.get();
}
// Returns the shared scratch buffer, making sure it holds at least min_required_size bytes.
// A buffer that is too small is handed to the resource manager before a replacement is allocated.
// New allocations are at least 64MiB and otherwise the request rounded up to a multiple of 1MiB.
vk::buffer* get_scratch_buffer(u32 min_required_size)
{
	const bool too_small = g_scratch_buffer && g_scratch_buffer->size() < min_required_size;
	if (too_small)
	{
		// Scratch heap cannot fit requirements. Discard it and allocate a new one.
		vk::get_resource_manager()->dispose(g_scratch_buffer);
	}

	if (g_scratch_buffer)
	{
		return g_scratch_buffer.get();
	}

	// Choose optimal size
	const u64 alloc_size = std::max<u64>(64 * 0x100000, utils::align(min_required_size, 0x100000));
	const auto memory_type = g_current_renderer->get_memory_mapping().device_local;
	const VkBufferUsageFlags usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;

	g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size,
		memory_type, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		usage, 0);

	return g_scratch_buffer.get();
}
// Returns the general purpose upload heap, creating its backing buffer on first use.
data_heap* get_upload_heap()
{
	data_heap* const upload_heap = &g_upload_heap;

	if (upload_heap->heap)
	{
		// Already backed by a buffer
		return upload_heap;
	}

	upload_heap->create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 64 * 0x100000, "auxilliary upload heap", 0x100000);
	return upload_heap;
}
void reset_compute_tasks()
{
for (const auto &p : g_compute_tasks)
@ -307,30 +59,21 @@ namespace vk
vk::reset_resolve_resources();
vk::reset_overlay_passes();
g_upload_heap.reset_allocation_stats();
get_upload_heap()->reset_allocation_stats();
}
void destroy_global_resources()
{
VkDevice dev = *g_current_renderer;
VkDevice dev = *g_render_device;
vk::clear_renderpass_cache(dev);
vk::clear_framebuffer_cache();
vk::clear_resolve_helpers();
vk::clear_dma_resources();
vk::vmm_reset();
vk::get_resource_manager()->destroy();
vk::clear_scratch_resources();
g_null_image_views.clear();
g_scratch_buffer.reset();
g_upload_heap.destroy();
g_typeless_textures.clear();
if (g_null_sampler)
{
vkDestroySampler(dev, g_null_sampler, nullptr);
g_null_sampler = nullptr;
}
vk::get_upload_heap()->destroy();
for (const auto& p : g_compute_tasks)
{
@ -345,34 +88,24 @@ namespace vk
g_overlay_passes.clear();
}
// Records ctx as the current Vulkan context.
// Only the address is stored in g_current_vulkan_ctx, so ctx must outlive every later use of that pointer.
void set_current_thread_ctx(const vk::context &ctx)
{
g_current_vulkan_ctx = &ctx;
}
// Returns the context last stored in g_current_vulkan_ctx.
// The pointer starts out as nullptr, so the result may be null if nothing has been registered yet.
const context *get_current_thread_ctx()
{
return g_current_vulkan_ctx;
}
const vk::render_device *get_current_renderer()
{
return g_current_renderer;
return g_render_device;
}
void set_current_renderer(const vk::render_device &device)
{
g_current_renderer = &device;
g_render_device = &device;
g_runtime_state.clear();
g_drv_no_primitive_restart = false;
g_drv_sanitize_fp_values = false;
g_drv_disable_fence_reset = false;
g_drv_emulate_cond_render = (g_cfg.video.relaxed_zcull_sync && !g_current_renderer->get_conditional_render_support());
g_drv_emulate_cond_render = (g_cfg.video.relaxed_zcull_sync && !g_render_device->get_conditional_render_support());
g_num_processed_frames = 0;
g_num_total_frames = 0;
g_heap_compatible_buffer_types = 0;
const auto& gpu = g_current_renderer->gpu();
const auto& gpu = g_render_device->gpu();
const auto gpu_name = gpu.get_name();
g_driver_vendor = gpu.get_driver_vendor();
@ -424,15 +157,15 @@ namespace vk
for (const auto &usage : types)
{
info.usage = usage;
CHECK_RESULT(vkCreateBuffer(*g_current_renderer, &info, nullptr, &tmp));
CHECK_RESULT(vkCreateBuffer(*g_render_device, &info, nullptr, &tmp));
vkGetBufferMemoryRequirements(*g_current_renderer, tmp, &memory_reqs);
if (g_current_renderer->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr))
vkGetBufferMemoryRequirements(*g_render_device, tmp, &memory_reqs);
if (g_render_device->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr))
{
g_heap_compatible_buffer_types |= usage;
}
vkDestroyBuffer(*g_current_renderer, tmp, nullptr);
vkDestroyBuffer(*g_render_device, tmp, nullptr);
}
}
}
@ -479,277 +212,6 @@ namespace vk
return g_drv_emulate_cond_render;
}
// Records a buffer memory barrier for [offset, offset + length) of buffer into cmd.
// src_stage/src_mask describe the work to wait for, dst_stage/dst_mask the work that must wait.
// Any renderpass still open on cmd is ended before the barrier is recorded.
// No queue family ownership transfer is performed.
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
{
	const bool inside_renderpass = vk::is_renderpass_open(cmd);
	if (inside_renderpass)
	{
		vk::end_renderpass(cmd);
	}

	VkBufferMemoryBarrier buffer_barrier = {};
	buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
	buffer_barrier.srcAccessMask = src_mask;
	buffer_barrier.dstAccessMask = dst_mask;
	buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	buffer_barrier.buffer = buffer;
	buffer_barrier.offset = offset;
	buffer_barrier.size = length;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &buffer_barrier, 0, nullptr);
}
// Records an image memory barrier for the given subresource range of image into cmd,
// transitioning it from current_layout to new_layout.
// src_stage/src_mask describe the work to wait for, dst_stage/dst_mask the work that must wait.
// Any renderpass still open on cmd is ended before the barrier is recorded.
// No queue family ownership transfer is performed.
void insert_image_memory_barrier(
	VkCommandBuffer cmd, VkImage image,
	VkImageLayout current_layout, VkImageLayout new_layout,
	VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage,
	VkAccessFlags src_mask, VkAccessFlags dst_mask,
	const VkImageSubresourceRange& range)
{
	const bool inside_renderpass = vk::is_renderpass_open(cmd);
	if (inside_renderpass)
	{
		vk::end_renderpass(cmd);
	}

	VkImageMemoryBarrier image_barrier = {};
	image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	image_barrier.srcAccessMask = src_mask;
	image_barrier.dstAccessMask = dst_mask;
	image_barrier.oldLayout = current_layout;
	image_barrier.newLayout = new_layout;
	image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.image = image;
	image_barrier.subresourceRange = range;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &image_barrier);
}
// Records a pure execution dependency between src_stage and dst_stage into cmd.
// No memory barriers are attached. Any renderpass still open on cmd is ended first.
void insert_execution_barrier(VkCommandBuffer cmd, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage)
{
	const bool inside_renderpass = vk::is_renderpass_open(cmd);
	if (inside_renderpass)
	{
		vk::end_renderpass(cmd);
	}

	// All barrier arrays are empty: only the stage dependency is recorded
	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 0, nullptr);
}
// Records a layout transition for the given subresource range of a raw VkImage into cmd.
// The destination access mask and stage are derived from new_layout, the source access mask
// and stage from current_layout; both default to no access at TOP_OF_PIPE.
// Any renderpass still open on cmd is ended first.
// Throws (fmt::throw_exception) when new_layout is UNDEFINED, PREINITIALIZED or any layout not listed below.
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
//Prepare an image to match the new layout..
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = new_layout;
barrier.oldLayout = current_layout;
barrier.image = image;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = 0;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = range;
VkPipelineStageFlags src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkPipelineStageFlags dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
// Destination side: which accesses and stages must wait for the transition
switch (new_layout)
{
case VK_IMAGE_LAYOUT_GENERAL:
// Avoid this layout as it is unoptimized
barrier.dstAccessMask =
{
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
};
dst_stage =
{
VK_PIPELINE_STAGE_TRANSFER_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
};
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
// UNDEFINED, PREINITIALIZED and every unlisted layout are rejected as transition targets
default:
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_PREINITIALIZED:
fmt::throw_exception("Attempted to transition to an invalid layout");
}
// Source side: which earlier accesses and stages the transition must wait for
switch (current_layout)
{
case VK_IMAGE_LAYOUT_GENERAL:
// Avoid this layout as it is unoptimized
if (new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ||
new_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
// Going to a read-only layout: wait for prior writes, chosen by image aspect
if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
{
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
}
else
{
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
}
}
else if (new_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
{
// Finish reading before writing
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
}
else
{
// Any other target: wait on the full set of read and write accesses listed here
barrier.srcAccessMask =
{
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
};
src_stage =
{
VK_PIPELINE_STAGE_TRANSFER_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
};
}
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
// Unlisted source layouts (including UNDEFINED) keep srcAccessMask = 0 and src_stage = TOP_OF_PIPE
default:
break; //TODO Investigate what happens here
}
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}
// Transitions the given subresource range of image to new_layout and updates the layout tracked on the image.
// Does nothing when the tracked layout already equals new_layout.
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
	const VkImageLayout tracked_layout = image->current_layout;

	if (tracked_layout != new_layout)
	{
		change_image_layout(cmd, image->value, tracked_layout, new_layout, range);
		image->current_layout = new_layout;
	}
}
// Transitions all mip levels and layers of image to new_layout and updates the layout tracked on the image.
// Does nothing when the tracked layout already equals new_layout.
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout)
{
	const VkImageLayout tracked_layout = image->current_layout;

	if (tracked_layout != new_layout)
	{
		// Whole-image range built from the image's own aspect, mip count and layer count
		change_image_layout(cmd, image->value, tracked_layout, new_layout, { image->aspect(), 0, image->mipmaps(), 0, image->layers() });
		image->current_layout = new_layout;
	}
}
// Records a barrier that makes attachment writes to image visible to shader reads in the
// vertex and fragment stages, optionally changing the layout from current_layout to new_layout.
// Any renderpass still open on cmd is ended first.
// For non-color aspects the barrier is skipped when depth writes are disabled and the layout does not change.
void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range)
{
	// NOTE: Sampling from an attachment in ATTACHMENT_OPTIMAL layout on some hw ends up with garbage output
	// Transition to GENERAL if this resource is both input and output
	// TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround
	// TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully
	if (vk::is_renderpass_open(cmd))
	{
		vk::end_renderpass(cmd);
	}

	const bool is_color = (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

	if (!is_color && !rsx::method_registers.depth_write_enabled() && current_layout == new_layout)
	{
		// Nothing to do
		return;
	}

	// Producer side depends on which kind of attachment was written
	const VkAccessFlags src_access = is_color
		? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
		: VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	const VkPipelineStageFlags src_stage = is_color
		? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
		: VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	const VkPipelineStageFlags dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;

	VkImageMemoryBarrier texture_barrier = {};
	texture_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	texture_barrier.srcAccessMask = src_access;
	texture_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
	texture_barrier.oldLayout = current_layout;
	texture_barrier.newLayout = new_layout;
	texture_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	texture_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	texture_barrier.image = image;
	texture_barrier.subresourceRange = range;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &texture_barrier);
}
// Texture barrier for a tracked image. Covers mip level 0 / array layer 0 of the image's
// aspect and records new_layout on the image object afterwards.
void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout)
{
	// This barrier is pointless for multisampled images as they require a resolve operation before access anyway
	if (image->samples() <= 1)
	{
		insert_texture_barrier(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 });
		image->current_layout = new_layout;
	}
}
void raise_status_interrupt(runtime_state status)
{
g_runtime_state |= status;
@ -801,84 +263,7 @@ namespace vk
return (g_num_processed_frames > 0)? g_num_processed_frames - 1: 0;
}
// Makes a fence usable for another submission.
// - pFence: fence to reset; the pointer stays valid after the call in both paths.
// When g_drv_disable_fence_reset is set the fence is rebuilt from scratch instead of
// being reset; otherwise fence::reset() is used.
void reset_fence(fence *pFence)
{
	if (g_drv_disable_fence_reset)
	{
		// pFence is passed by value, so deleting it and assigning a freshly allocated fence to
		// the local pointer would leave the caller holding a dangling pointer and leak the
		// replacement. Destroy and re-construct the object in place instead, so the caller's
		// pointer keeps referring to a live, brand new fence. (Placement new requires <new>.)
		pFence->~fence();
		::new (static_cast<void*>(pFence)) fence(*g_current_renderer);
	}
	else
	{
		pFence->reset();
	}
}
// Blocks until the fence is signaled.
// - pFence: fence to wait on; wait_flush() is called on it before any Vulkan wait.
// - timeout: when non-zero, passed to vkWaitForFences multiplied by 1000; when zero, the fence
//   status is polled in a loop with no time limit.
// Returns the vkWaitForFences result in the timed path; in the polling path VK_SUCCESS once
// signaled, or the failing status after handing it to die_with_error().
VkResult wait_for_fence(fence* pFence, u64 timeout)
{
	pFence->wait_flush();

	if (timeout)
	{
		return vkWaitForFences(*g_current_renderer, 1, &pFence->handle, VK_FALSE, timeout * 1000ull);
	}

	// Untimed wait: spin on the fence status
	for (;;)
	{
		const auto status = vkGetFenceStatus(*g_current_renderer, pFence->handle);
		if (!status)
		{
			// VK_SUCCESS, the fence is signaled
			return VK_SUCCESS;
		}

		if (status != VK_NOT_READY)
		{
			die_with_error(status);
			return status;
		}
	}
}
// Spins until the event reports VK_EVENT_SET.
// - pEvent: event whose status() is polled.
// - timeout: 0 waits forever; otherwise the maximum wait, in the same units as get_system_time()
//   (presumably microseconds - confirm).
// Returns VK_SUCCESS when the event is set, VK_TIMEOUT when the timeout elapses, or the
// unexpected status after passing it to die_with_error().
VkResult wait_for_event(event* pEvent, u64 timeout)
{
// Timestamp of the first unsuccessful poll; 0 means "not sampled yet"
u64 t = 0;
while (true)
{
switch (const auto status = pEvent->status())
{
case VK_EVENT_SET:
return VK_SUCCESS;
case VK_EVENT_RESET:
break;
default:
die_with_error(status);
return status;
}
if (timeout)
{
// The clock is only read after the first failed poll, so an already-set event costs no timer query
if (!t)
{
t = get_system_time();
continue;
}
if ((get_system_time() - t) > timeout)
{
rsx_log.error("[vulkan] vk::wait_for_event has timed out!");
return VK_TIMEOUT;
}
}
//std::this_thread::yield();
// CPU pause hint between polls (compiler-specific intrinsic)
#ifdef _MSC_VER
_mm_pause();
#else
__builtin_ia32_pause();
#endif
}
}
void do_query_cleanup(vk::command_buffer& cmd)
{
@ -887,15 +272,4 @@ namespace vk
renderer->emergency_query_cleanup(&cmd);
}
// Debug report callback that breaks into the debugger on Windows builds.
// None of the message parameters are inspected. Always returns VK_FALSE.
VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType,
	u64 srcObject, usz location, s32 msgCode,
	const char *pLayerPrefix, const char *pMsg, void *pUserData)
{
#ifdef _WIN32
	DebugBreak();
#endif

	return VK_FALSE;
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,4 @@
#pragma once
#include "VKHelpers.h"
#include "VKVertexProgram.h"
#include "VKFragmentProgram.h"
#include "VKRenderTargets.h"
@ -7,6 +6,10 @@
#include "VKResourceManager.h"
#include "VKRenderPass.h"
#include "VKPipelineCompiler.h"
#include "vkutils/data_heap.h"
#include "vkutils/image.h"
#include "vkutils/image_helpers.h"
#include "vkutils/sampler.h"
#include "../Overlays/overlays.h"

View file

@ -1,7 +1,7 @@
#include "stdafx.h"
#include "VKPipelineCompiler.h"
#include "VKRenderPass.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include "Utilities/Thread.h"
#include <thread>
@ -53,7 +53,7 @@ namespace vk
std::unique_ptr<glsl::program> pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout)
{
VkPipeline pipeline;
vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &create_info, nullptr, &pipeline);
vkCreateComputePipelines(*g_render_device, nullptr, 1, &create_info, nullptr, &pipeline);
return std::make_unique<vk::glsl::program>(*m_device, pipeline, pipe_layout);
}
@ -91,7 +91,7 @@ namespace vk
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
if (vk::get_current_renderer()->get_depth_bounds_support())
if (g_render_device->get_depth_bounds_support())
{
dynamic_state_descriptors.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
}
@ -206,9 +206,7 @@ namespace vk
}
ensure(num_worker_threads >= 1);
const vk::render_device* dev = vk::get_current_renderer();
ensure(dev); // "Cannot initialize pipe compiler before creating a logical device"
ensure(g_render_device); // "Cannot initialize pipe compiler before creating a logical device"
// Create the thread pool
g_pipe_compilers = std::make_unique<named_thread_group<pipe_compiler>>("RSX.W", num_worker_threads);
@ -217,7 +215,7 @@ namespace vk
// Initialize the workers. At least one inline compiler shall exist (doesn't actually run)
for (pipe_compiler& compiler : *g_pipe_compilers.get())
{
compiler.initialize(dev);
compiler.initialize(g_render_device);
}
}

View file

@ -3,7 +3,7 @@
#include "Utilities/hash.h"
#include "Utilities/lockless.h"
#include "VKProgramPipeline.h"
#include "vkutils/graphics_pipeline_state.h"
#include "vkutils/graphics_pipeline_state.hpp"
namespace vk
{

View file

@ -1,6 +1,6 @@
#include "stdafx.h"
#include "VKGSRender.h"
#include "vkutils/buffer_view.h"
#include "vkutils/buffer_object.h"
#include "Emu/Cell/Modules/cellVideoOut.h"
#include "util/asm.hpp"

View file

@ -1,7 +1,6 @@
#include "stdafx.h"
#include "VKProgramPipeline.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include <string>
namespace vk
@ -36,8 +35,7 @@ namespace vk
vs_info.pCode = m_compiled.data();
vs_info.flags = 0;
VkDevice dev = *vk::get_current_renderer();
vkCreateShaderModule(dev, &vs_info, nullptr, &m_handle);
vkCreateShaderModule(*g_render_device, &vs_info, nullptr, &m_handle);
return m_handle;
}
@ -49,8 +47,7 @@ namespace vk
if (m_handle)
{
VkDevice dev = *vk::get_current_renderer();
vkDestroyShaderModule(dev, m_handle, nullptr);
vkDestroyShaderModule(*g_render_device, m_handle, nullptr);
m_handle = nullptr;
}
}

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "VKQueryPool.h"
#include "VKRenderPass.h"
#include "VKResourceManager.h"
namespace vk

View file

@ -2,7 +2,7 @@
#include "Utilities/mutex.h"
#include "VKRenderPass.h"
#include "VKHelpers.h"
#include "vkutils/image.h"
namespace vk
{

View file

@ -1,10 +1,16 @@
#pragma once
#include "util/types.hpp"
#include "VKHelpers.h"
#include "VKFormats.h"
#include "../Common/surface_store.h"
#include "VKFormats.h"
#include "VKHelpers.h"
#include "vkutils/barriers.h"
#include "vkutils/data_heap.h"
#include "vkutils/device.h"
#include "vkutils/image.h"
#include "vkutils/scratch.h"
namespace vk
{
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
@ -21,7 +27,6 @@ namespace vk
if (!resolve_surface)
{
// Create a resolve surface
auto pdev = vk::get_current_renderer();
const auto resolve_w = width() * samples_x;
const auto resolve_h = height() * samples_y;
@ -29,8 +34,8 @@ namespace vk
usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
resolve_surface.reset(new vk::viewable_image(
*pdev,
pdev->get_memory_mapping().device_local,
*g_render_device,
g_render_device->get_memory_mapping().device_local,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D,
format(),

View file

@ -1,6 +1,5 @@
#pragma once
#include "VKHelpers.h"
#include "VKCompute.h"
#include "VKOverlays.h"

View file

@ -1,6 +1,6 @@
#pragma once
#include "VKHelpers.h"
#include "vkutils/query_pool.h"
#include "vkutils/image.h"
#include "vkutils/query_pool.hpp"
#include "vkutils/sampler.h"
#include <unordered_map>
@ -24,7 +24,7 @@ namespace vk
std::vector<std::unique_ptr<vk::query_pool>> m_disposed_query_pools;
eid_scope_t(u64 _eid):
eid(_eid), m_device(vk::get_current_renderer())
eid(_eid), m_device(g_render_device)
{}
~eid_scope_t()

View file

@ -1,6 +1,6 @@
#pragma once
#include "VKProgramBuffer.h"
#include "VKHelpers.h"
#include "vkutils/descriptors.hpp"
#include <unordered_map>
namespace vk

View file

@ -1,9 +1,11 @@
#pragma once
#include "VKHelpers.h"
#include "VKVertexProgram.h"
#include "VKFragmentProgram.h"
#include "VKRenderPass.h"
#include "VKPipelineCompiler.h"
#include "vkutils/framebuffer_object.hpp"
#include "../Common/TextGlyphs.h"
#include <unordered_map>

View file

@ -1,64 +1,20 @@
#include "stdafx.h"
#include "VKHelpers.h"
#include "../GCM.h"
#include "../rsx_utils.h"
#include "VKFormats.h"
#include "VKCompute.h"
#include "VKRenderPass.h"
#include "VKRenderTargets.h"
#include "vkutils/data_heap.h"
#include "vkutils/image_helpers.h"
#include "../GCM.h"
#include "../rsx_utils.h"
#include "util/asm.hpp"
namespace vk
{
// Returns a component mapping where every channel maps to itself (R->R, G->G, B->B, A->A).
VkComponentMapping default_component_map()
{
	// VkComponentMapping member order is r, g, b, a
	return { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
}
// Returns the subresource describing the color aspect of mip level 0, array layer 0.
VkImageSubresource default_image_subresource()
{
	// VkImageSubresource member order is aspectMask, mipLevel, arrayLayer
	return { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 };
}
// Builds a VkImageSubresourceRange from its individual components.
// Note the parameter order (layers first, then mips) differs from the struct's member order.
VkImageSubresourceRange get_image_subresource_range(u32 base_layer, u32 base_mip, u32 layer_count, u32 level_count, VkImageAspectFlags aspect)
{
	// VkImageSubresourceRange member order is aspectMask, baseMipLevel, levelCount, baseArrayLayer, layerCount
	return { aspect, base_mip, level_count, base_layer, layer_count };
}
// Maps a format to the image aspects it contains:
// depth-only formats -> DEPTH, combined depth-stencil formats -> DEPTH | STENCIL,
// anything else is treated as COLOR.
VkImageAspectFlags get_aspect_flags(VkFormat format)
{
	switch (format)
	{
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D32_SFLOAT:
		return VK_IMAGE_ASPECT_DEPTH_BIT;
	default:
		return VK_IMAGE_ASPECT_COLOR_BIT;
	}
}
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes)
{
// Always validate
@ -1005,31 +961,6 @@ namespace vk
}
}
// Combines a base component swizzle with a per-channel remap description.
// - base_remap: the four base swizzles that REMAP entries select from.
// - remap_vector.second[i]: control value for channel i (ONE, ZERO or REMAP).
// - remap_vector.first[i]: index into base_remap used when the control value is REMAP.
// Returns the mapping with channel 0 placed in .a and channels 1..3 in .r/.g/.b
// (presumably the input channels are in ARGB order - confirm against callers).
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector)
{
VkComponentSwizzle final_mapping[4] = {};
for (u8 channel = 0; channel < 4; ++channel)
{
switch (remap_vector.second[channel])
{
case CELL_GCM_TEXTURE_REMAP_ONE:
final_mapping[channel] = VK_COMPONENT_SWIZZLE_ONE;
break;
case CELL_GCM_TEXTURE_REMAP_ZERO:
final_mapping[channel] = VK_COMPONENT_SWIZZLE_ZERO;
break;
case CELL_GCM_TEXTURE_REMAP_REMAP:
// NOTE(review): remap_vector.first[channel] is used as an index without a range check
final_mapping[channel] = base_remap[remap_vector.first[channel]];
break;
default:
// The channel keeps its zero-initialized value in this case
rsx_log.error("Unknown remap lookup value %d", remap_vector.second[channel]);
}
}
// VkComponentMapping member order is r, g, b, a
return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] };
}
void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, const rsx::typeless_xfer& xfer_info)
{
vk::image* real_src = src;

View file

@ -1,9 +1,10 @@
#pragma once
#include "util/types.hpp"
#include "VKRenderTargets.h"
#include "VKResourceManager.h"
#include "VKDMA.h"
#include "vkutils/image_helpers.h"
#include "../Common/texture_cache.h"
#include <memory>

View file

@ -2,7 +2,7 @@
#include "VKGSRender.h"
#include "../Common/BufferUtils.h"
#include "../rsx_methods.h"
#include "vkutils/buffer_view.h"
#include "vkutils/buffer_object.h"
namespace vk
{

View file

@ -3,6 +3,7 @@
#include "VKVertexProgram.h"
#include "VKCommonDecompiler.h"
#include "VKHelpers.h"
#include "vkutils/device.h"
#include "../Common/GLSLCommon.h"
@ -186,7 +187,7 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.emulate_zclip_transform = true;
properties2.emulate_depth_clip_only = vk::get_current_renderer()->get_shader_types_support().allow_float64;
properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_spirv);
@ -311,7 +312,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
void VKVertexDecompilerThread::Task()
{
m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering();
m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
m_binding_table = vk::g_render_device->get_pipeline_binding_table();
m_shader = Decompile();
vk_prog->SetInputs(inputs);

View file

@ -8,6 +8,12 @@
#define VK_USE_PLATFORM_XLIB_KHR
#endif
#pragma warning( push )
#pragma warning( disable : 4005 )
#include <vulkan/vulkan.h>
#include <vulkan/vk_sdk_platform.h>
#include "util/types.hpp"
#pragma warning(pop)
#include <util/types.hpp>

View file

@ -0,0 +1,121 @@
#include "barriers.h"
#include "commands.h"
#include "image.h"
#include "../../rsx_methods.h"
#include "../VKRenderPass.h"
namespace vk
{
// Records a single image memory barrier with fully caller-specified stages, access masks and
// layouts. Any renderpass open on cmd is ended before the barrier is recorded.
// No queue family ownership transfer is performed.
void insert_image_memory_barrier(
	VkCommandBuffer cmd, VkImage image,
	VkImageLayout current_layout, VkImageLayout new_layout,
	VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage,
	VkAccessFlags src_mask, VkAccessFlags dst_mask,
	const VkImageSubresourceRange& range)
{
	if (vk::is_renderpass_open(cmd))
	{
		vk::end_renderpass(cmd);
	}

	VkImageMemoryBarrier image_barrier = {};
	image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	image_barrier.srcAccessMask = src_mask;
	image_barrier.dstAccessMask = dst_mask;
	image_barrier.oldLayout = current_layout;
	image_barrier.newLayout = new_layout;
	image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.image = image;
	image_barrier.subresourceRange = range;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &image_barrier);
}
// Records a single buffer memory barrier over [offset, offset + length) of buffer with
// caller-specified stages and access masks. Any renderpass open on cmd is ended first.
// No queue family ownership transfer is performed.
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
{
	if (vk::is_renderpass_open(cmd))
	{
		vk::end_renderpass(cmd);
	}

	VkBufferMemoryBarrier buffer_barrier = {};
	buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
	buffer_barrier.srcAccessMask = src_mask;
	buffer_barrier.dstAccessMask = dst_mask;
	buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	buffer_barrier.buffer = buffer;
	buffer_barrier.offset = offset;
	buffer_barrier.size = length;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &buffer_barrier, 0, nullptr);
}
// Records a pipeline barrier between src_stage and dst_stage that carries no memory, buffer
// or image barriers. Any renderpass open on cmd is ended first.
void insert_execution_barrier(VkCommandBuffer cmd, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage)
{
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 0, nullptr);
}
// Records an image barrier that makes attachment writes visible to shader reads in the vertex
// and fragment stages. The source access/stage pair is chosen from range.aspectMask (color
// attachment writes when it is exactly COLOR, depth-stencil attachment writes otherwise).
// Any renderpass open on cmd is ended first.
void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range)
{
	// NOTE: Sampling from an attachment in ATTACHMENT_OPTIMAL layout on some hw ends up with garbage output
	// Transition to GENERAL if this resource is both input and output
	// TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround
	// TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully
	if (vk::is_renderpass_open(cmd))
	{
		vk::end_renderpass(cmd);
	}

	const bool is_color = (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

	if (!is_color && !rsx::method_registers.depth_write_enabled() && current_layout == new_layout)
	{
		// Depth target that is not being written and keeps its layout: nothing to do
		return;
	}

	const VkAccessFlags src_access = is_color ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	const VkPipelineStageFlags src_stage = is_color ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT : VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	const VkPipelineStageFlags dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;

	VkImageMemoryBarrier image_barrier = {};
	image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	image_barrier.srcAccessMask = src_access;
	image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
	image_barrier.oldLayout = current_layout;
	image_barrier.newLayout = new_layout;
	image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	image_barrier.image = image;
	image_barrier.subresourceRange = range;

	vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &image_barrier);
}
// Texture barrier for a tracked image. Covers mip level 0 / array layer 0 of the image's
// aspect and records new_layout on the image object afterwards.
void insert_texture_barrier(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout)
{
	// This barrier is pointless for multisampled images as they require a resolve operation before access anyway
	if (image->samples() <= 1)
	{
		insert_texture_barrier(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 });
		image->current_layout = new_layout;
	}
}
}

View file

@ -0,0 +1,23 @@
#pragma once
#include "../VulkanAPI.h"
// Helpers for recording pipeline barriers into a command buffer.
namespace vk
{
class image;
//Texture barrier applies to a texture to ensure writes to it are finished before any reads are attempted to avoid RAW hazards
void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
// Overload operating on a tracked vk::image; takes the current layout from the image object
void insert_texture_barrier(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout);
// Barrier over the byte range [offset, offset + length) of a buffer
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length,
VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask);
// Image barrier with fully caller-specified layouts, stages and access masks
void insert_image_memory_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout,
VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask,
const VkImageSubresourceRange& range);
// Barrier between two stage masks with no memory barriers attached; both masks default to ALL_COMMANDS
void insert_execution_barrier(VkCommandBuffer cmd,
VkPipelineStageFlags src_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VkPipelineStageFlags dst_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}

View file

@ -0,0 +1,88 @@
#include "buffer_object.h"
#include "device.h"
#include "shared.h"
namespace vk
{
// Creates a Vulkan buffer view over [offset, offset + size) of buffer, interpreted as format.
// The create info is kept in `info`; creation failure is reported through CHECK_RESULT.
buffer_view::buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size)
	: m_device(dev)
{
	info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
	info.buffer = buffer;
	info.format = format;
	info.offset = offset;
	info.range = size;

	CHECK_RESULT(vkCreateBufferView(m_device, &info, nullptr, &value));
}
// Destroys the Vulkan buffer view on the device it was created with.
buffer_view::~buffer_view()
{
vkDestroyBufferView(m_device, value, nullptr);
}
// Tests whether [address, address + size) lies inside the window covered by this view
// (info.offset .. info.offset + info.range).
// On success writes the start of the request relative to info.offset into `offset` and
// returns true; otherwise returns false and leaves `offset` untouched.
bool buffer_view::in_range(u32 address, u32 size, u32& offset) const
{
	// Request starts before the view
	if (address < info.offset)
		return false;

	const u32 relative_start = address - static_cast<u32>(info.offset);

	// Request starts past the end of the view
	if (relative_start > info.range)
		return false;

	// Request does not fit in what is left of the view
	if (size > info.range - relative_start)
		return false;

	offset = relative_start;
	return true;
}
// Creates a Vulkan buffer and binds freshly allocated device memory to it.
// - dev: device that owns the buffer.
// - size / usage / flags: forwarded to the buffer create info (exclusive sharing mode).
// - memory_type_index: preferred memory type; if the buffer's requirements do not allow it,
//   a compatible type matching access_flags is looked up on the device instead.
// - access_flags: memory property flags used for that fallback lookup.
// Throws (via fmt::throw_exception) when no compatible memory type exists; Vulkan failures are
// reported through CHECK_RESULT.
buffer::buffer(const vk::render_device& dev, u64 size, u32 memory_type_index, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags)
	: m_device(dev)
{
	info.size = size;
	info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	info.flags = flags;
	info.usage = usage;

	CHECK_RESULT(vkCreateBuffer(m_device, &info, nullptr, &value));

	// Allocate vram for this buffer
	VkMemoryRequirements memory_reqs;
	vkGetBufferMemoryRequirements(m_device, value, &memory_reqs);

	// Unsigned literal: memoryTypeBits is a 32-bit mask and shifting a signed 1 into bit 31 is not portable
	if (!(memory_reqs.memoryTypeBits & (1u << memory_type_index)))
	{
		// Suggested memory type is incompatible with this buffer.
		// Go through the bitset and test for requested props.
		if (!dev.get_compatible_memory_type(memory_reqs.memoryTypeBits, access_flags, &memory_type_index))
		{
			// The destructor does not run when a constructor throws, so release the handle here
			vkDestroyBuffer(m_device, value, nullptr);
			value = VK_NULL_HANDLE;
			fmt::throw_exception("No compatible memory type was found!");
		}
	}

	memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, memory_type_index);

	// A failed bind leaves the buffer unusable; surface it instead of silently continuing
	CHECK_RESULT(vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()));
}
// Destroys the Vulkan buffer handle. The backing memory is held by the `memory` unique_ptr member.
buffer::~buffer()
{
vkDestroyBuffer(m_device, value, nullptr);
}
// Maps `size` bytes of the backing memory block starting at `offset`; forwards to memory_block::map.
void* buffer::map(u64 offset, u64 size)
{
return memory->map(offset, size);
}
// Unmaps the backing memory block; forwards to memory_block::unmap.
void buffer::unmap()
{
memory->unmap();
}
// Returns the size the buffer was created with.
// NOTE: info.size is 64-bit; the value is narrowed to u32 here.
u32 buffer::size() const
{
return static_cast<u32>(info.size);
}
}

View file

@ -0,0 +1,45 @@
#pragma once
#include "../VulkanAPI.h"
#include "device.h"
#include "memory.h"
namespace vk
{
// Owning wrapper around a VkBufferView. Neither copy- nor move-constructible.
struct buffer_view
{
// Raw view handle, created in the constructor and destroyed in the destructor
VkBufferView value;
// Create info the view was built from (buffer, format, offset, range)
VkBufferViewCreateInfo info = {};
buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size);
~buffer_view();
buffer_view(const buffer_view&) = delete;
buffer_view(buffer_view&&) = delete;
// True when [address, address + size) fits inside the view; `offset` receives the start relative to info.offset
bool in_range(u32 address, u32 size, u32& offset) const;
private:
// Device used to create and destroy the view
VkDevice m_device;
};
// Owning wrapper around a VkBuffer together with the memory block bound to it.
// Neither copy- nor move-constructible.
struct buffer
{
// Raw buffer handle, created in the constructor and destroyed in the destructor
VkBuffer value;
// Create info the buffer was built from (size, usage, flags, sharing mode)
VkBufferCreateInfo info = {};
// Device memory bound to the buffer
std::unique_ptr<vk::memory_block> memory;
buffer(const vk::render_device& dev, u64 size, u32 memory_type_index, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags);
~buffer();
// Map / unmap the backing memory block
void* map(u64 offset, u64 size);
void unmap();
// Creation size narrowed to 32 bits
u32 size() const;
buffer(const buffer&) = delete;
buffer(buffer&&) = delete;
private:
// Device used to create and destroy the buffer
VkDevice m_device;
};
}

View file

@ -1,40 +0,0 @@
#include "buffer_view.h"
#include "shared.h"
namespace vk
{
// Creates a Vulkan buffer view over [offset, offset + size) of buffer, interpreted as format.
// The create info is kept in `info`; creation failure is reported through CHECK_RESULT.
buffer_view::buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size)
: m_device(dev)
{
info.buffer = buffer;
info.format = format;
info.offset = offset;
info.range = size;
info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
CHECK_RESULT(vkCreateBufferView(m_device, &info, nullptr, &value));
}
// Destroys the Vulkan buffer view on the device it was created with.
buffer_view::~buffer_view()
{
vkDestroyBufferView(m_device, value, nullptr);
}
// Tests whether [address, address + size) lies inside the window covered by this view
// (info.offset .. info.offset + info.range).
// On success writes the start of the request relative to info.offset into `offset` and
// returns true; otherwise returns false and leaves `offset` untouched.
bool buffer_view::in_range(u32 address, u32 size, u32& offset) const
{
// Request starts before the view
if (address < info.offset)
return false;
const u32 _offset = address - static_cast<u32>(info.offset);
// Request starts past the end of the view
if (info.range < _offset)
return false;
// Bytes left in the view from the requested start
const auto remaining = info.range - _offset;
if (size <= remaining)
{
offset = _offset;
return true;
}
return false;
}
}

View file

@ -1,23 +0,0 @@
#pragma once
#include "../VulkanAPI.h"
namespace vk
{
// Owning wrapper around a VkBufferView. Neither copy- nor move-constructible.
struct buffer_view
{
// Raw view handle, created in the constructor and destroyed in the destructor
VkBufferView value;
// Create info the view was built from (buffer, format, offset, range)
VkBufferViewCreateInfo info = {};
buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size);
~buffer_view();
buffer_view(const buffer_view&) = delete;
buffer_view(buffer_view&&) = delete;
// True when [address, address + size) fits inside the view; `offset` receives the start relative to info.offset
bool in_range(u32 address, u32 size, u32& offset) const;
private:
// Device used to create and destroy the view
VkDevice m_device;
};
}

View file

@ -1,35 +0,0 @@
#include "command_pool.h"
#include "render_device.h"
#include "shared.h"
namespace vk
{
// Creates the Vulkan command pool on `dev` and remembers the device as owner.
// The pool is created with the TRANSIENT and RESET_COMMAND_BUFFER flags; the queue family
// index is left at its zero-initialized value.
void command_pool::create(vk::render_device& dev)
{
owner = &dev;
VkCommandPoolCreateInfo infos = {};
infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool));
}
// Destroys the Vulkan command pool if one exists and clears the handle,
// so calling destroy() repeatedly is harmless.
void command_pool::destroy()
{
if (!pool)
return;
vkDestroyCommandPool((*owner), pool, nullptr);
pool = nullptr;
}
// Returns the device passed to create(). Dereferences `owner`, which is null until create() has been called.
vk::render_device& command_pool::get_owner()
{
return (*owner);
}
// Implicit conversion to the raw Vulkan pool handle (null before create() / after destroy()).
command_pool::operator VkCommandPool()
{
return pool;
}
}

View file

@ -1,25 +0,0 @@
#pragma once
#include "../VulkanAPI.h"
namespace vk
{
class render_device;
// Thin wrapper around a VkCommandPool with explicit create()/destroy().
// The defaulted destructor does not release the pool; destroy() must be called explicitly.
class command_pool
{
// Device the pool was created on; set by create()
vk::render_device* owner = nullptr;
// Raw pool handle; null when no pool exists
VkCommandPool pool = nullptr;
public:
command_pool() = default;
~command_pool() = default;
void create(vk::render_device& dev);
void destroy();
vk::render_device& get_owner();
operator VkCommandPool();
};
}

View file

@ -0,0 +1,145 @@
#include "commands.h"
#include "device.h"
#include "shared.h"
#include "sync.h"
namespace vk
{
// This queue flushing method to be implemented by the backend as behavior depends on config
void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush = VK_FALSE);
// Creates the Vulkan command pool on `dev` and remembers the device as owner.
// The pool is created with the TRANSIENT and RESET_COMMAND_BUFFER flags; the queue family
// index is left at its zero-initialized value.
void command_pool::create(vk::render_device& dev)
{
	VkCommandPoolCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

	owner = &dev;
	CHECK_RESULT(vkCreateCommandPool(dev, &create_info, nullptr, &pool));
}
// Destroys the Vulkan command pool if one exists and clears the handle,
// so calling destroy() repeatedly is harmless.
void command_pool::destroy()
{
	if (pool)
	{
		vkDestroyCommandPool((*owner), pool, nullptr);
		pool = nullptr;
	}
}
// Returns the device passed to create(). Dereferences `owner`, which is null until create() has been called.
vk::render_device& command_pool::get_owner()
{
return (*owner);
}
// Implicit conversion to the raw Vulkan pool handle (null before create() / after destroy()).
command_pool::operator VkCommandPool()
{
return pool;
}
// Allocates one primary command buffer from cmd_pool and remembers the pool.
// - auto_reset: when true, an internal submit fence is created on the pool's device; submit()
//   uses it when no explicit fence is given and begin() waits on it before re-recording.
void command_buffer::create(command_pool& cmd_pool, bool auto_reset)
{
	VkCommandBufferAllocateInfo alloc_info = {};
	alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	alloc_info.commandPool = +cmd_pool;
	alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	alloc_info.commandBufferCount = 1;

	CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &alloc_info, &commands));

	if (auto_reset)
	{
		m_submit_fence = new fence(cmd_pool.get_owner());
	}

	pool = &cmd_pool;
}
void command_buffer::destroy()
{
vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);
if (m_submit_fence)
{
//vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
delete m_submit_fence;
m_submit_fence = nullptr;
}
}
// Puts the command buffer into the recording state.
// If an internal submit fence exists and a submission made with it is still pending, this first
// waits for that fence, resets it and resets the command buffer.
// Does nothing further when the buffer is already recording.
void command_buffer::begin()
{
if (m_submit_fence && is_pending)
{
wait_for_fence(m_submit_fence);
is_pending = false;
//CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
m_submit_fence->reset();
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
}
if (is_open)
return;
// NOTE(review): the Vulkan spec only consumes pInheritanceInfo for secondary command buffers;
// create() allocates this one as primary, so this struct is presumably ignored - confirm.
VkCommandBufferInheritanceInfo inheritance_info = {};
inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
VkCommandBufferBeginInfo begin_infos = {};
begin_infos.pInheritanceInfo = &inheritance_info;
begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos));
is_open = true;
}
// Finishes recording. Logs an error and does nothing when the buffer is not recording.
void command_buffer::end()
{
	if (is_open)
	{
		CHECK_RESULT(vkEndCommandBuffer(commands));
		is_open = false;
		return;
	}

	rsx_log.error("commandbuffer->end was called but commandbuffer is not in a recording state");
}
// Submits the recorded command buffer to `queue` through queue_submit().
// - wait_semaphore / signal_semaphore: optional; a null handle means "none".
// - pfence: fence to signal; when null the internal submit fence (if any) is used and the
//   buffer is marked pending so that begin() waits for it.
// - pipeline_stage_flags: wait destination stage mask for the submission.
// - flush: forwarded to queue_submit().
// Logs an error and returns without submitting when the buffer is still recording.
// The data flags are cleared after submission.
void command_buffer::submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, fence* pfence, VkPipelineStageFlags pipeline_stage_flags, VkBool32 flush)
{
if (is_open)
{
rsx_log.error("commandbuffer->submit was called whilst the command buffer is in a recording state");
return;
}
// Check for hanging queries to avoid driver hang
ensure((flags & cb_has_open_query) == 0); // "close and submit of commandbuffer with a hanging query!"
if (!pfence)
{
// Fall back to the internal fence; is_pending stays false when there is none
pfence = m_submit_fence;
is_pending = bool(pfence);
}
VkSubmitInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
infos.commandBufferCount = 1;
infos.pCommandBuffers = &commands;
// Points at the by-value parameter, which stays alive until queue_submit() returns
infos.pWaitDstStageMask = &pipeline_stage_flags;
if (wait_semaphore)
{
infos.waitSemaphoreCount = 1;
infos.pWaitSemaphores = &wait_semaphore;
}
if (signal_semaphore)
{
infos.signalSemaphoreCount = 1;
infos.pSignalSemaphores = &signal_semaphore;
}
queue_submit(queue, &infos, pfence, flush);
clear_flags();
}
}

View file

@ -0,0 +1,93 @@
#pragma once
#include "../VulkanAPI.h"
#include "device.h"
#include "sync.h"
namespace vk
{
// Thin wrapper around a VkCommandPool with explicit create()/destroy().
// The defaulted destructor does not release the pool; destroy() must be called explicitly.
class command_pool
{
// Device the pool was created on; set by create()
vk::render_device* owner = nullptr;
// Raw pool handle; null when no pool exists
VkCommandPool pool = nullptr;
public:
command_pool() = default;
~command_pool() = default;
void create(vk::render_device& dev);
void destroy();
vk::render_device& get_owner();
operator VkCommandPool();
};
// Wrapper around a primary VkCommandBuffer with explicit create()/destroy() and simple
// recording/submission state tracking.
// The defaulted destructor does not free anything; destroy() must be called explicitly.
class command_buffer
{
private:
// True between begin() and end()
bool is_open = false;
// True after a submit() that used the internal fence, until begin() has waited for it
bool is_pending = false;
// Internal fence, only allocated when create() is called with auto_reset = true
fence* m_submit_fence = nullptr;
protected:
// Pool the buffer was allocated from; set by create()
command_pool* pool = nullptr;
// Raw command buffer handle
VkCommandBuffer commands = nullptr;
public:
enum access_type_hint
{
flush_only, //Only to be submitted/opened/closed via command flush
all //Auxiliary, can be submitted/opened/closed at any time
}
access_hint = flush_only;
// Bit flags describing what has been recorded; stored in `flags` and cleared by submit()
enum command_buffer_data_flag : u32
{
cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2,
cb_has_dma_transfer = 4,
cb_has_open_query = 8,
cb_load_occluson_task = 16,
cb_has_conditional_render = 32
};
u32 flags = 0;
public:
command_buffer() = default;
~command_buffer() = default;
void create(command_pool& cmd_pool, bool auto_reset = false);
void destroy();
void begin();
void end();
void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, fence* pfence, VkPipelineStageFlags pipeline_stage_flags, VkBool32 flush = VK_FALSE);
// Properties
// Dereferences `pool`, which is null until create() has been called
command_pool& get_command_pool() const
{
return *pool;
}
void clear_flags()
{
flags = 0;
}
void set_flag(command_buffer_data_flag flag)
{
flags |= flag;
}
operator VkCommandBuffer() const
{
return commands;
}
bool is_recording() const
{
return is_open;
}
};
}

View file

@ -0,0 +1,178 @@
#include "barriers.h"
#include "data_heap.h"
#include "device.h"
#include "../../RSXOffload.h"
#include "../VKHelpers.h"
#include "../VKResourceManager.h"
#include "Emu/IdManager.h"
#include <memory>
namespace vk
{
data_heap g_upload_heap;
// Initializes the ring allocator and creates the backing buffer(s).
// - usage: Vulkan usage flags for the heap buffer.
// - size / name / guard: forwarded to ::data_heap::init.
// - notify: when non-zero, grow() raises the heap_changed status interrupt.
// By default the heap is a host-visible, host-coherent buffer. When `usage` is not reported as
// heap-compatible, a host-visible staging buffer (`shadow`) is created instead and the heap
// itself is placed in device-local memory as a transfer destination.
void data_heap::create(VkBufferUsageFlags usage, usz size, const char* name, usz guard, VkBool32 notify)
{
::data_heap::init(size, name, guard);
const auto& memory_map = g_render_device->get_memory_mapping();
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
auto memory_index = memory_map.host_visible_coherent;
if (!(get_heap_compatible_buffer_types() & usage))
{
rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage);
// The shadow buffer takes the host-visible memory type selected above
shadow = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0);
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
memory_index = memory_map.device_local;
}
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, 0);
// Remembered for the soft growth limit checked in is_critical()
initial_size = size;
notify_on_grow = bool(notify);
}
// Unmaps the heap if it is currently mapped and releases both the heap and the shadow buffer.
void data_heap::destroy()
{
if (mapped)
{
unmap(true);
}
heap.reset();
shadow.reset();
}
// Replaces the heap with a larger one able to hold `size` additional bytes.
// Returns false without changing anything when the new size would reach the 1 GiB limit or when
// a shadow (staging) buffer is in use; returns true after the heap has been replaced.
// The old heap buffer is handed to the resource manager for deferred disposal.
bool data_heap::grow(usz size)
{
// Create new heap. All sizes are aligned up by 64M, upto 1GiB
const usz size_limit = 1024 * 0x100000;
const usz aligned_new_size = utils::align(m_size + size, 64 * 0x100000);
if (aligned_new_size >= size_limit)
{
// Too large
return false;
}
if (shadow)
{
// Shadowed. Growing this can be messy as it requires double allocation (macOS only)
return false;
}
// Wait for DMA activity to end
g_fxo->get<rsx::dma_manager>()->sync();
if (mapped)
{
// Force reset mapping
unmap(true);
}
// Keep the usage flags of the heap being replaced
VkBufferUsageFlags usage = heap->info.usage;
const auto& memory_map = g_render_device->get_memory_mapping();
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
auto memory_index = memory_map.host_visible_coherent;
// Update heap information and reset the allocator
::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
get_resource_manager()->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0);
if (notify_on_grow)
{
raise_status_interrupt(vk::heap_changed);
}
return true;
}
// Returns a CPU pointer to `offset` within the heap.
// The whole buffer (the shadow buffer when present, the heap otherwise) is mapped once on first
// use and the pointer is cached until unmap(true).
// When a shadow buffer is used, the requested range is recorded as dirty so that sync() can copy
// it into the real heap, and the heap_dirty status interrupt is raised.
void* data_heap::map(usz offset, usz size)
{
if (!_ptr)
{
if (shadow)
_ptr = shadow->map(0, shadow->size());
else
_ptr = heap->map(0, heap->size());
mapped = true;
}
if (shadow)
{
// Copy region with identical source and destination offsets
dirty_ranges.push_back({ offset, offset, size });
raise_status_interrupt(runtime_state::heap_dirty);
}
return static_cast<u8*>(_ptr) + offset;
}
// Drops the persistent mapping, but only when `force` is set; otherwise this is a no-op and
// the cached pointer stays valid.
void data_heap::unmap(bool force)
{
	if (!force)
	{
		return;
	}

	// The mapping lives on the shadow buffer when there is one, on the heap otherwise
	auto& mapped_buffer = shadow ? shadow : heap;
	mapped_buffer->unmap();

	mapped = false;
	_ptr = nullptr;
}
// Flushes ranges written through the shadow buffer into the real heap.
// Records one buffer copy covering every dirty range, clears the list and then inserts a
// transfer-write -> vertex-shader-read barrier over the whole heap. No-op when nothing is dirty.
void data_heap::sync(const vk::command_buffer& cmd)
{
	if (dirty_ranges.empty())
	{
		return;
	}

	ensure(shadow);
	ensure(heap);

	vkCmdCopyBuffer(cmd, shadow->value, heap->value, ::size32(dirty_ranges), dirty_ranges.data());
	dirty_ranges.clear();

	insert_buffer_memory_barrier(cmd, heap->value, 0, heap->size(),
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
// True when there are shadow-buffer ranges that sync() has not yet copied into the heap.
bool data_heap::is_dirty() const
{
return !dirty_ranges.empty();
}
// Reports whether the heap is critically full: the base class must report a critical state
// AND the heap (plus guard) must already have reached the soft growth limit.
bool data_heap::is_critical() const
{
	if (!::data_heap::is_critical())
	{
		return false;
	}

	// By default, allow the size to grow upto 8x larger
	// This value is arbitrary, theoretically it is possible to allow infinite stretching to improve performance
	const usz soft_limit = initial_size * 8;
	return (m_size + m_min_guard_size) >= soft_limit;
}
// Returns the shared upload heap, creating it on first use as a 64 MiB
// transfer-source heap with a 1 MiB guard region.
data_heap* get_upload_heap()
{
	auto& upload_heap = g_upload_heap;

	if (!upload_heap.heap)
	{
		// Not created yet (or already destroyed): build it lazily
		upload_heap.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 64 * 0x100000, "auxilliary upload heap", 0x100000);
	}

	return &upload_heap;
}
}

View file

@ -0,0 +1,49 @@
#pragma once
#include "../../Common/ring_buffer_helper.h"
#include "../VulkanAPI.h"
#include "buffer_object.h"
#include "commands.h"
#include <memory>
#include <vector>
namespace vk
{
// Vulkan-backed ring buffer heap built on top of the generic ::data_heap allocator.
// The allocator bookkeeping comes from the base class; this class owns the
// VkBuffer(s) that provide the storage and keeps a CPU mapping of them.
class data_heap : public ::data_heap
{
private:
// Size used as the reference for the soft growth limit in is_critical()
usz initial_size = 0;
// Set once the backing buffer has been mapped; cleared by a forced unmap()
bool mapped = false;
// Cached CPU pointer to the start of the mapped backing buffer
void* _ptr = nullptr;
// When set, grow() raises the heap_changed status interrupt after reallocating
bool notify_on_grow = false;
// Optional staging buffer. When present, map() hands out pointers into it
// and sync() copies the written ranges into `heap`.
std::unique_ptr<buffer> shadow;
// Ranges written through the shadow buffer that still need copying into `heap`
std::vector<VkBufferCopy> dirty_ranges;
protected:
// Replaces the heap with a larger one; returns false when growing is not possible
bool grow(usz size) override;
public:
// The buffer object that backs this heap
std::unique_ptr<buffer> heap;
// NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow
// Avoid mapping/unmapping to keep these drivers from stalling
// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either
void create(VkBufferUsageFlags usage, usz size, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
void destroy();
// Returns a CPU pointer to `offset`; the mapping is created lazily and then reused
void* map(usz offset, usz size);
// Only releases the mapping when `force` is true; otherwise a no-op
void unmap(bool force = false);
// Records the shadow -> heap copies for all pending dirty ranges on `cmd`
void sync(const vk::command_buffer& cmd);
// Properties
bool is_dirty() const;
bool is_critical() const override;
};
extern data_heap* get_upload_heap();
}

View file

@ -0,0 +1,74 @@
#pragma once
#include "../VulkanAPI.h"
#include "device.h"
#include <vector>
namespace vk
{
class descriptor_pool
{
const vk::render_device* m_owner = nullptr;
std::vector<VkDescriptorPool> m_device_pools;
VkDescriptorPool m_current_pool_handle = VK_NULL_HANDLE;
u32 m_current_pool_index = 0;
public:
descriptor_pool() = default;
~descriptor_pool() = default;
void create(const vk::render_device& dev, VkDescriptorPoolSize* sizes, u32 size_descriptors_count, u32 max_sets, u8 subpool_count)
{
ensure(subpool_count);
VkDescriptorPoolCreateInfo infos = {};
infos.flags = 0;
infos.maxSets = max_sets;
infos.poolSizeCount = size_descriptors_count;
infos.pPoolSizes = sizes;
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
m_owner = &dev;
m_device_pools.resize(subpool_count);
for (auto& pool : m_device_pools)
{
CHECK_RESULT(vkCreateDescriptorPool(dev, &infos, nullptr, &pool));
}
m_current_pool_handle = m_device_pools[0];
}
void destroy()
{
if (m_device_pools.empty()) return;
for (auto& pool : m_device_pools)
{
vkDestroyDescriptorPool((*m_owner), pool, nullptr);
pool = VK_NULL_HANDLE;
}
m_owner = nullptr;
}
bool valid()
{
return (!m_device_pools.empty());
}
operator VkDescriptorPool()
{
return m_current_pool_handle;
}
void reset(VkDescriptorPoolResetFlags flags)
{
m_current_pool_index = (m_current_pool_index + 1) % u32(m_device_pools.size());
m_current_pool_handle = m_device_pools[m_current_pool_index];
CHECK_RESULT(vkResetDescriptorPool(*m_owner, m_current_pool_handle, flags));
}
};
}

View file

@ -1,10 +1,13 @@
#include "physical_device.h"
#include "supported_extensions.h"
#include "device.h"
#include "instance.hpp"
#include "util/logs.hpp"
#include "Emu/system_config.h"
namespace vk
{
// Global shared render device
const render_device* g_render_device = nullptr;
void physical_device::get_physical_device_features(bool allow_extensions)
{
if (!allow_extensions)
@ -222,6 +225,265 @@ namespace vk
return parent;
}
// Render Device - The actual usable device
// Creates the logical Vulkan device on top of `pdev` with a single queue from
// family `graphics_queue_idx`. Also selects the extensions and hardware
// features to enable, imports optional entry points, caches the memory/format/
// binding-table information derived from the physical device and creates the
// memory allocator selected by the user configuration.
void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx)
{
float queue_priorities[1] = { 0.f };
pgpu = &pdev;
VkDeviceQueueCreateInfo queue = {};
queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue.pNext = NULL;
queue.queueFamilyIndex = graphics_queue_idx;
queue.queueCount = 1;
queue.pQueuePriorities = queue_priorities;
// Device extensions to request; the swapchain extension is always requested
std::vector<const char*> requested_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME };
// Enable hardware features manually
// Currently we require:
// 1. Anisotropic sampling
// 2. DXT support
// 3. Indexable storage buffers
VkPhysicalDeviceFeatures enabled_features{};
// Optional extensions are only requested when the physical device reports support
if (pgpu->shader_types_support.allow_float16)
{
requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (pgpu->conditional_render_support)
{
requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
}
if (pgpu->unrestricted_depth_range_support)
{
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
// Features requested unconditionally; some are switched back off further down
// when the physical device does not expose them
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
enabled_features.logicOp = VK_TRUE;
enabled_features.depthClamp = VK_TRUE;
enabled_features.depthBounds = VK_TRUE;
enabled_features.wideLines = VK_TRUE;
enabled_features.largePoints = VK_TRUE;
enabled_features.shaderFloat64 = VK_TRUE;
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
// MSAA features
if (!pgpu->features.shaderStorageImageMultisample || !pgpu->features.shaderStorageImageWriteWithoutFormat)
{
// TODO: Slow fallback to emulate this
// Just warn and let the driver decide whether to crash or not
rsx_log.fatal("Your GPU driver does not support some required MSAA features. Expect problems.");
}
enabled_features.sampleRateShading = VK_TRUE;
enabled_features.alphaToOne = VK_TRUE;
enabled_features.shaderStorageImageMultisample = VK_TRUE;
// enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE; // Unused currently, may be needed soon
enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE;
}
// enabled_features.shaderSampledImageArrayDynamicIndexing = TRUE; // Unused currently but will be needed soon
enabled_features.shaderClipDistance = VK_TRUE;
// enabled_features.shaderCullDistance = VK_TRUE; // Alt notation of clip distance
enabled_features.samplerAnisotropy = VK_TRUE;
enabled_features.textureCompressionBC = VK_TRUE;
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;
// Optionally disable unsupported stuff
if (!pgpu->features.shaderFloat64)
{
rsx_log.error("Your GPU does not support double precision floats in shaders. Graphics may not work correctly.");
enabled_features.shaderFloat64 = VK_FALSE;
}
if (!pgpu->features.depthBounds)
{
rsx_log.error("Your GPU does not support depth bounds testing. Graphics may not work correctly.");
enabled_features.depthBounds = VK_FALSE;
}
// The two MSAA-only features below are only reported when MSAA actually requested them
if (!pgpu->features.sampleRateShading && enabled_features.sampleRateShading)
{
rsx_log.error("Your GPU does not support sample rate shading for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.sampleRateShading = VK_FALSE;
}
if (!pgpu->features.alphaToOne && enabled_features.alphaToOne)
{
// AMD proprietary drivers do not expose alphaToOne support
rsx_log.error("Your GPU does not support alpha-to-one for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.alphaToOne = VK_FALSE;
}
VkDeviceCreateInfo device = {};
device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device.pNext = nullptr;
device.queueCreateInfoCount = 1;
device.pQueueCreateInfos = &queue;
device.enabledLayerCount = 0;
device.ppEnabledLayerNames = nullptr; // Deprecated
device.enabledExtensionCount = ::size32(requested_extensions);
device.ppEnabledExtensionNames = requested_extensions.data();
device.pEnabledFeatures = &enabled_features;
// Declared at function scope because device.pNext may point at it until vkCreateDevice returns
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
if (pgpu->shader_types_support.allow_float16)
{
// Allow use of f16 type in shaders if possible
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
shader_support_info.shaderFloat16 = VK_TRUE;
device.pNext = &shader_support_info;
rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
}
else
{
rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
}
CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev));
// Import optional function endpoints
if (pgpu->conditional_render_support)
{
cmdBeginConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdBeginConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"));
cmdEndConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdEndConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"));
}
// Cache information derived from the physical device
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
// The config flag selects the plain Vulkan allocator instead of the VMA-based one
if (g_cfg.video.disable_vulkan_mem_allocator)
m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
else
m_allocator = std::make_unique<vk::mem_allocator_vma>(dev, pdev);
}
// Tears down the allocator and the logical device, then clears the cached
// memory and format information. Does nothing unless both the device handle
// and the physical-device pointer are set.
void render_device::destroy()
{
	if (!dev || !pgpu)
	{
		return;
	}

	// The allocator goes first, while the device it was created with still exists
	if (m_allocator)
	{
		m_allocator->destroy();
		m_allocator.reset();
	}

	vkDestroyDevice(dev, nullptr);
	dev = nullptr;

	memory_map = {};
	m_formats_support = {};
}
// Returns the format properties of `format` for the underlying physical device.
// Results are memoised in the physical device's format_properties map, so the
// driver is only queried the first time a given format is requested.
const VkFormatProperties render_device::get_format_properties(VkFormat format)
{
	auto& cache = pgpu->format_properties;

	if (const auto cached = cache.find(format); cached != cache.end())
	{
		return cached->second;
	}

	// First request for this format: create the cache slot and let the driver fill it in
	auto& entry = cache[format];
	vkGetPhysicalDeviceFormatProperties(*pgpu, format, &entry);
	return entry;
}
// Finds the lowest-numbered memory type that is allowed by the `typeBits`
// bitmask and whose property flags include every bit of `desired_mask`.
// On success the index is written to `*type_index` (when non-null) and true
// is returned; otherwise returns false and leaves `*type_index` untouched.
bool render_device::get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const
{
	const VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties();

	for (u32 index = 0; index < 32; ++index)
	{
		// Bit N of typeBits says whether memory type N may be used
		const bool type_allowed = ((typeBits >> index) & 1) == 1;
		if (!type_allowed)
		{
			continue;
		}

		const auto flags = mem_infos.memoryTypes[index].propertyFlags;
		if ((flags & desired_mask) != desired_mask)
		{
			continue;
		}

		if (type_index)
		{
			*type_index = index;
		}

		return true;
	}

	return false;
}
// Physical device this render device was created on.
// pgpu is dereferenced without a null check.
const physical_device& render_device::gpu() const
{
return *pgpu;
}
// Memory type indices cached by create()
const memory_type_mapping& render_device::get_memory_mapping() const
{
return memory_map;
}
// Optimal-tiling format support cached by create()
const gpu_formats_support& render_device::get_formats_support() const
{
return m_formats_support;
}
// Descriptor binding layout cached by create()
const pipeline_binding_table& render_device::get_pipeline_binding_table() const
{
return m_pipeline_binding_table;
}
// The accessors below forward capability information stored on the physical device
const gpu_shader_types_support& render_device::get_shader_types_support() const
{
return pgpu->shader_types_support;
}
bool render_device::get_shader_stencil_export_support() const
{
return pgpu->stencil_export_support;
}
bool render_device::get_depth_bounds_support() const
{
return pgpu->features.depthBounds != VK_FALSE;
}
bool render_device::get_alpha_to_one_support() const
{
return pgpu->features.alphaToOne != VK_FALSE;
}
bool render_device::get_conditional_render_support() const
{
return pgpu->conditional_render_support;
}
bool render_device::get_unrestricted_depth_range_support() const
{
return pgpu->unrestricted_depth_range_support;
}
// Non-owning pointer to the allocator; null before create() and after destroy()
mem_allocator_base* render_device::get_allocator() const
{
return m_allocator.get();
}
// Allows a render_device to be passed wherever a VkDevice handle is expected
render_device::operator VkDevice() const
{
return dev;
}
// Shared Util
memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
{
VkPhysicalDevice pdev = dev;
@ -229,10 +491,10 @@ namespace vk
vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
memory_type_mapping result;
result.device_local = VK_MAX_MEMORY_TYPES;
result.device_local = VK_MAX_MEMORY_TYPES;
result.host_visible_coherent = VK_MAX_MEMORY_TYPES;
bool host_visible_cached = false;
bool host_visible_cached = false;
VkDeviceSize host_visible_vram_size = 0;
VkDeviceSize device_local_vram_size = 0;
@ -245,22 +507,22 @@ namespace vk
{
if (device_local_vram_size < heap.size)
{
result.device_local = i;
result.device_local = i;
device_local_vram_size = heap.size;
}
}
bool is_host_visible = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
bool is_host_visible = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
bool is_host_coherent = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
bool is_cached = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
bool is_cached = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
if (is_host_coherent && is_host_visible)
{
if ((is_cached && !host_visible_cached) || (host_visible_vram_size < heap.size))
{
result.host_visible_coherent = i;
host_visible_vram_size = heap.size;
host_visible_cached = is_cached;
host_visible_vram_size = heap.size;
host_visible_cached = is_cached;
}
}
}
@ -280,11 +542,11 @@ namespace vk
vkGetPhysicalDeviceFormatProperties(dev, VK_FORMAT_D24_UNORM_S8_UINT, &props);
result.d24_unorm_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
!!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT);
!!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT);
vkGetPhysicalDeviceFormatProperties(dev, VK_FORMAT_D32_SFLOAT_S8_UINT, &props);
result.d32_sfloat_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
!!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT);
!!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT);
// Hide d24_s8 if force high precision z buffer is enabled
if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8)
@ -297,7 +559,7 @@ namespace vk
// Check if device supports RGBA8 format
vkGetPhysicalDeviceFormatProperties(dev, VK_FORMAT_R8G8B8A8_UNORM, &props);
if (!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) || !(props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) ||
!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
{
// Non-fatal. Most games use BGRA layout due to legacy reasons as old GPUs typically supported BGRA and RGBA was emulated.
rsx_log.error("Your GPU and/or driver does not support RGBA8 format. This can cause problems in some rare games that use this memory layout.");
@ -312,9 +574,9 @@ namespace vk
pipeline_binding_table result{};
// Need to check how many samplers are supported by the driver
const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u);
const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u);
result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers;
result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4;
result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4;
return result;
}
}

View file

@ -3,6 +3,7 @@
#include "../VulkanAPI.h"
#include "chip_class.h"
#include "pipeline_binding_table.h"
#include "memory.h"
#include <string>
#include <vector>
@ -75,7 +76,51 @@ namespace vk
operator VkInstance() const;
};
// Logical Vulkan device plus the per-device data that create() derives from
// the physical device. Lifetime is managed explicitly through create() and
// destroy(); the destructor does not release anything.
class render_device
{
// Physical device passed to create(); not owned
physical_device* pgpu = nullptr;
// Data cached by create() from the physical device
memory_type_mapping memory_map{};
gpu_formats_support m_formats_support{};
pipeline_binding_table m_pipeline_binding_table{};
// Allocator implementation chosen in create() from the user configuration
std::unique_ptr<mem_allocator_base> m_allocator;
VkDevice dev = VK_NULL_HANDLE;
public:
// Exported device endpoints
// Only loaded when the physical device reports conditional rendering support; null otherwise
PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr;
PFN_vkCmdEndConditionalRenderingEXT cmdEndConditionalRenderingEXT = nullptr;
public:
render_device() = default;
~render_device() = default;
void create(vk::physical_device& pdev, u32 graphics_queue_idx);
void destroy();
// Non-const: a miss inserts into the physical device's format property cache
const VkFormatProperties get_format_properties(VkFormat format);
// Searches `typeBits` for a memory type providing all of `desired_mask`
bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const;
const physical_device& gpu() const;
const memory_type_mapping& get_memory_mapping() const;
const gpu_formats_support& get_formats_support() const;
const pipeline_binding_table& get_pipeline_binding_table() const;
const gpu_shader_types_support& get_shader_types_support() const;
bool get_shader_stencil_export_support() const;
bool get_depth_bounds_support() const;
bool get_alpha_to_one_support() const;
bool get_conditional_render_support() const;
bool get_unrestricted_depth_range_support() const;
mem_allocator_base* get_allocator() const;
operator VkDevice() const;
};
memory_type_mapping get_memory_mapping(const physical_device& dev);
gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev);
pipeline_binding_table get_pipeline_binding_table(const physical_device& dev);
extern const render_device* g_render_device;
}

View file

@ -1,54 +0,0 @@
#include "fence.h"
#include "shared.h"
namespace vk
{
#ifdef _MSC_VER
extern "C" void _mm_pause();
#endif
// Creates an unsignaled VkFence on `dev` and remembers the device for later calls
fence::fence(VkDevice dev)
{
owner = dev;
VkFenceCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
CHECK_RESULT(vkCreateFence(dev, &info, nullptr, &handle));
}
// Destroys the fence if one is still held
fence::~fence()
{
if (handle)
{
vkDestroyFence(owner, handle, nullptr);
handle = VK_NULL_HANDLE;
}
}
// Resets the Vulkan fence and clears the CPU-side "flushed" flag.
// NOTE(review): the VkResult of vkResetFences is ignored here, unlike the
// CHECK_RESULT used in the constructor - confirm this is intentional.
void fence::reset()
{
vkResetFences(owner, 1, &handle);
flushed.release(false);
}
// Marks the fence as flushed, releasing any thread spinning in wait_flush()
void fence::signal_flushed()
{
flushed.release(true);
}
// Busy-waits until signal_flushed() has been called.
// The pause intrinsic is a spin-loop hint; both variants used here are x86 intrinsics.
void fence::wait_flush()
{
while (!flushed)
{
#ifdef _MSC_VER
_mm_pause();
#else
__builtin_ia32_pause();
#endif
}
}
// True while this object holds a fence handle
fence::operator bool() const
{
return (handle != VK_NULL_HANDLE);
}
}

View file

@ -1,23 +0,0 @@
#pragma once
#include "../VulkanAPI.h"
#include "util/atomic.hpp"
namespace vk
{
// VkFence wrapper that additionally carries a CPU-side "flushed" flag.
struct fence
{
// Set by signal_flushed(), cleared by reset(), polled by wait_flush()
atomic_t<bool> flushed = false;
VkFence handle = VK_NULL_HANDLE;
// Device the fence was created on; used for reset and destruction
VkDevice owner = VK_NULL_HANDLE;
fence(VkDevice dev);
~fence();
void reset();
void signal_flushed();
// Spins until the flushed flag becomes true
void wait_flush();
// True while a fence handle is held
operator bool() const;
};
}

View file

@ -0,0 +1,84 @@
#pragma once
#include "../VulkanAPI.h"
#include "image.h"
#include <memory>
#include <vector>
namespace vk
{
struct framebuffer
{
VkFramebuffer value;
VkFramebufferCreateInfo info = {};
std::vector<std::unique_ptr<vk::image_view>> attachments;
u32 m_width = 0;
u32 m_height = 0;
public:
framebuffer(VkDevice dev, VkRenderPass pass, u32 width, u32 height, std::vector<std::unique_ptr<vk::image_view>>&& atts)
: attachments(std::move(atts))
, m_device(dev)
{
std::vector<VkImageView> image_view_array(attachments.size());
usz i = 0;
for (const auto& att : attachments)
{
image_view_array[i++] = att->value;
}
info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
info.width = width;
info.height = height;
info.attachmentCount = static_cast<u32>(image_view_array.size());
info.pAttachments = image_view_array.data();
info.renderPass = pass;
info.layers = 1;
m_width = width;
m_height = height;
CHECK_RESULT(vkCreateFramebuffer(dev, &info, nullptr, &value));
}
~framebuffer()
{
vkDestroyFramebuffer(m_device, value, nullptr);
}
u32 width()
{
return m_width;
}
u32 height()
{
return m_height;
}
bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
{
if (m_width != width || m_height != height)
return false;
if (fbo_images.size() != attachments.size())
return false;
for (uint n = 0; n < fbo_images.size(); ++n)
{
if (attachments[n]->info.image != fbo_images[n]->value ||
attachments[n]->info.format != fbo_images[n]->info.format)
return false;
}
return true;
}
framebuffer(const framebuffer&) = delete;
framebuffer(framebuffer&&) = delete;
private:
VkDevice m_device;
};
}

View file

@ -0,0 +1,350 @@
#include "stdafx.h"
#include "barriers.h"
#include "device.h"
#include "image.h"
#include "image_helpers.h"
#include <memory>
namespace vk
{
// Checks the requested image extent against the GPU's maximum image dimension
// for the image type and throws when the longest side exceeds it.
// Also throws for an image type other than 1D/2D/3D.
void image::validate(const vk::render_device& dev, const VkImageCreateInfo& info) const
{
	// Bind by reference: works whether get_limits() returns a value or a reference,
	// and avoids an extra copy of the limits structure in the latter case
	const auto& gpu_limits = dev.gpu().get_limits();
	u32 longest_dim = 0;
	u32 dim_limit = 0;

	switch (info.imageType)
	{
	case VK_IMAGE_TYPE_1D:
		longest_dim = info.extent.width;
		dim_limit = gpu_limits.maxImageDimension1D;
		break;
	case VK_IMAGE_TYPE_2D:
		longest_dim = std::max(info.extent.width, info.extent.height);
		// flags is a bitmask, so test the cube bit rather than comparing the whole value;
		// an equality test misses cube images that carry any additional create flag
		dim_limit = (info.flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) ? gpu_limits.maxImageDimensionCube : gpu_limits.maxImageDimension2D;
		break;
	case VK_IMAGE_TYPE_3D:
		longest_dim = std::max({ info.extent.width, info.extent.height, info.extent.depth });
		dim_limit = gpu_limits.maxImageDimension3D;
		break;
	default:
		fmt::throw_exception("Unreachable");
	}

	if (longest_dim > dim_limit)
	{
		// Longest dimension exceeds the limit. Can happen when using MSAA + very high resolution scaling
		// Just kill the application at this point.
		fmt::throw_exception(
			"The renderer requested an image larger than the limit allowed for by your GPU hardware. "
			"Turn down your resolution scale and/or disable MSAA to fit within the image budget.");
	}
}
// Creates a VkImage on `dev`, allocates memory for it and binds the two.
// `memory_type_index` is the preferred memory type; when the image cannot use
// it, a type providing all of `access_flags` is searched for instead and an
// exception is thrown if none exists. `format_class` may be left undefined for
// colour formats, in which case it is deduced.
image::image(const vk::render_device& dev,
	u32 memory_type_index,
	u32 access_flags,
	VkImageType image_type,
	VkFormat format,
	u32 width, u32 height, u32 depth,
	u32 mipmaps, u32 layers,
	VkSampleCountFlagBits samples,
	VkImageLayout initial_layout,
	VkImageTiling tiling,
	VkImageUsageFlags usage,
	VkImageCreateFlags image_flags,
	rsx::format_class format_class)
	: current_layout(initial_layout)
	, m_device(dev)
{
	info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
	info.imageType = image_type;
	info.format = format;
	info.extent = { width, height, depth };
	info.mipLevels = mipmaps;
	info.arrayLayers = layers;
	info.samples = samples;
	info.tiling = tiling;
	info.usage = usage;
	info.flags = image_flags;
	info.initialLayout = initial_layout;
	info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

	validate(dev, info);

	CHECK_RESULT(vkCreateImage(m_device, &info, nullptr, &value));

	VkMemoryRequirements memory_req;
	vkGetImageMemoryRequirements(m_device, value, &memory_req);

	// The index is range-checked before shifting and the shift is done on an unsigned
	// value: a signed `1 << index` is undefined for index 31 and for any out-of-range
	// index such as the VK_MAX_MEMORY_TYPES "not found" sentinel.
	const bool suggested_type_usable = memory_type_index < VK_MAX_MEMORY_TYPES &&
		(memory_req.memoryTypeBits & (1u << memory_type_index)) != 0;

	if (!suggested_type_usable)
	{
		// Suggested memory type is incompatible with this image.
		// Go through the bitset and test for requested props.
		if (!dev.get_compatible_memory_type(memory_req.memoryTypeBits, access_flags, &memory_type_index))
			fmt::throw_exception("No compatible memory type was found!");
	}

	memory = std::make_shared<vk::memory_block>(m_device, memory_req.size, memory_req.alignment, memory_type_index);
	CHECK_RESULT(vkBindImageMemory(m_device, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()));

	m_storage_aspect = get_aspect_flags(format);

	if (format_class == RSX_FORMAT_CLASS_UNDEFINED)
	{
		if (m_storage_aspect != VK_IMAGE_ASPECT_COLOR_BIT)
		{
			// Only reported; the class stays undefined for such images
			rsx_log.error("Depth/stencil textures must have format class explicitly declared");
		}
		else
		{
			format_class = RSX_FORMAT_CLASS_COLOR;
		}
	}

	m_format_class = format_class;
}
// TODO: Ctor that uses a provided memory heap
// Destroys the VkImage handle on the device it was created with
image::~image()
{
vkDestroyImage(m_device, value, nullptr);
}
// The getters below report values recorded in the creation info (`info`)
u32 image::width() const
{
return info.extent.width;
}
u32 image::height() const
{
return info.extent.height;
}
u32 image::depth() const
{
return info.extent.depth;
}
u32 image::mipmaps() const
{
return info.mipLevels;
}
u32 image::layers() const
{
return info.arrayLayers;
}
// Sample count flag value narrowed to u8
u8 image::samples() const
{
return u8(info.samples);
}
VkFormat image::format() const
{
return info.format;
}
// Aspect mask derived from the format in the constructor
VkImageAspectFlags image::aspect() const
{
return m_storage_aspect;
}
// Format class passed to, or deduced by, the constructor
rsx::format_class image::format_class() const
{
return m_format_class;
}
// Saves the current layout on the layout stack, then transitions to `layout`
void image::push_layout(VkCommandBuffer cmd, VkImageLayout layout)
{
m_layout_stack.push(current_layout);
change_image_layout(cmd, this, layout);
}
// Saves the current layout on the layout stack, then issues a texture barrier for `layout`
void image::push_barrier(VkCommandBuffer cmd, VkImageLayout layout)
{
m_layout_stack.push(current_layout);
insert_texture_barrier(cmd, this, layout);
}
// Transitions back to the most recently pushed layout.
// Requires a preceding push_layout()/push_barrier().
void image::pop_layout(VkCommandBuffer cmd)
{
ensure(!m_layout_stack.empty());
auto layout = m_layout_stack.top();
m_layout_stack.pop();
change_image_layout(cmd, this, layout);
}
// Transitions to `new_layout` unless the image is already in it.
// Must not be called while a pushed layout is still pending.
void image::change_layout(command_buffer& cmd, VkImageLayout new_layout)
{
if (current_layout == new_layout)
return;
ensure(m_layout_stack.empty());
change_image_layout(cmd, this, new_layout);
}
// Creates a view of a raw VkImage handle from individually supplied parameters.
// No vk::image is associated with the view, so image() returns nullptr.
image_view::image_view(VkDevice dev, VkImage image, VkImageViewType view_type, VkFormat format, VkComponentMapping mapping, VkImageSubresourceRange range)
: m_device(dev)
{
info.format = format;
info.image = image;
info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
info.components = mapping;
info.viewType = view_type;
info.subresourceRange = range;
create_impl();
}
// Creates a view from a fully populated create-info structure, used as given
image_view::image_view(VkDevice dev, VkImageViewCreateInfo create_info)
: info(create_info)
, m_device(dev)
{
create_impl();
}
// Creates a view of `resource`, taking format and image handle from it.
// Passing VK_IMAGE_VIEW_TYPE_MAX_ENUM as `view_type` asks for the view type to
// be deduced from the image (1D / 2D / 2D array / cube / 3D); in that case the
// view also covers all array layers of the image regardless of `range`.
image_view::image_view(VkDevice dev, vk::image* resource, VkImageViewType view_type, const VkComponentMapping& mapping, const VkImageSubresourceRange& range)
	: m_device(dev), m_resource(resource)
{
	info.format = resource->info.format;
	info.image = resource->value;
	info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	info.components = mapping;
	info.subresourceRange = range;

	if (view_type != VK_IMAGE_VIEW_TYPE_MAX_ENUM)
	{
		// Caller asked for a specific view type
		info.viewType = view_type;
	}
	else
	{
		switch (resource->info.imageType)
		{
		case VK_IMAGE_TYPE_1D:
			info.viewType = VK_IMAGE_VIEW_TYPE_1D;
			break;
		case VK_IMAGE_TYPE_2D:
			// flags is a bitmask: test the cube bit instead of comparing the whole
			// value, so cube images carrying additional create flags are recognised
			if (resource->info.flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
				info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
			else if (resource->info.arrayLayers == 1)
				info.viewType = VK_IMAGE_VIEW_TYPE_2D;
			else
				info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
			break;
		case VK_IMAGE_TYPE_3D:
			info.viewType = VK_IMAGE_VIEW_TYPE_3D;
			break;
		default:
			fmt::throw_exception("Unreachable");
		}

		info.subresourceRange.layerCount = resource->info.arrayLayers;
	}

	create_impl();
}
// Destroys the VkImageView handle
image_view::~image_view()
{
vkDestroyImageView(m_device, value, nullptr);
}
// When component swizzling is disabled at build time, packs the requested
// component mapping into an integer: (swizzle value - 1) of A, R, G and B,
// placed at bit offsets 0, 3, 6 and 9. Otherwise returns 0.
// NOTE(review): a component equal to VK_COMPONENT_SWIZZLE_IDENTITY (0) would
// wrap around in the subtraction - confirm callers never request it here.
u32 image_view::encoded_component_map() const
{
#if (VK_DISABLE_COMPONENT_SWIZZLE)
u32 result = static_cast<u32>(info.components.a) - 1;
result |= (static_cast<u32>(info.components.r) - 1) << 3;
result |= (static_cast<u32>(info.components.g) - 1) << 6;
result |= (static_cast<u32>(info.components.b) - 1) << 9;
return result;
#else
return 0;
#endif
}
// The vk::image this view was created from, or nullptr when the view was
// built from a raw handle or a create-info structure
vk::image* image_view::image() const
{
return m_resource;
}
// Creates the VkImageView described by `info`.
// When component swizzling is disabled at build time, the view itself is
// created with an identity mapping, while `info.components` keeps the mapping
// that was requested so it can still be read back afterwards.
void image_view::create_impl()
{
#if (VK_DISABLE_COMPONENT_SWIZZLE)
// Force identity
const auto mapping = info.components;
info.components = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY };
#endif
CHECK_RESULT(vkCreateImageView(m_device, &info, nullptr, &value));
#if (VK_DISABLE_COMPONENT_SWIZZLE)
// Restore requested mapping
info.components = mapping;
#endif
}
// Returns a cached view for `remap_encoding` whose aspect mask overlaps `mask`,
// creating and caching a new one when none exists yet. The returned pointer is
// owned by this image's view cache.
image_view* viewable_image::get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap, VkImageAspectFlags mask)
{
	// Encoding whose views use the native component map unchanged
	constexpr u32 native_map_encoding = 0xAAE4;

	// An identity request on an image whose native map is plain RGBA is the same
	// thing as the native-map encoding, so share the cache slot
	const bool native_is_rgba =
		native_component_map.a == VK_COMPONENT_SWIZZLE_A &&
		native_component_map.r == VK_COMPONENT_SWIZZLE_R &&
		native_component_map.g == VK_COMPONENT_SWIZZLE_G &&
		native_component_map.b == VK_COMPONENT_SWIZZLE_B;

	if (remap_encoding == VK_REMAP_IDENTITY && native_is_rgba)
	{
		remap_encoding = native_map_encoding;
	}

	// Look for an existing view with this encoding and a matching aspect
	const auto [first, last] = views.equal_range(remap_encoding);
	for (auto it = first; it != last; ++it)
	{
		if (it->second->info.subresourceRange.aspectMask & mask)
		{
			return it->second.get();
		}
	}

	// Nothing cached: work out the component mapping for the new view
	VkComponentMapping real_mapping;
	if (remap_encoding == VK_REMAP_IDENTITY)
	{
		real_mapping = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY };
	}
	else if (remap_encoding == native_map_encoding)
	{
		real_mapping = native_component_map;
	}
	else
	{
		real_mapping = vk::apply_swizzle_remap
		(
			{ native_component_map.a, native_component_map.r, native_component_map.g, native_component_map.b },
			remap
		);
	}

	// The view spans every mip level and array layer of the image
	const VkImageSubresourceRange range = { aspect() & mask, 0, info.mipLevels, 0, info.arrayLayers };
	ensure(range.aspectMask);

	auto new_view = std::make_unique<vk::image_view>(*g_render_device, this, VK_IMAGE_VIEW_TYPE_MAX_ENUM, real_mapping, range);
	return views.emplace(remap_encoding, std::move(new_view))->second.get();
}
// Replaces the native component mapping. When the mapping actually changes,
// every cached view is dropped so that later get_view() calls rebuild them.
void viewable_image::set_native_component_layout(VkComponentMapping new_layout)
{
	const bool unchanged =
		new_layout.r == native_component_map.r &&
		new_layout.g == native_component_map.g &&
		new_layout.b == native_component_map.b &&
		new_layout.a == native_component_map.a;

	if (unchanged)
	{
		return;
	}

	native_component_map = new_layout;
	views.clear();
}
}

View file

@ -0,0 +1,126 @@
#pragma once
#include "../VulkanAPI.h"
#include "../../Common/TextureUtils.h"
#include "commands.h"
#include "device.h"
#include "memory.h"
#include <stack>
//using enum rsx::format_class;
using namespace ::rsx::format_class_;
#ifdef __APPLE__
#define VK_DISABLE_COMPONENT_SWIZZLE 1
#else
#define VK_DISABLE_COMPONENT_SWIZZLE 0
#endif
namespace vk
{
// Reserved values for the `remap_encoding` argument of viewable_image::get_view()
enum : u32// special remap_encoding enums
{
VK_REMAP_IDENTITY = 0xCAFEBABE, // Special view encoding to return an identity image view
VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF // Special encoding for multisampled images; returns a multisampled image view
};
// Owns a VkImage and the memory block bound to it, and tracks the image's
// current layout. Neither copyable nor movable.
class image
{
// Layouts saved by push_layout()/push_barrier() and restored by pop_layout()
std::stack<VkImageLayout> m_layout_stack;
// Aspect mask derived from the format at construction
VkImageAspectFlags m_storage_aspect = 0;
rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED;
// Throws when the requested extent exceeds the GPU limit for the image type
void validate(const vk::render_device& dev, const VkImageCreateInfo& info) const;
public:
VkImage value = VK_NULL_HANDLE;
VkComponentMapping native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
// Starts out as the initial layout passed to the constructor
VkImageLayout current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
// Parameters the image was created with
VkImageCreateInfo info = {};
// Memory bound to the image
std::shared_ptr<vk::memory_block> memory;
// `memory_type_index` is the preferred memory type; `access_flags` are the
// memory property flags used to find a fallback type when it is unusable
image(const vk::render_device& dev,
u32 memory_type_index,
u32 access_flags,
VkImageType image_type,
VkFormat format,
u32 width, u32 height, u32 depth,
u32 mipmaps, u32 layers,
VkSampleCountFlagBits samples,
VkImageLayout initial_layout,
VkImageTiling tiling,
VkImageUsageFlags usage,
VkImageCreateFlags image_flags,
rsx::format_class format_class = RSX_FORMAT_CLASS_UNDEFINED);
virtual ~image();
image(const image&) = delete;
image(image&&) = delete;
// Properties
u32 width() const;
u32 height() const;
u32 depth() const;
u32 mipmaps() const;
u32 layers() const;
u8 samples() const;
VkFormat format() const;
VkImageAspectFlags aspect() const;
rsx::format_class format_class() const;
// Pipeline management
// push_* save the current layout before changing it; pop_layout restores the saved one
void push_layout(VkCommandBuffer cmd, VkImageLayout layout);
void push_barrier(VkCommandBuffer cmd, VkImageLayout layout);
void pop_layout(VkCommandBuffer cmd);
// Requires an empty layout stack unless the image is already in `new_layout`
void change_layout(command_buffer& cmd, VkImageLayout new_layout);
private:
// Device the image was created on
VkDevice m_device;
};
// Owns a VkImageView. Neither copyable nor movable.
struct image_view
{
VkImageView value = VK_NULL_HANDLE;
// Parameters the view was requested with
VkImageViewCreateInfo info = {};
// View of a raw image handle
image_view(VkDevice dev, VkImage image, VkImageViewType view_type, VkFormat format, VkComponentMapping mapping, VkImageSubresourceRange range);
// View described entirely by `create_info`
image_view(VkDevice dev, VkImageViewCreateInfo create_info);
// View of a vk::image; VK_IMAGE_VIEW_TYPE_MAX_ENUM requests that the view type
// be deduced from the image
image_view(VkDevice dev, vk::image* resource,
VkImageViewType view_type = VK_IMAGE_VIEW_TYPE_MAX_ENUM,
const VkComponentMapping& mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
const VkImageSubresourceRange& range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
~image_view();
// Packed form of the requested component mapping; 0 unless swizzling is disabled at build time
u32 encoded_component_map() const;
// Source vk::image, or nullptr for views not created from one
vk::image* image() const;
image_view(const image_view&) = delete;
image_view(image_view&&) = delete;
private:
VkDevice m_device;
// Not owned
vk::image* m_resource = nullptr;
void create_impl();
};
// An image that creates and caches its own image views on demand.
class viewable_image : public image
{
private:
// Cached views keyed by remap encoding; several views (differing in aspect mask) may share a key
std::unordered_multimap<u32, std::unique_ptr<vk::image_view>> views;
public:
using image::image;
// Returns a cached or newly created view; the view remains owned by this image
virtual image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT);
// Changes the native component mapping; cached views are discarded when it differs
void set_native_component_layout(VkComponentMapping new_layout);
};
}

View file

@ -0,0 +1,216 @@
#include "stdafx.h"
#include "image_helpers.h"
#include "image.h"
#include "util/logs.hpp"
#include "../VKRenderPass.h"
#include "../../gcm_enums.h"
namespace vk
{
// Component mapping that routes each channel to itself in R, G, B, A order.
// Declared 'extern' in image_helpers.h so other translation units can use it.
VkComponentMapping default_component_map =
{
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
// Maps a format to the image aspects it is treated as having.
// D16_UNORM / D32_SFLOAT report depth only, D24_UNORM_S8_UINT / D32_SFLOAT_S8_UINT report
// depth + stencil, and every other format is reported as color.
VkImageAspectFlags get_aspect_flags(VkFormat format)
{
    const bool depth_only =
        format == VK_FORMAT_D16_UNORM ||
        format == VK_FORMAT_D32_SFLOAT;

    if (depth_only)
    {
        return VK_IMAGE_ASPECT_DEPTH_BIT;
    }

    const bool depth_and_stencil =
        format == VK_FORMAT_D24_UNORM_S8_UINT ||
        format == VK_FORMAT_D32_SFLOAT_S8_UINT;

    if (depth_and_stencil)
    {
        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    }

    // Anything not listed above is handled as a color format
    return VK_IMAGE_ASPECT_COLOR_BIT;
}
// Resolves a texture remap request against a base component mapping.
// remap_vector.second holds one control code per channel (ONE, ZERO or REMAP);
// remap_vector.first holds, per channel, the index into base_remap used when the code is REMAP.
// The four resolved entries are returned rotated: entries 1, 2, 3, 0 become r, g, b, a.
// An unrecognised control code is logged and leaves that entry value-initialised.
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector)
{
    const std::array<u8, 4>& source_index = remap_vector.first;
    const std::array<u8, 4>& control_code = remap_vector.second;

    VkComponentSwizzle resolved[4] = {};

    for (u8 slot = 0; slot < 4; ++slot)
    {
        if (control_code[slot] == CELL_GCM_TEXTURE_REMAP_ONE)
        {
            resolved[slot] = VK_COMPONENT_SWIZZLE_ONE;
        }
        else if (control_code[slot] == CELL_GCM_TEXTURE_REMAP_ZERO)
        {
            resolved[slot] = VK_COMPONENT_SWIZZLE_ZERO;
        }
        else if (control_code[slot] == CELL_GCM_TEXTURE_REMAP_REMAP)
        {
            resolved[slot] = base_remap[source_index[slot]];
        }
        else
        {
            rsx_log.error("Unknown remap lookup value %d", control_code[slot]);
        }
    }

    return{ resolved[1], resolved[2], resolved[3], resolved[0] };
}
// Records a pipeline barrier on 'cmd' that transitions 'range' of 'image' from
// 'current_layout' to 'new_layout'.
// The destination access mask / stage are derived from 'new_layout' and the source access
// mask / stage from 'current_layout'. Any render pass still open on 'cmd' is ended first.
// Throws (fmt::throw_exception) when 'new_layout' is UNDEFINED, PREINITIALIZED or any layout
// not handled by the first switch.
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
// Close the active render pass before the barrier is recorded
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
//Prepare an image to match the new layout..
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = new_layout;
barrier.oldLayout = current_layout;
barrier.image = image;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = 0;
// No queue family ownership transfer takes place
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = range;
// Defaults used when a switch below leaves the stage untouched
VkPipelineStageFlags src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkPipelineStageFlags dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
// Destination side: which accesses/stages must wait for the transition
switch (new_layout)
{
case VK_IMAGE_LAYOUT_GENERAL:
// Avoid this layout as it is unoptimized
// GENERAL can be consumed by any of the usages below, so all of them are made to wait
barrier.dstAccessMask =
{
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
};
dst_stage =
{
VK_PIPELINE_STAGE_TRANSFER_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
};
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
// Present-source images are handled the same way as transfer sources here
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
default:
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_PREINITIALIZED:
// These are not valid transition targets
fmt::throw_exception("Attempted to transition to an invalid layout");
}
// Source side: which earlier accesses/stages must complete before the transition
switch (current_layout)
{
case VK_IMAGE_LAYOUT_GENERAL:
// Avoid this layout as it is unoptimized
// The previous usage of a GENERAL image is unknown, so the source scope is chosen from the target layout
if (new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ||
new_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
// About to be read: wait for earlier writes (color path vs depth/stencil path)
if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
{
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
}
else
{
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
}
}
else if (new_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
{
// Finish reading before writing
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
}
else
{
// Any other target: wait for every access type handled by this function
barrier.srcAccessMask =
{
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
};
src_stage =
{
VK_PIPELINE_STAGE_TRANSFER_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
};
}
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
default:
// Other source layouts (including UNDEFINED / PREINITIALIZED) keep srcAccessMask = 0 and TOP_OF_PIPE
break; //TODO Investigate what happens here
}
// Single image barrier, no dependency flags, no global or buffer barriers
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}
// Transitions 'range' of a tracked image to 'new_layout' and records the new layout on the
// wrapper. Nothing is recorded when the image is already in 'new_layout'.
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
    const auto previous_layout = image->current_layout;

    if (previous_layout != new_layout)
    {
        change_image_layout(cmd, image->value, previous_layout, new_layout, range);
        image->current_layout = new_layout;
    }
}
// Transitions every mip level and array layer of a tracked image (using the image's own
// aspect mask) to 'new_layout' and records the new layout on the wrapper.
// Nothing is recorded when the image is already in 'new_layout'.
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout)
{
    const auto previous_layout = image->current_layout;

    if (previous_layout != new_layout)
    {
        // Range spanning the whole image: all mipmaps, all layers
        const VkImageSubresourceRange whole_image = { image->aspect(), 0, image->mipmaps(), 0, image->layers() };

        change_image_layout(cmd, image->value, previous_layout, new_layout, whole_image);
        image->current_layout = new_layout;
    }
}
}

View file

@ -0,0 +1,15 @@
#pragma once
#include "../VulkanAPI.h"
namespace vk
{
// Forward declaration; the full definition is not needed by these prototypes
class image;
// R,G,B,A component mapping, defined in image_helpers.cpp
extern VkComponentMapping default_component_map;
// Returns the aspect mask (color, depth, or depth + stencil) associated with 'format'
VkImageAspectFlags get_aspect_flags(VkFormat format);
// Resolves a remap request (remap_vector.first = source indices, remap_vector.second = control codes) against 'base_remap'
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);
// Records a layout-transition barrier for a raw VkImage; the caller supplies the current layout
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range);
// Same, for a tracked image and an explicit subresource range; updates image->current_layout and is a no-op if already in 'new_layout'
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
// Same, covering all mipmaps and layers of the tracked image
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout);
}

View file

@ -0,0 +1,414 @@
#pragma once
#include "../VulkanAPI.h"
#include "swapchain.hpp"
#include <algorithm>
#include <vector>
namespace vk
{
class supported_extensions
{
private:
std::vector<VkExtensionProperties> m_vk_exts;
public:
enum enumeration_class
{
instance = 0,
device = 1
};
supported_extensions(enumeration_class _class, const char* layer_name = nullptr, VkPhysicalDevice pdev = VK_NULL_HANDLE)
{
u32 count;
if (_class == enumeration_class::instance)
{
if (vkEnumerateInstanceExtensionProperties(layer_name, &count, nullptr) != VK_SUCCESS)
return;
}
else
{
ensure(pdev);
if (vkEnumerateDeviceExtensionProperties(pdev, layer_name, &count, nullptr) != VK_SUCCESS)
return;
}
m_vk_exts.resize(count);
if (_class == enumeration_class::instance)
{
vkEnumerateInstanceExtensionProperties(layer_name, &count, m_vk_exts.data());
}
else
{
vkEnumerateDeviceExtensionProperties(pdev, layer_name, &count, m_vk_exts.data());
}
}
bool is_supported(const char* ext)
{
return std::any_of(m_vk_exts.cbegin(), m_vk_exts.cend(), [&](const VkExtensionProperties& p) { return std::strcmp(p.extensionName, ext) == 0; });
}
};
class instance
{
private:
std::vector<physical_device> gpus;
VkInstance m_instance = VK_NULL_HANDLE;
VkSurfaceKHR m_surface = VK_NULL_HANDLE;
PFN_vkDestroyDebugReportCallbackEXT destroyDebugReportCallback = nullptr;
PFN_vkCreateDebugReportCallbackEXT createDebugReportCallback = nullptr;
VkDebugReportCallbackEXT m_debugger = nullptr;
bool extensions_loaded = false;
public:
instance() = default;
// Releases the Vulkan instance (and anything attached to it) if one is still held.
~instance()
{
    if (!m_instance)
    {
        return;
    }

    destroy();
}
void destroy()
{
if (!m_instance) return;
if (m_debugger)
{
destroyDebugReportCallback(m_instance, m_debugger, nullptr);
m_debugger = nullptr;
}
if (m_surface)
{
vkDestroySurfaceKHR(m_instance, m_surface, nullptr);
m_surface = VK_NULL_HANDLE;
}
vkDestroyInstance(m_instance, nullptr);
m_instance = VK_NULL_HANDLE;
}
void enable_debugging()
{
if (!g_cfg.video.debug_output) return;
PFN_vkDebugReportCallbackEXT callback = vk::dbgFunc;
createDebugReportCallback = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(vkGetInstanceProcAddr(m_instance, "vkCreateDebugReportCallbackEXT"));
destroyDebugReportCallback = reinterpret_cast<PFN_vkDestroyDebugReportCallbackEXT>(vkGetInstanceProcAddr(m_instance, "vkDestroyDebugReportCallbackEXT"));
VkDebugReportCallbackCreateInfoEXT dbgCreateInfo = {};
dbgCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT;
dbgCreateInfo.pfnCallback = callback;
dbgCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
CHECK_RESULT(createDebugReportCallback(m_instance, &dbgCreateInfo, NULL, &m_debugger));
}
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#endif
// Creates the Vulkan instance.
// app_name - reported to the driver as both application and engine name
// fast     - when true, no extensions or layers are requested
// Returns true on success. Returns false when instance creation fails or, on platforms
// where the surface extension is probed at runtime, when none of them is available.
bool create(const char* app_name, bool fast = false)
{
    // Initialize a vulkan instance
    VkApplicationInfo app = {};
    app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    app.pApplicationName = app_name;
    app.applicationVersion = 0;
    app.pEngineName = app_name;
    app.engineVersion = 0;
    app.apiVersion = VK_API_VERSION_1_0;

    // Set up instance information
    std::vector<const char*> extensions;
    std::vector<const char*> layers;

    if (!fast)
    {
        // Remembered so that enumerate_devices() can tell the physical devices about it
        extensions_loaded = true;
        supported_extensions support(supported_extensions::instance);

        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);

        // Optional extensions: only requested when the loader reports them
        if (support.is_supported(VK_EXT_DEBUG_REPORT_EXTENSION_NAME))
        {
            extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
        }

        if (support.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
        {
            extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
        }

        // Platform surface extension
#ifdef _WIN32
        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
#elif defined(__APPLE__)
        extensions.push_back(VK_MVK_MACOS_SURFACE_EXTENSION_NAME);
#else
        bool found_surface_ext = false;
#ifdef HAVE_X11
        if (support.is_supported(VK_KHR_XLIB_SURFACE_EXTENSION_NAME))
        {
            extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
            found_surface_ext = true;
        }
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
        if (support.is_supported(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME))
        {
            extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
            found_surface_ext = true;
        }
#endif //(WAYLAND)
        if (!found_surface_ext)
        {
            rsx_log.error("Could not find a supported Vulkan surface extension");
            return false;
        }
#endif //(WIN32, __APPLE__)

        if (g_cfg.video.debug_output)
            layers.push_back("VK_LAYER_KHRONOS_validation");
    }

    VkInstanceCreateInfo instance_info = {};
    instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    instance_info.pApplicationInfo = &app;
    instance_info.enabledLayerCount = static_cast<u32>(layers.size());
    instance_info.ppEnabledLayerNames = layers.data();
    instance_info.enabledExtensionCount = fast ? 0 : static_cast<u32>(extensions.size());
    instance_info.ppEnabledExtensionNames = fast ? nullptr : extensions.data();

    if (VkResult result = vkCreateInstance(&instance_info, nullptr, &m_instance); result != VK_SUCCESS)
    {
        // The validation layer is the only layer ever requested above
        if (result == VK_ERROR_LAYER_NOT_PRESENT)
        {
            rsx_log.fatal("Could not initialize layer VK_LAYER_KHRONOS_validation");
        }

        return false;
    }

    return true;
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
// Re-registers the debug report callback: a previously registered callback is released
// first, then enable_debugging() installs a new one if debug output is enabled.
void bind()
{
    // Register some global states
    if (auto stale_callback = m_debugger)
    {
        destroyDebugReportCallback(m_instance, stale_callback, nullptr);
        m_debugger = nullptr;
    }

    enable_debugging();
}
std::vector<physical_device>& enumerate_devices()
{
u32 num_gpus;
// This may fail on unsupported drivers, so just assume no devices
if (vkEnumeratePhysicalDevices(m_instance, &num_gpus, nullptr) != VK_SUCCESS)
return gpus;
if (gpus.size() != num_gpus)
{
std::vector<VkPhysicalDevice> pdevs(num_gpus);
gpus.resize(num_gpus);
CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data()));
for (u32 i = 0; i < num_gpus; ++i)
gpus[i].create(m_instance, pdevs[i], extensions_loaded);
}
return gpus;
}
// Creates the platform window surface for 'window_handle' and returns a newly allocated
// swapchain object for 'dev' (ownership passes to the caller).
// - Returns nullptr when the device exposes no graphics-capable queue family.
// - Returns a native (software present) swapchain when the device cannot present to the
//   surface, or when graphics and present live in different queue families.
// - Otherwise returns a WSI swapchain. BGRA8_UNORM is preferred; the color space is taken
//   from the same surface-format entry as the chosen format so the pair handed to the
//   swapchain is one the surface actually reported.
swapchain_base* create_swapchain(display_handle_t window_handle, vk::physical_device& dev)
{
    bool force_wm_reporting_off = false;

    // Platform specific surface creation
#ifdef _WIN32
    using swapchain_NATIVE = swapchain_WIN32;
    HINSTANCE hInstance = NULL;

    VkWin32SurfaceCreateInfoKHR createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR;
    createInfo.hinstance = hInstance;
    createInfo.hwnd = window_handle;

    CHECK_RESULT(vkCreateWin32SurfaceKHR(m_instance, &createInfo, NULL, &m_surface));
#elif defined(__APPLE__)
    using swapchain_NATIVE = swapchain_MacOS;
    VkMacOSSurfaceCreateInfoMVK createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK;
    createInfo.pView = window_handle;

    CHECK_RESULT(vkCreateMacOSSurfaceMVK(m_instance, &createInfo, NULL, &m_surface));
#else
#ifdef HAVE_X11
    using swapchain_NATIVE = swapchain_X11;
#else
    using swapchain_NATIVE = swapchain_Wayland;
#endif

    // The window handle is a variant; pick the surface type matching the active alternative
    std::visit([&](auto&& p)
    {
        using T = std::decay_t<decltype(p)>;

#ifdef HAVE_X11
        if constexpr (std::is_same_v<T, std::pair<Display*, Window>>)
        {
            VkXlibSurfaceCreateInfoKHR createInfo = {};
            createInfo.sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR;
            createInfo.dpy = p.first;
            createInfo.window = p.second;
            CHECK_RESULT(vkCreateXlibSurfaceKHR(this->m_instance, &createInfo, nullptr, &m_surface));
        }
        else
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
        if constexpr (std::is_same_v<T, std::pair<wl_display*, wl_surface*>>)
        {
            VkWaylandSurfaceCreateInfoKHR createInfo = {};
            createInfo.sType = VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR;
            createInfo.display = p.first;
            createInfo.surface = p.second;
            CHECK_RESULT(vkCreateWaylandSurfaceKHR(this->m_instance, &createInfo, nullptr, &m_surface));
            force_wm_reporting_off = true;
        }
        else
#endif
        {
            static_assert(std::conditional_t<true, std::false_type, T>::value, "Unhandled window_handle type in std::variant");
        }
    }, window_handle);
#endif

    // Ask every queue family whether it can present to the new surface
    u32 device_queues = dev.get_queue_count();
    std::vector<VkBool32> supportsPresent(device_queues, VK_FALSE);
    bool present_possible = false;

    for (u32 index = 0; index < device_queues; index++)
    {
        vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, m_surface, &supportsPresent[index]);
    }

    for (const auto& value : supportsPresent)
    {
        if (value)
        {
            present_possible = true;
            break;
        }
    }

    if (!present_possible)
    {
        rsx_log.error("It is not possible for the currently selected GPU to present to the window (Likely caused by NVIDIA driver running the current display)");
    }

    // Search for a graphics and a present queue in the array of queue
    // families, try to find one that supports both
    u32 graphicsQueueNodeIndex = UINT32_MAX;
    u32 presentQueueNodeIndex = UINT32_MAX;

    for (u32 i = 0; i < device_queues; i++)
    {
        if ((dev.get_queue_properties(i).queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
        {
            // Remember the first graphics family as a fallback
            if (graphicsQueueNodeIndex == UINT32_MAX)
                graphicsQueueNodeIndex = i;

            if (supportsPresent[i] == VK_TRUE)
            {
                graphicsQueueNodeIndex = i;
                presentQueueNodeIndex = i;
                break;
            }
        }
    }

    if (presentQueueNodeIndex == UINT32_MAX)
    {
        // If didn't find a queue that supports both graphics and present, then
        // find a separate present queue.
        for (u32 i = 0; i < device_queues; ++i)
        {
            if (supportsPresent[i] == VK_TRUE)
            {
                presentQueueNodeIndex = i;
                break;
            }
        }
    }

    if (graphicsQueueNodeIndex == UINT32_MAX)
    {
        rsx_log.fatal("Failed to find a suitable graphics queue");
        return nullptr;
    }

    if (graphicsQueueNodeIndex != presentQueueNodeIndex)
    {
        //Separate graphics and present, use headless fallback
        present_possible = false;
    }

    if (!present_possible)
    {
        //Native(sw) swapchain
        rsx_log.warning("Falling back to software present support (native windowing API)");
        auto swapchain = new swapchain_NATIVE(dev, UINT32_MAX, graphicsQueueNodeIndex);
        swapchain->create(window_handle);
        return swapchain;
    }

    // Get the list of VkFormat's that are supported:
    u32 formatCount;
    CHECK_RESULT(vkGetPhysicalDeviceSurfaceFormatsKHR(dev, m_surface, &formatCount, nullptr));

    std::vector<VkSurfaceFormatKHR> surfFormats(formatCount);
    CHECK_RESULT(vkGetPhysicalDeviceSurfaceFormatsKHR(dev, m_surface, &formatCount, surfFormats.data()));

    VkFormat format;
    VkColorSpaceKHR color_space;

    if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED)
    {
        // The surface reports no preferred format; pick ours and keep the reported color space
        format = VK_FORMAT_B8G8R8A8_UNORM;
        color_space = surfFormats[0].colorSpace;
    }
    else
    {
        if (!formatCount) fmt::throw_exception("Format count is zero!");

        // Start with the first reported pair
        format = surfFormats[0].format;
        color_space = surfFormats[0].colorSpace;

        //Prefer BGRA8_UNORM to avoid sRGB compression (RADV)
        for (const auto& surface_format : surfFormats)
        {
            if (surface_format.format == VK_FORMAT_B8G8R8A8_UNORM)
            {
                // Take format and color space from the same entry so they form a reported pair
                format = surface_format.format;
                color_space = surface_format.colorSpace;
                break;
            }
        }
    }

    return new swapchain_WSI(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, format, m_surface, color_space, force_wm_reporting_off);
}
};
}

View file

@ -10,82 +10,5 @@ namespace vk
{
// Memory Allocator - base class
// Abstract interface for device memory allocators.
// Allocations are identified by an opaque handle whose meaning is defined by the concrete allocator.
class mem_allocator_base
{
public:
// Opaque per-allocation handle
using mem_handle_t = void *;
// Stores the logical device; the physical device argument is unused at this level
mem_allocator_base(VkDevice dev, VkPhysicalDevice /*pdev*/) : m_device(dev) {}
virtual ~mem_allocator_base() = default;
// Releases allocator-wide resources
virtual void destroy() = 0;
// Allocates a block of 'block_sz' with the requested alignment from the given memory type
virtual mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index) = 0;
// Releases an allocation obtained from alloc()
virtual void free(mem_handle_t mem_handle) = 0;
// Maps the allocation and returns a host pointer for the requested offset/size
virtual void *map(mem_handle_t mem_handle, u64 offset, u64 size) = 0;
// Undoes a previous map()
virtual void unmap(mem_handle_t mem_handle) = 0;
// VkDeviceMemory object that backs the allocation
virtual VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) = 0;
// Offset of the allocation inside that VkDeviceMemory object
virtual u64 get_vk_device_memory_offset(mem_handle_t mem_handle) = 0;
// Memory usage figure (units/scale are defined by the implementations)
virtual f32 get_memory_usage() = 0;
protected:
// Logical device the allocator operates on
VkDevice m_device;
};
// Memory Allocator - Vulkan Memory Allocator
// https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
// mem_allocator_base implementation that holds a VmaAllocator (Vulkan Memory Allocator library).
// All member functions are defined out of line.
class mem_allocator_vma : public mem_allocator_base
{
public:
mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev);
~mem_allocator_vma() override = default;
void destroy() override;
mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index) override;
void free(mem_handle_t mem_handle) override;
// The size argument is not used by this implementation
void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override;
void unmap(mem_handle_t mem_handle) override;
VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) override;
u64 get_vk_device_memory_offset(mem_handle_t mem_handle) override;
f32 get_memory_usage() override;
private:
// VMA allocator instance
VmaAllocator m_allocator;
// One VmaBudget record per possible memory heap
std::array<VmaBudget, VK_MAX_MEMORY_HEAPS> stats;
};
// Memory Allocator - built-in Vulkan device memory allocate/free
// mem_allocator_base implementation built on plain Vulkan device memory allocate/free.
class mem_allocator_vk : public mem_allocator_base
{
public:
mem_allocator_vk(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev) {}
~mem_allocator_vk() override = default;
// Nothing allocator-wide to release
void destroy() override {}
// The alignment argument is not used by this implementation
mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index) override;
void free(mem_handle_t mem_handle) override;
void* map(mem_handle_t mem_handle, u64 offset, u64 size) override;
void unmap(mem_handle_t mem_handle) override;
VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) override;
// The handle argument is not used by this implementation
u64 get_vk_device_memory_offset(mem_handle_t /*mem_handle*/) override;
f32 get_memory_usage() override;
};
// Memory-management hooks; all are defined in other translation units.
// NOTE(review): the descriptions below are inferred from the names only - confirm against the definitions.
// Presumably records a new allocation (handle, memory type, size)
void vmm_notify_memory_allocated(void* handle, u32 memory_type, u64 memory_size);
// Presumably records that the allocation identified by 'handle' was released
void vmm_notify_memory_freed(void* handle);
void vmm_reset();
void vmm_check_memory_usage();
// Takes a severity level and reports a bool result (meaning defined by the implementation)
bool vmm_handle_memory_pressure(rsx::problem_severity severity);
// Allocator belonging to the current render device
mem_allocator_base* get_current_mem_allocator();
}

View file

@ -1,11 +1,8 @@
#include "mem_allocator.h"
#include "util/logs.hpp"
#include "../VKHelpers.h"
#include "device.h"
#include "memory.h"
namespace vk
{
extern const render_device* g_current_renderer;
mem_allocator_vma::mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev)
{
// Initialize stats pool
@ -66,12 +63,12 @@ namespace vk
void* mem_allocator_vma::map(mem_handle_t mem_handle, u64 offset, u64 /*size*/)
{
void *data = nullptr;
void* data = nullptr;
CHECK_RESULT(vmaMapMemory(m_allocator, static_cast<VmaAllocation>(mem_handle), &data));
// Add offset
data = static_cast<u8 *>(data) + offset;
data = static_cast<u8*>(data) + offset;
return data;
}
@ -119,9 +116,9 @@ namespace vk
{
VkDeviceMemory memory;
VkMemoryAllocateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
info.allocationSize = block_sz;
info.memoryTypeIndex = memory_type_index;
info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
info.allocationSize = block_sz;
info.memoryTypeIndex = memory_type_index;
if (VkResult result = vkAllocateMemory(m_device, &info, nullptr, &memory); result != VK_SUCCESS)
{
@ -180,7 +177,38 @@ namespace vk
mem_allocator_base* get_current_mem_allocator()
{
ensure(g_current_renderer);
return g_current_renderer->get_allocator();
return g_render_device->get_allocator();
}
// Allocates a block from the current allocator.
// The initializers rely on the member declaration order (device, allocator, handle):
// the handle is obtained through the allocator pointer initialised just before it.
memory_block::memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index)
    : m_device(dev)
    , m_mem_allocator(get_current_mem_allocator())
    , m_mem_handle(m_mem_allocator->alloc(block_sz, alignment, memory_type_index))
{
}

// Returns the block to the allocator it was taken from
memory_block::~memory_block()
{
    auto* owner = m_mem_allocator;
    owner->free(m_mem_handle);
}

// VkDeviceMemory object backing this block, as reported by the allocator
VkDeviceMemory memory_block::get_vk_device_memory()
{
    auto* owner = m_mem_allocator;
    return owner->get_vk_device_memory(m_mem_handle);
}

// Offset of this block inside its VkDeviceMemory object, as reported by the allocator
u64 memory_block::get_vk_device_memory_offset()
{
    auto* owner = m_mem_allocator;
    return owner->get_vk_device_memory_offset(m_mem_handle);
}

// Maps the block through the allocator and returns the resulting host pointer
void* memory_block::map(u64 offset, u64 size)
{
    auto* owner = m_mem_allocator;
    return owner->map(m_mem_handle, offset, size);
}

// Undoes a previous map()
void memory_block::unmap()
{
    auto* owner = m_mem_allocator;
    owner->unmap(m_mem_handle);
}
}

View file

@ -0,0 +1,109 @@
#pragma once
#include "../VulkanAPI.h"
#include "../../rsx_utils.h"
#include "shared.h"
#include "3rdparty/GPUOpen/include/vk_mem_alloc.h"
namespace vk
{
// Abstract interface for device memory allocators.
// Allocations are identified by an opaque handle whose meaning is defined by the concrete allocator.
class mem_allocator_base
{
public:
// Opaque per-allocation handle
using mem_handle_t = void*;
// Stores the logical device; the physical device argument is unused at this level
mem_allocator_base(VkDevice dev, VkPhysicalDevice /*pdev*/) : m_device(dev) {}
virtual ~mem_allocator_base() = default;
// Releases allocator-wide resources
virtual void destroy() = 0;
// Allocates a block of 'block_sz' with the requested alignment from the given memory type
virtual mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index) = 0;
// Releases an allocation obtained from alloc()
virtual void free(mem_handle_t mem_handle) = 0;
// Maps the allocation and returns a host pointer for the requested offset/size
virtual void* map(mem_handle_t mem_handle, u64 offset, u64 size) = 0;
// Undoes a previous map()
virtual void unmap(mem_handle_t mem_handle) = 0;
// VkDeviceMemory object that backs the allocation
virtual VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) = 0;
// Offset of the allocation inside that VkDeviceMemory object
virtual u64 get_vk_device_memory_offset(mem_handle_t mem_handle) = 0;
// Memory usage figure (units/scale are defined by the implementations)
virtual f32 get_memory_usage() = 0;
protected:
// Logical device the allocator operates on
VkDevice m_device;
};
// Memory Allocator - Vulkan Memory Allocator
// https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
// mem_allocator_base implementation that holds a VmaAllocator (Vulkan Memory Allocator library).
// All member functions are defined in the implementation file.
class mem_allocator_vma : public mem_allocator_base
{
public:
mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev);
~mem_allocator_vma() override = default;
void destroy() override;
mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index) override;
void free(mem_handle_t mem_handle) override;
// The size argument is not used by this implementation
void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override;
void unmap(mem_handle_t mem_handle) override;
VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) override;
u64 get_vk_device_memory_offset(mem_handle_t mem_handle) override;
f32 get_memory_usage() override;
private:
// VMA allocator instance
VmaAllocator m_allocator;
// One VmaBudget record per possible memory heap
std::array<VmaBudget, VK_MAX_MEMORY_HEAPS> stats;
};
// Memory Allocator - built-in Vulkan device memory allocate/free
// mem_allocator_base implementation built on plain Vulkan device memory allocate/free.
class mem_allocator_vk : public mem_allocator_base
{
public:
mem_allocator_vk(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev) {}
~mem_allocator_vk() override = default;
// Nothing allocator-wide to release
void destroy() override {}
// The alignment argument is not used by this implementation
mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index) override;
void free(mem_handle_t mem_handle) override;
void* map(mem_handle_t mem_handle, u64 offset, u64 size) override;
void unmap(mem_handle_t mem_handle) override;
VkDeviceMemory get_vk_device_memory(mem_handle_t mem_handle) override;
// The handle argument is not used by this implementation
u64 get_vk_device_memory_offset(mem_handle_t /*mem_handle*/) override;
f32 get_memory_usage() override;
};
// A single allocation obtained from the current memory allocator.
// The allocation is made in the constructor and released in the destructor;
// instances are neither copyable nor movable.
struct memory_block
{
// Allocates 'block_sz' bytes with the given alignment from 'memory_type_index'
memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index);
// Frees the allocation through the allocator that produced it
~memory_block();
// VkDeviceMemory object backing this block
VkDeviceMemory get_vk_device_memory();
// Offset of this block inside that VkDeviceMemory object
u64 get_vk_device_memory_offset();
// Maps the block through the allocator and returns the resulting host pointer
void* map(u64 offset, u64 size);
// Undoes a previous map()
void unmap();
memory_block(const memory_block&) = delete;
memory_block(memory_block&&) = delete;
private:
// Device handle passed to the constructor
VkDevice m_device;
// Allocator that owns the allocation (non-owning pointer)
vk::mem_allocator_base* m_mem_allocator;
// Opaque handle identifying the allocation inside the allocator
mem_allocator_base::mem_handle_t m_mem_handle;
};
// Memory-management hooks; vmm_* are defined in other translation units.
// NOTE(review): the vmm_* descriptions below are inferred from the names only - confirm against the definitions.
// Presumably records a new allocation (handle, memory type, size)
void vmm_notify_memory_allocated(void* handle, u32 memory_type, u64 memory_size);
// Presumably records that the allocation identified by 'handle' was released
void vmm_notify_memory_freed(void* handle);
void vmm_reset();
void vmm_check_memory_usage();
// Takes a severity level and reports a bool result (meaning defined by the implementation)
bool vmm_handle_memory_pressure(rsx::problem_severity severity);
// Allocator belonging to the current render device
mem_allocator_base* get_current_mem_allocator();
}

View file

@ -1,36 +0,0 @@
#include "memory_block.h"
namespace vk
{
// Allocates a block from the current allocator.
// The initializers rely on the member declaration order (device, allocator, handle):
// the handle is obtained through the allocator pointer initialised just before it.
memory_block::memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index)
    : m_device(dev)
    , m_mem_allocator(get_current_mem_allocator())
    , m_mem_handle(m_mem_allocator->alloc(block_sz, alignment, memory_type_index))
{
}

// Returns the block to the allocator it was taken from
memory_block::~memory_block()
{
    auto* owner = m_mem_allocator;
    owner->free(m_mem_handle);
}

// VkDeviceMemory object backing this block, as reported by the allocator
VkDeviceMemory memory_block::get_vk_device_memory()
{
    auto* owner = m_mem_allocator;
    return owner->get_vk_device_memory(m_mem_handle);
}

// Offset of this block inside its VkDeviceMemory object, as reported by the allocator
u64 memory_block::get_vk_device_memory_offset()
{
    auto* owner = m_mem_allocator;
    return owner->get_vk_device_memory_offset(m_mem_handle);
}

// Maps the block through the allocator and returns the resulting host pointer
void* memory_block::map(u64 offset, u64 size)
{
    auto* owner = m_mem_allocator;
    return owner->map(m_mem_handle, offset, size);
}

// Undoes a previous map()
void memory_block::unmap()
{
    auto* owner = m_mem_allocator;
    owner->unmap(m_mem_handle);
}
}

View file

@ -1,27 +0,0 @@
#pragma once
#include "../VulkanAPI.h"
#include "mem_allocator.h"
namespace vk
{
// A single allocation obtained from the current memory allocator.
// The allocation is made in the constructor and released in the destructor;
// instances are neither copyable nor movable.
struct memory_block
{
// Allocates 'block_sz' bytes with the given alignment from 'memory_type_index'
memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index);
// Frees the allocation through the allocator that produced it
~memory_block();
// VkDeviceMemory object backing this block
VkDeviceMemory get_vk_device_memory();
// Offset of this block inside that VkDeviceMemory object
u64 get_vk_device_memory_offset();
// Maps the block through the allocator and returns the resulting host pointer
void* map(u64 offset, u64 size);
// Undoes a previous map()
void unmap();
memory_block(const memory_block&) = delete;
memory_block(memory_block&&) = delete;
private:
// Device handle passed to the constructor
VkDevice m_device;
// Allocator that owns the allocation (non-owning pointer)
vk::mem_allocator_base* m_mem_allocator;
// Opaque handle identifying the allocation inside the allocator
mem_allocator_base::mem_handle_t m_mem_handle;
};
}

View file

@ -1,265 +0,0 @@
#include "render_device.h"
#include "mem_allocator.h"
#include "shared.h"
#include "Emu/system_config.h"
namespace vk
{
// Creates the logical device on 'pdev' with a single queue from family 'graphics_queue_idx'.
// Steps: pick optional device extensions from the capabilities recorded on the physical device,
// build the feature set (downgrading a few features the GPU does not report), create the
// VkDevice, resolve optional entry points, cache device-derived tables and finally
// instantiate the memory allocator selected in the configuration.
void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx)
{
float queue_priorities[1] = {0.f};
// Remember the physical device; the rest of the class reads capabilities through it
pgpu = &pdev;
VkDeviceQueueCreateInfo queue = {};
queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue.pNext = NULL;
queue.queueFamilyIndex = graphics_queue_idx;
queue.queueCount = 1;
queue.pQueuePriorities = queue_priorities;
// Set up instance information
// The swapchain extension is always requested
std::vector<const char*> requested_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
// Enable hardware features manually
// Currently we require:
// 1. Anisotropic sampling
// 2. DXT support
// 3. Indexable storage buffers
VkPhysicalDeviceFeatures enabled_features{};
// Optional extensions, requested only when the physical device flagged support
if (pgpu->shader_types_support.allow_float16)
{
requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (pgpu->conditional_render_support)
{
requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
}
if (pgpu->unrestricted_depth_range_support)
{
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
// Features requested up front; some are switched off again below if the GPU lacks them
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
enabled_features.logicOp = VK_TRUE;
enabled_features.depthClamp = VK_TRUE;
enabled_features.depthBounds = VK_TRUE;
enabled_features.wideLines = VK_TRUE;
enabled_features.largePoints = VK_TRUE;
enabled_features.shaderFloat64 = VK_TRUE;
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
// MSAA features
if (!pgpu->features.shaderStorageImageMultisample || !pgpu->features.shaderStorageImageWriteWithoutFormat)
{
// TODO: Slow fallback to emulate this
// Just warn and let the driver decide whether to crash or not
rsx_log.fatal("Your GPU driver does not support some required MSAA features. Expect problems.");
}
enabled_features.sampleRateShading = VK_TRUE;
enabled_features.alphaToOne = VK_TRUE;
enabled_features.shaderStorageImageMultisample = VK_TRUE;
// enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE; // Unused currently, may be needed soon
enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE;
}
// enabled_features.shaderSampledImageArrayDynamicIndexing = TRUE; // Unused currently but will be needed soon
enabled_features.shaderClipDistance = VK_TRUE;
// enabled_features.shaderCullDistance = VK_TRUE; // Alt notation of clip distance
enabled_features.samplerAnisotropy = VK_TRUE;
enabled_features.textureCompressionBC = VK_TRUE;
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;
// Optionally disable unsupported stuff
if (!pgpu->features.shaderFloat64)
{
rsx_log.error("Your GPU does not support double precision floats in shaders. Graphics may not work correctly.");
enabled_features.shaderFloat64 = VK_FALSE;
}
if (!pgpu->features.depthBounds)
{
rsx_log.error("Your GPU does not support depth bounds testing. Graphics may not work correctly.");
enabled_features.depthBounds = VK_FALSE;
}
// The two MSAA-only features are only downgraded if they were requested above
if (!pgpu->features.sampleRateShading && enabled_features.sampleRateShading)
{
rsx_log.error("Your GPU does not support sample rate shading for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.sampleRateShading = VK_FALSE;
}
if (!pgpu->features.alphaToOne && enabled_features.alphaToOne)
{
// AMD proprietary drivers do not expose alphaToOne support
rsx_log.error("Your GPU does not support alpha-to-one for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.alphaToOne = VK_FALSE;
}
VkDeviceCreateInfo device = {};
device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device.pNext = nullptr;
device.queueCreateInfoCount = 1;
device.pQueueCreateInfos = &queue;
device.enabledLayerCount = 0;
device.ppEnabledLayerNames = nullptr; // Deprecated
device.enabledExtensionCount = ::size32(requested_extensions);
device.ppEnabledExtensionNames = requested_extensions.data();
device.pEnabledFeatures = &enabled_features;
// Declared outside the branch because 'device.pNext' may point at it until vkCreateDevice returns
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
if (pgpu->shader_types_support.allow_float16)
{
// Allow use of f16 type in shaders if possible
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
shader_support_info.shaderFloat16 = VK_TRUE;
device.pNext = &shader_support_info;
rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
}
else
{
rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
}
CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev));
// Import optional function endpoints
if (pgpu->conditional_render_support)
{
cmdBeginConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdBeginConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"));
cmdEndConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdEndConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"));
}
// Cache tables derived from the physical device
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
// Allocator backend is selected by configuration: plain Vulkan allocations or VMA
if (g_cfg.video.disable_vulkan_mem_allocator)
m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
else
m_allocator = std::make_unique<vk::mem_allocator_vma>(dev, pdev);
}
// Tears down the logical device together with the allocator created for it.
// Does nothing unless both a logical device and a physical device are bound.
void render_device::destroy()
{
	if (!dev || !pgpu)
	{
		return;
	}

	// The allocator is destroyed and released before the device handle itself
	if (m_allocator)
	{
		m_allocator->destroy();
		m_allocator.reset();
	}

	vkDestroyDevice(dev, nullptr);
	dev = nullptr;

	// Drop the cached device properties
	memory_map = {};
	m_formats_support = {};
}
// Returns the properties of the given format. The first request for a format queries
// the physical device and stores the answer in pgpu->format_properties; later requests
// are served from that cache.
const VkFormatProperties render_device::get_format_properties(VkFormat format)
{
	const auto cached = pgpu->format_properties.find(format);
	if (cached == pgpu->format_properties.end())
	{
		// Unknown format: create the cache slot and let the driver fill it in
		auto& fresh = pgpu->format_properties[format];
		vkGetPhysicalDeviceFormatProperties(*pgpu, format, &fresh);
		return fresh;
	}

	return cached->second;
}
// Finds the lowest memory type index that is allowed by typeBits (bit i set means type i
// is allowed) and whose property flags contain every bit of desired_mask.
// On success the index is written to *type_index (when non-null) and true is returned.
bool render_device::get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const
{
	const VkPhysicalDeviceMemoryProperties props = pgpu->get_memory_properties();

	// typeBits is shifted right once per iteration so bit 0 always refers to 'index'
	for (u32 index = 0; index < 32; ++index, typeBits >>= 1)
	{
		if ((typeBits & 1) != 1)
		{
			continue;
		}

		if ((props.memoryTypes[index].propertyFlags & desired_mask) != desired_mask)
		{
			continue;
		}

		if (type_index)
		{
			*type_index = index;
		}
		return true;
	}

	return false;
}
// Returns the physical device bound to this logical device (dereferences pgpu).
const physical_device& render_device::gpu() const
{
return *pgpu;
}
// Returns the memory type mapping cached in memory_map.
const memory_type_mapping& render_device::get_memory_mapping() const
{
return memory_map;
}
// Returns the format support information cached in m_formats_support.
const gpu_formats_support& render_device::get_formats_support() const
{
return m_formats_support;
}
// Returns the pipeline binding table cached in m_pipeline_binding_table.
const pipeline_binding_table& render_device::get_pipeline_binding_table() const
{
return m_pipeline_binding_table;
}
// Forwards the shader data-type support record stored on the physical device.
const gpu_shader_types_support& render_device::get_shader_types_support() const
{
return pgpu->shader_types_support;
}
// Forwards the stencil_export_support flag stored on the physical device.
bool render_device::get_shader_stencil_export_support() const
{
return pgpu->stencil_export_support;
}
// True when the physical device reports the depthBounds feature.
bool render_device::get_depth_bounds_support() const
{
return pgpu->features.depthBounds != VK_FALSE;
}
// True when the physical device reports the alphaToOne feature.
bool render_device::get_alpha_to_one_support() const
{
return pgpu->features.alphaToOne != VK_FALSE;
}
// Forwards the conditional_render_support flag stored on the physical device.
bool render_device::get_conditional_render_support() const
{
return pgpu->conditional_render_support;
}
// Forwards the unrestricted_depth_range_support flag stored on the physical device.
bool render_device::get_unrestricted_depth_range_support() const
{
return pgpu->unrestricted_depth_range_support;
}
// Returns a non-owning pointer to the allocator held by m_allocator (may be null).
mem_allocator_base* render_device::get_allocator() const
{
return m_allocator.get();
}
// Implicit conversion to the underlying VkDevice handle.
render_device::operator VkDevice() const
{
return dev;
}
}

View file

@ -1,51 +0,0 @@
#pragma once
#include "physical_device.h"
#include <memory>
namespace vk
{
class mem_allocator_base;
// Wrapper around a VkDevice plus the per-device data the backend queries from it:
// memory type mapping, format support, pipeline binding table and the memory allocator.
class render_device
{
// Physical device this logical device belongs to
physical_device* pgpu = nullptr;
memory_type_mapping memory_map{};
gpu_formats_support m_formats_support{};
pipeline_binding_table m_pipeline_binding_table{};
// Owned allocator instance
std::unique_ptr<mem_allocator_base> m_allocator;
VkDevice dev = VK_NULL_HANDLE;
public:
// Exported device endpoints
PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr;
PFN_vkCmdEndConditionalRenderingEXT cmdEndConditionalRenderingEXT = nullptr;
public:
render_device() = default;
~render_device() = default;
// Explicit lifetime management: the defaulted destructor does not call destroy()
void create(vk::physical_device& pdev, u32 graphics_queue_idx);
void destroy();
// Format and memory-type queries
const VkFormatProperties get_format_properties(VkFormat format);
bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const;
// Accessors for the owning physical device and the cached device data
const physical_device& gpu() const;
const memory_type_mapping& get_memory_mapping() const;
const gpu_formats_support& get_formats_support() const;
const pipeline_binding_table& get_pipeline_binding_table() const;
const gpu_shader_types_support& get_shader_types_support() const;
// Capability queries
bool get_shader_stencil_export_support() const;
bool get_depth_bounds_support() const;
bool get_alpha_to_one_support() const;
bool get_conditional_render_support() const;
bool get_unrestricted_depth_range_support() const;
// Non-owning access to the allocator
mem_allocator_base* get_allocator() const;
// Implicit conversion to the raw device handle
operator VkDevice() const;
};
}

View file

@ -0,0 +1,165 @@
#include "buffer_object.h"
#include "image.h"
#include "sampler.h"
#include "../VKResourceManager.h"
#include "Utilities/address_range.h"
#include <util/asm.hpp>
namespace vk
{
// Placeholder images keyed by image view type
std::unordered_map<VkImageViewType, std::unique_ptr<viewable_image>> g_null_image_views;
// Typeless helper images keyed by a packed (format class, format) value
std::unordered_map<u32, std::unique_ptr<image>> g_typeless_textures;
// Shared sampler, created on first use
VkSampler g_null_sampler = nullptr;
// Shared scratch buffer, (re)allocated on demand
std::unique_ptr<buffer> g_scratch_buffer;
// Returns the shared placeholder sampler, creating it on the first call.
// The sampler uses nearest filtering, repeat addressing and no anisotropy or compare.
VkSampler null_sampler()
{
	if (!g_null_sampler)
	{
		VkSamplerCreateInfo info = {};
		info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;

		// Filtering
		info.magFilter = VK_FILTER_NEAREST;
		info.minFilter = VK_FILTER_NEAREST;
		info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
		info.mipLodBias = 0;

		// Addressing
		info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
		info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
		info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
		info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
		info.unnormalizedCoordinates = VK_FALSE;

		// Optional features are all disabled
		info.anisotropyEnable = VK_FALSE;
		info.maxAnisotropy = 0;
		info.compareEnable = VK_FALSE;
		info.compareOp = VK_COMPARE_OP_NEVER;

		vkCreateSampler(*g_render_device, &info, nullptr, &g_null_sampler);
	}

	return g_null_sampler;
}
// Returns a view of a small placeholder image matching the requested view type.
// The image is created on first use, cleared to transparent black through 'cmd' and left in
// SHADER_READ_ONLY_OPTIMAL layout; later calls for the same type reuse the cached image.
// Returns nullptr (after logging) for view types that are not handled.
vk::image_view* null_image_view(vk::command_buffer& cmd, VkImageViewType type)
{
	if (auto found = g_null_image_views.find(type);
		found != g_null_image_views.end())
	{
		return found->second->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
	}

	VkImageType image_type;
	u32 num_layers = 1;
	u32 flags = 0;
	u16 size = 4;

	switch (type)
	{
	case VK_IMAGE_VIEW_TYPE_1D:
		image_type = VK_IMAGE_TYPE_1D;
		size = 1;
		break;
	case VK_IMAGE_VIEW_TYPE_2D:
		image_type = VK_IMAGE_TYPE_2D;
		break;
	case VK_IMAGE_VIEW_TYPE_3D:
		image_type = VK_IMAGE_TYPE_3D;
		break;
	case VK_IMAGE_VIEW_TYPE_CUBE:
		image_type = VK_IMAGE_TYPE_2D;
		flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
		num_layers = 6;
		break;
	case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
		image_type = VK_IMAGE_TYPE_2D;
		num_layers = 2;
		break;
	default:
		rsx_log.fatal("Unexpected image view type 0x%x", static_cast<u32>(type));
		return nullptr;
	}

	auto& tex = g_null_image_views[type];
	tex = std::make_unique<viewable_image>(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		image_type, VK_FORMAT_B8G8R8A8_UNORM, size, size, 1, 1, num_layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
		VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, flags);

	// Initialize memory to transparent black
	tex->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

	// The clear must cover every array layer of the image (6 for cubes, 2 for 2D arrays);
	// clearing only layer 0 would leave the remaining layers with undefined contents.
	VkClearColorValue clear_color = {};
	VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, num_layers };
	vkCmdClearColorImage(cmd, tex->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);

	// Prep for shader access
	tex->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

	// Return view
	return tex->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
}
// Returns a transfer-capable helper image for the given format/class pair that is at least
// requested_width x requested_height. An existing image that is too small is handed to the
// resource manager for disposal and replaced by one covering both the old and new extents,
// with each dimension rounded up to a multiple of 1024.
vk::image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height)
{
	// Format class occupies the top byte of the key, the format the rest
	const u32 key = (format_class << 24u) | format;
	auto& slot = g_typeless_textures[key];

	const bool usable = slot && slot->width() >= requested_width && slot->height() >= requested_height;
	if (usable)
	{
		return slot.get();
	}

	if (slot)
	{
		// Never shrink in either dimension when replacing an existing image
		requested_width = std::max(requested_width, slot->width());
		requested_height = std::max(requested_height, slot->height());
		get_resource_manager()->dispose(slot);
	}

	const u32 new_width = utils::align(requested_width, 1024u);
	const u32 new_height = utils::align(requested_height, 1024u);

	slot.reset(new vk::image(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
		VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0));

	return slot.get();
}
// Returns the shared scratch buffer, guaranteed to hold at least min_required_size bytes.
// A buffer that is too small is handed to the resource manager and a new one is allocated;
// new allocations are at least 64 MiB and rounded up to a 1 MiB multiple.
vk::buffer* get_scratch_buffer(u32 min_required_size)
{
	const bool too_small = g_scratch_buffer && g_scratch_buffer->size() < min_required_size;
	if (too_small)
	{
		// Scratch heap cannot fit requirements. Discard it and allocate a new one.
		vk::get_resource_manager()->dispose(g_scratch_buffer);
	}

	if (g_scratch_buffer)
	{
		return g_scratch_buffer.get();
	}

	// Choose optimal size
	const u64 alloc_size = std::max<u64>(64 * 0x100000, utils::align(min_required_size, 0x100000));

	g_scratch_buffer = std::make_unique<vk::buffer>(*g_render_device, alloc_size,
		g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0);

	return g_scratch_buffer.get();
}
// Releases every resource cached by this file: placeholder images, the scratch buffer,
// typeless helper images and the placeholder sampler.
void clear_scratch_resources()
{
	g_null_image_views.clear();
	g_scratch_buffer.reset();
	g_typeless_textures.clear();

	if (!g_null_sampler)
	{
		return;
	}

	vkDestroySampler(*g_render_device, g_null_sampler, nullptr);
	g_null_sampler = nullptr;
}
}

View file

@ -0,0 +1,12 @@
#pragma once
#include "image.h"
// Shared scratch/placeholder resources of the Vulkan backend.
namespace vk
{
// Shared placeholder sampler
VkSampler null_sampler();
// View of a placeholder image matching the given view type
image_view* null_image_view(command_buffer&, VkImageViewType type);
// Helper image for the given format/class that is at least the requested size
image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height);
// Shared scratch buffer holding at least min_required_size bytes
buffer* get_scratch_buffer(u32 min_required_size = 0);
// Releases everything handed out by the functions above
void clear_scratch_resources();
}

View file

@ -131,4 +131,15 @@ namespace vk
// Let the app crash..
return false;
}
// Debug report callback that traps into an attached debugger on Windows and does nothing
// elsewhere. None of the report arguments are inspected. Always returns VK_FALSE.
VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType,
	u64 srcObject, usz location, s32 msgCode,
	const char* pLayerPrefix, const char* pMsg, void* pUserData)
{
#ifdef _WIN32
	DebugBreak();
#endif

	return VK_FALSE;
}
}

View file

@ -15,4 +15,9 @@ namespace vk
// Debug report callback with the argument list of a VK_EXT_debug_report callback.
VKAPI_ATTR VkBool32 VKAPI_CALL dbgFunc(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType,
u64 srcObject, usz location, s32 msgCode,
const char *pLayerPrefix, const char *pMsg, void *pUserData);
// Debug report callback that breaks into the debugger (Windows only) and returns false.
// NOTE(review): declared without VKAPI_ATTR/VKAPI_CALL, unlike dbgFunc - confirm this is intended.
VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType,
u64 srcObject, usz location, s32 msgCode,
const char* pLayerPrefix, const char* pMsg,
void* pUserData);
}

View file

@ -1,52 +0,0 @@
#pragma once
#include "../VulkanAPI.h"
#include <algorithm>
#include <vector>
namespace vk
{
class supported_extensions
{
private:
std::vector<VkExtensionProperties> m_vk_exts;
public:
enum enumeration_class
{
instance = 0,
device = 1
};
supported_extensions(enumeration_class _class, const char* layer_name = nullptr, VkPhysicalDevice pdev = VK_NULL_HANDLE)
{
u32 count;
if (_class == enumeration_class::instance)
{
if (vkEnumerateInstanceExtensionProperties(layer_name, &count, nullptr) != VK_SUCCESS)
return;
}
else
{
ensure(pdev);
if (vkEnumerateDeviceExtensionProperties(pdev, layer_name, &count, nullptr) != VK_SUCCESS)
return;
}
m_vk_exts.resize(count);
if (_class == enumeration_class::instance)
{
vkEnumerateInstanceExtensionProperties(layer_name, &count, m_vk_exts.data());
}
else
{
vkEnumerateDeviceExtensionProperties(pdev, layer_name, &count, m_vk_exts.data());
}
}
bool is_supported(const char* ext)
{
return std::any_of(m_vk_exts.cbegin(), m_vk_exts.cend(), [&](const VkExtensionProperties& p) { return std::strcmp(p.extensionName, ext) == 0; });
}
};
}

View file

@ -0,0 +1,750 @@
#pragma once
#ifdef HAVE_X11
#include <X11/Xutil.h>
#endif
#include "../../display.h"
#include "../VulkanAPI.h"
#include "image.h"
#include <memory>
namespace vk
{
// Swapchain image entry that only carries a raw VkImage handle.
struct swapchain_image_WSI
{
VkImage value = VK_NULL_HANDLE;
};
class swapchain_image_RPCS3 : public image
{
std::unique_ptr<buffer> m_dma_buffer;
u32 m_width = 0;
u32 m_height = 0;
public:
swapchain_image_RPCS3(render_device& dev, const memory_type_mapping& memory_map, u32 width, u32 height)
:image(dev, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, width, height, 1, 1, 1,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0)
{
m_width = width;
m_height = height;
current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
m_dma_buffer = std::make_unique<buffer>(dev, m_width * m_height * 4, memory_map.host_visible_coherent,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
}
void do_dma_transfer(command_buffer& cmd)
{
VkBufferImageCopy copyRegion = {};
copyRegion.bufferOffset = 0;
copyRegion.bufferRowLength = m_width;
copyRegion.bufferImageHeight = m_height;
copyRegion.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
copyRegion.imageOffset = {};
copyRegion.imageExtent = { m_width, m_height, 1 };
change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vkCmdCopyImageToBuffer(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_dma_buffer->value, 1, &copyRegion);
change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
u32 get_required_memory_size() const
{
return m_width * m_height * 4;
}
void* get_pixels()
{
return m_dma_buffer->map(0, VK_WHOLE_SIZE);
}
void free_pixels()
{
m_dma_buffer->unmap();
}
};
// Interface shared by every swapchain implementation. The constructor creates the logical
// device and fetches the graphics/present queues; derived classes provide image management
// and presentation.
class swapchain_base
{
protected:
	render_device dev;

	// Queue family indices; UINT32_MAX means the queue was not requested
	u32 m_present_queue = UINT32_MAX;
	u32 m_graphics_queue = UINT32_MAX;
	VkQueue vk_graphics_queue = VK_NULL_HANDLE;
	VkQueue vk_present_queue = VK_NULL_HANDLE;

	display_handle_t window_handle{};
	u32 m_width = 0;
	u32 m_height = 0;
	VkFormat m_surface_format = VK_FORMAT_B8G8R8A8_UNORM;

	virtual void init_swapchain_images(render_device& dev, u32 count) = 0;

public:
	swapchain_base(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
		: m_present_queue(_present_queue)
		, m_graphics_queue(_graphics_queue)
		, m_surface_format(format)
	{
		dev.create(gpu, _graphics_queue);

		if (_graphics_queue != UINT32_MAX)
		{
			vkGetDeviceQueue(dev, _graphics_queue, 0, &vk_graphics_queue);
		}

		if (_present_queue != UINT32_MAX)
		{
			vkGetDeviceQueue(dev, _present_queue, 0, &vk_present_queue);
		}
	}

	virtual ~swapchain_base() = default;

	// Lifetime
	virtual void create(display_handle_t& handle) = 0;
	virtual void destroy(bool full = true) = 0;
	virtual bool init() = 0;

	// Image access and frame flow
	virtual u32 get_swap_image_count() const = 0;
	virtual VkImage get_image(u32 index) = 0;
	virtual VkResult acquire_next_swapchain_image(VkSemaphore semaphore, u64 timeout, u32* result) = 0;
	virtual void end_frame(command_buffer& cmd, u32 index) = 0;
	virtual VkResult present(VkSemaphore semaphore, u32 index) = 0;
	virtual VkImageLayout get_optimal_present_layout() = 0;

	virtual bool supports_automatic_wm_reports() const
	{
		return false;
	}

	// Stores the requested dimensions, then runs the implementation's init()
	virtual bool init(u32 w, u32 h)
	{
		m_width = w;
		m_height = h;
		return init();
	}

	const vk::render_device& get_device()
	{
		return dev;
	}

	const VkQueue& get_present_queue()
	{
		return vk_present_queue;
	}

	const VkQueue& get_graphics_queue()
	{
		return vk_graphics_queue;
	}

	const VkFormat get_surface_format()
	{
		return m_surface_format;
	}

	// A swapchain without a present queue is considered headless
	const bool is_headless() const
	{
		return (vk_present_queue == VK_NULL_HANDLE);
	}
};
// Adds storage for the swapchain images on top of swapchain_base.
// T is the per-image record type kept in swapchain_images.
template<typename T>
class abstract_swapchain_impl : public swapchain_base
{
protected:
std::vector<T> swapchain_images;
public:
abstract_swapchain_impl(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
: swapchain_base(gpu, _present_queue, _graphics_queue, format)
{}
~abstract_swapchain_impl() override = default;
// Number of image records currently held
u32 get_swap_image_count() const override
{
return ::size32(swapchain_images);
}
// Keep the init(w, h) overload visible alongside init() overrides
using swapchain_base::init;
};
// Native swapchains keep (in-use flag, image) pairs; the flag is set on acquire and cleared on present
using native_swapchain_base = abstract_swapchain_impl<std::pair<bool, std::unique_ptr<swapchain_image_RPCS3>>>;
// WSI swapchains keep the raw image handles
using WSI_swapchain_base = abstract_swapchain_impl<swapchain_image_WSI>;
#ifdef _WIN32
class swapchain_WIN32 : public native_swapchain_base
{
HDC hDstDC = NULL;
HDC hSrcDC = NULL;
HBITMAP hDIB = NULL;
LPVOID hPtr = NULL;
public:
// Forwards all arguments to native_swapchain_base; GDI objects are created later in create()/init().
swapchain_WIN32(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
: native_swapchain_base(gpu, _present_queue, _graphics_queue, format)
{}
~swapchain_WIN32() {}
bool init() override
{
if (hDIB || hSrcDC)
destroy(false);
RECT rect;
GetClientRect(window_handle, &rect);
m_width = rect.right - rect.left;
m_height = rect.bottom - rect.top;
if (m_width == 0 || m_height == 0)
{
rsx_log.error("Invalid window dimensions %d x %d", m_width, m_height);
return false;
}
BITMAPINFO bitmap = {};
bitmap.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
bitmap.bmiHeader.biWidth = m_width;
bitmap.bmiHeader.biHeight = m_height * -1;
bitmap.bmiHeader.biPlanes = 1;
bitmap.bmiHeader.biBitCount = 32;
bitmap.bmiHeader.biCompression = BI_RGB;
hSrcDC = CreateCompatibleDC(hDstDC);
hDIB = CreateDIBSection(hSrcDC, &bitmap, DIB_RGB_COLORS, &hPtr, NULL, 0);
SelectObject(hSrcDC, hDIB);
init_swapchain_images(dev, 3);
return true;
}
// Binds the swapchain to a window and fetches its device context (released in destroy(true)).
void create(display_handle_t& handle) override
{
window_handle = handle;
hDstDC = GetDC(handle);
}
// Releases the GDI backing surface and the swapchain images. With full == true the window
// device context and the logical device are released as well.
void destroy(bool full = true) override
{
	DeleteObject(hDIB);
	DeleteDC(hSrcDC);
	hDIB = NULL;
	hSrcDC = NULL;

	swapchain_images.clear();

	if (!full)
	{
		return;
	}

	ReleaseDC(window_handle, hDstDC);
	hDstDC = NULL;
	dev.destroy();
}
// Copies the read-back pixels of the given image into the DIB section and blits it to the
// window, then marks the image as free again. Always reports VK_SUCCESS.
VkResult present(VkSemaphore /*semaphore*/, u32 image) override
{
	auto& [in_use, frame] = swapchain_images[image];

	GdiFlush();

	if (hSrcDC)
	{
		memcpy(hPtr, frame->get_pixels(), frame->get_required_memory_size());
		BitBlt(hDstDC, 0, 0, m_width, m_height, hSrcDC, 0, 0, SRCCOPY);
		frame->free_pixels();
	}

	// Release reference
	in_use = false;
	return VK_SUCCESS;
}
#elif defined(__APPLE__)
class swapchain_MacOS : public native_swapchain_base
{
void* nsView = nullptr;
public:
// Forwards all arguments to native_swapchain_base.
swapchain_MacOS(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
: native_swapchain_base(gpu, _present_queue, _graphics_queue, format)
{}
~swapchain_MacOS() {}
// Placeholder implementation: the dimensions are hard-coded to zero, so as written this
// always logs the error and returns false; the image setup below is never reached.
bool init() override
{
//TODO: get from `nsView`
m_width = 0;
m_height = 0;
if (m_width == 0 || m_height == 0)
{
rsx_log.error("Invalid window dimensions %d x %d", m_width, m_height);
return false;
}
init_swapchain_images(dev, 3);
return true;
}
// Remembers the native view handle; nothing else is created here.
void create(display_handle_t& window_handle) override
{
nsView = window_handle;
}
// Drops the swapchain images; with full == true the logical device is destroyed as well.
void destroy(bool full = true) override
{
swapchain_images.clear();
if (full)
dev.destroy();
}
// Not implemented: always throws.
VkResult present(VkSemaphore /*semaphore*/, u32 index) override
{
fmt::throw_exception("Native macOS swapchain is not implemented yet!");
}
#elif defined(HAVE_X11)
class swapchain_X11 : public native_swapchain_base
{
Display* display = nullptr;
Window window = 0;
XImage* pixmap = nullptr;
GC gc = nullptr;
int bit_depth = 24;
public:
// Forwards all arguments to native_swapchain_base; X11 objects are set up in create()/init().
swapchain_X11(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
: native_swapchain_base(gpu, _present_queue, _graphics_queue, format)
{}
~swapchain_X11() override = default;
// (Re)creates the XImage and the swapchain images for the current window geometry.
// Returns false when the window has no area or no matching TrueColor visual is found.
bool init() override
{
// Re-initialization: drop the previous XImage and swapchain images first
if (pixmap)
destroy(false);
Window root;
int x, y;
u32 w = 0, h = 0, border, depth;
// Width, height and bit depth are only updated when the geometry query succeeds
if (XGetGeometry(display, window, &root, &x, &y, &w, &h, &border, &depth))
{
m_width = w;
m_height = h;
bit_depth = depth;
}
if (m_width == 0 || m_height == 0)
{
rsx_log.error("Invalid window dimensions %d x %d", m_width, m_height);
return false;
}
XVisualInfo visual{};
// The pragmas silence -Wold-style-cast for the statement they enclose
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
if (!XMatchVisualInfo(display, DefaultScreen(display), bit_depth, TrueColor, &visual))
#pragma GCC diagnostic pop
{
rsx_log.error("Could not find matching visual info!");
return false;
}
// The image is created without pixel storage (data == nullptr); present() points it at the mapped frame
pixmap = XCreateImage(display, visual.visual, visual.depth, ZPixmap, 0, nullptr, m_width, m_height, 32, 0);
init_swapchain_images(dev, 3);
return true;
}
// Extracts the X11 display/window pair from the handle variant and fetches the default GC.
// When the handle holds a different alternative, display stays null and a fatal error is logged.
void create(display_handle_t& window_handle) override
{
std::visit([&](auto&& p)
{
using T = std::decay_t<decltype(p)>;
// Only the (Display*, Window) alternative is usable here; other alternatives are ignored
if constexpr (std::is_same_v<T, std::pair<Display*, Window>>)
{
display = p.first;
window = p.second;
}
}, window_handle);
if (display == NULL)
{
rsx_log.fatal("Could not create virtual display on this window protocol (Wayland?)");
return;
}
// The pragmas silence -Wold-style-cast for the statement they enclose
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
gc = DefaultGC(display, DefaultScreen(display));
#pragma GCC diagnostic pop
}
// Releases the XImage and the swapchain images. With full == true the logical device is
// destroyed as well. Safe to call when no XImage exists (e.g. init() never succeeded).
void destroy(bool full = true) override
{
	if (pixmap)
	{
		// present() points 'data' at the mapped swapchain buffer, which Xlib must not free
		pixmap->data = nullptr;
		XDestroyImage(pixmap);
		pixmap = nullptr;
	}

	swapchain_images.clear();

	if (full)
		dev.destroy();
}
// Uploads the read-back pixels of the given image to the window through the XImage, then
// marks the image as free again. Always reports VK_SUCCESS.
VkResult present(VkSemaphore /*semaphore*/, u32 index) override
{
	auto& [in_use, frame] = swapchain_images[index];

	if (pixmap)
	{
		// Borrow the mapped frame as pixel storage for the duration of the upload
		pixmap->data = static_cast<char*>(frame->get_pixels());

		XPutImage(display, window, gc, pixmap, 0, 0, 0, 0, m_width, m_height);
		XFlush(display);

		frame->free_pixels();
	}

	//Release reference
	in_use = false;
	return VK_SUCCESS;
}
#else
class swapchain_Wayland : public native_swapchain_base
{
public:
// Forwards all arguments to native_swapchain_base.
swapchain_Wayland(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM)
: native_swapchain_base(gpu, _present_queue, _graphics_queue, format)
{}
~swapchain_Wayland() {}
// The native Wayland swapchain is a stub: each of the four entry points below throws.
bool init() override
{
fmt::throw_exception("Native Wayland swapchain is not implemented yet!");
}
void create(display_handle_t& window_handle) override
{
fmt::throw_exception("Native Wayland swapchain is not implemented yet!");
}
void destroy(bool full = true) override
{
fmt::throw_exception("Native Wayland swapchain is not implemented yet!");
}
VkResult present(VkSemaphore /*semaphore*/, u32 index) override
{
fmt::throw_exception("Native Wayland swapchain is not implemented yet!");
}
#endif
// Hands out the first image that is not currently in use and marks it as in use.
// Returns VK_NOT_READY when every image is still in use.
VkResult acquire_next_swapchain_image(VkSemaphore /*semaphore*/, u64 /*timeout*/, u32* result) override
{
	const u32 image_count = ::size32(swapchain_images);

	for (u32 slot = 0; slot < image_count; ++slot)
	{
		bool& in_use = swapchain_images[slot].first;
		if (in_use)
		{
			continue;
		}

		in_use = true;
		*result = slot;
		return VK_SUCCESS;
	}

	return VK_NOT_READY;
}
// Records the image-to-buffer copy for the given image so present() can read its pixels.
void end_frame(command_buffer& cmd, u32 index) override
{
swapchain_images[index].second->do_dma_transfer(cmd);
}
// Returns the VkImage handle of the swapchain image at 'index'.
VkImage get_image(u32 index) override
{
return swapchain_images[index].second->value;
}
// Native swapchain images are presented from TRANSFER_DST_OPTIMAL.
VkImageLayout get_optimal_present_layout() override
{
return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
protected:
// Allocates 'preferred_count' images at the current swapchain dimensions, all marked as free.
void init_swapchain_images(render_device& dev, u32 preferred_count) override
{
	swapchain_images.resize(preferred_count);

	for (auto& [in_use, frame] : swapchain_images)
	{
		frame = std::make_unique<swapchain_image_RPCS3>(dev, dev.get_memory_mapping(), m_width, m_height);
		in_use = false;
	}
}
};
class swapchain_WSI : public WSI_swapchain_base
{
VkSurfaceKHR m_surface = VK_NULL_HANDLE;
VkColorSpaceKHR m_color_space = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
VkSwapchainKHR m_vk_swapchain = nullptr;
PFN_vkCreateSwapchainKHR createSwapchainKHR = nullptr;
PFN_vkDestroySwapchainKHR destroySwapchainKHR = nullptr;
PFN_vkGetSwapchainImagesKHR getSwapchainImagesKHR = nullptr;
PFN_vkAcquireNextImageKHR acquireNextImageKHR = nullptr;
PFN_vkQueuePresentKHR queuePresentKHR = nullptr;
bool m_wm_reports_flag = false;
protected:
// Queries the images owned by the current VkSwapchainKHR and stores their handles.
// Throws when the driver reports zero images.
void init_swapchain_images(render_device& dev, u32 /*preferred_count*/ = 0) override
{
	// First call: count only
	u32 image_count = 0;
	getSwapchainImagesKHR(dev, m_vk_swapchain, &image_count, nullptr);

	if (image_count == 0)
	{
		fmt::throw_exception("Driver returned 0 images for swapchain");
	}

	// Second call: fetch the handles
	std::vector<VkImage> handles(image_count);
	getSwapchainImagesKHR(dev, m_vk_swapchain, &image_count, handles.data());

	swapchain_images.resize(image_count);

	u32 slot = 0;
	for (auto& entry : swapchain_images)
	{
		entry.value = handles[slot++];
	}
}
public:
// Creates the logical device through the base class, loads the device-level swapchain entry
// points and decides whether the driver is treated as reporting window-manager events itself.
// force_wm_reporting_off disables that decision and leaves the flag false.
swapchain_WSI(vk::physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format, VkSurfaceKHR surface, VkColorSpaceKHR color_space, bool force_wm_reporting_off)
: WSI_swapchain_base(gpu, _present_queue, _graphics_queue, format)
{
createSwapchainKHR = reinterpret_cast<PFN_vkCreateSwapchainKHR>(vkGetDeviceProcAddr(dev, "vkCreateSwapchainKHR"));
destroySwapchainKHR = reinterpret_cast<PFN_vkDestroySwapchainKHR>(vkGetDeviceProcAddr(dev, "vkDestroySwapchainKHR"));
getSwapchainImagesKHR = reinterpret_cast<PFN_vkGetSwapchainImagesKHR>(vkGetDeviceProcAddr(dev, "vkGetSwapchainImagesKHR"));
acquireNextImageKHR = reinterpret_cast<PFN_vkAcquireNextImageKHR>(vkGetDeviceProcAddr(dev, "vkAcquireNextImageKHR"));
queuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(vkGetDeviceProcAddr(dev, "vkQueuePresentKHR"));
m_surface = surface;
m_color_space = color_space;
if (!force_wm_reporting_off)
{
switch (gpu.get_driver_vendor())
{
case driver_vendor::AMD:
break;
case driver_vendor::INTEL:
// On Windows the INTEL case stops here; on other platforms it falls through and sets the flag
#ifdef _WIN32
break;
#endif
case driver_vendor::NVIDIA:
case driver_vendor::RADV:
m_wm_reports_flag = true;
break;
default:
break;
}
}
}
// The destructor does not release Vulkan objects; destroy() must be called explicitly.
~swapchain_WSI() override = default;
// Nothing to do: the surface is supplied through the constructor.
void create(display_handle_t&) override
{}
// Destroys the swapchain (if any) followed by the logical device.
// Does nothing when the logical device no longer exists.
void destroy(bool = true) override
{
	VkDevice pdev = dev;
	if (!pdev)
	{
		return;
	}

	if (m_vk_swapchain)
	{
		destroySwapchainKHR(pdev, m_vk_swapchain, nullptr);
	}

	dev.destroy();
}
using WSI_swapchain_base::init;
bool init() override
{
if (vk_present_queue == VK_NULL_HANDLE)
{
rsx_log.error("Cannot create WSI swapchain without a present queue");
return false;
}
VkSwapchainKHR old_swapchain = m_vk_swapchain;
vk::physical_device& gpu = const_cast<vk::physical_device&>(dev.gpu());
VkSurfaceCapabilitiesKHR surface_descriptors = {};
CHECK_RESULT(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, m_surface, &surface_descriptors));
if (surface_descriptors.maxImageExtent.width < m_width ||
surface_descriptors.maxImageExtent.height < m_height)
{
rsx_log.error("Swapchain: Swapchain creation failed because dimensions cannot fit. Max = %d, %d, Requested = %d, %d",
surface_descriptors.maxImageExtent.width, surface_descriptors.maxImageExtent.height, m_width, m_height);
return false;
}
VkExtent2D swapchainExtent;
if (surface_descriptors.currentExtent.width == UINT32_MAX)
{
swapchainExtent.width = m_width;
swapchainExtent.height = m_height;
}
else
{
if (surface_descriptors.currentExtent.width == 0 || surface_descriptors.currentExtent.height == 0)
{
rsx_log.warning("Swapchain: Current surface extent is a null region. Is the window minimized?");
return false;
}
swapchainExtent = surface_descriptors.currentExtent;
m_width = surface_descriptors.currentExtent.width;
m_height = surface_descriptors.currentExtent.height;
}
u32 nb_available_modes = 0;
CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &nb_available_modes, nullptr));
std::vector<VkPresentModeKHR> present_modes(nb_available_modes);
CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &nb_available_modes, present_modes.data()));
VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_FIFO_KHR;
std::vector<VkPresentModeKHR> preferred_modes;
if (!g_cfg.video.vk.force_fifo)
{
// List of preferred modes in decreasing desirability
// NOTE: Always picks "triple-buffered vsync" types if possible
if (!g_cfg.video.vsync)
{
preferred_modes = { VK_PRESENT_MODE_IMMEDIATE_KHR, VK_PRESENT_MODE_MAILBOX_KHR, VK_PRESENT_MODE_FIFO_RELAXED_KHR };
}
}
bool mode_found = false;
for (VkPresentModeKHR preferred_mode : preferred_modes)
{
//Search for this mode in supported modes
for (VkPresentModeKHR mode : present_modes)
{
if (mode == preferred_mode)
{
swapchain_present_mode = mode;
mode_found = true;
break;
}
}
if (mode_found)
break;
}
rsx_log.notice("Swapchain: present mode %d in use.", static_cast<int>(swapchain_present_mode));
u32 nb_swap_images = surface_descriptors.minImageCount + 1;
if (surface_descriptors.maxImageCount > 0)
{
//Try to negotiate for a triple buffer setup
//In cases where the front-buffer isnt available for present, its better to have a spare surface
nb_swap_images = std::max(surface_descriptors.minImageCount + 2u, 3u);
if (nb_swap_images > surface_descriptors.maxImageCount)
{
// Application must settle for fewer images than desired:
nb_swap_images = surface_descriptors.maxImageCount;
}
}
VkSurfaceTransformFlagBitsKHR pre_transform = surface_descriptors.currentTransform;
if (surface_descriptors.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)
pre_transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
VkSwapchainCreateInfoKHR swap_info = {};
swap_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
swap_info.surface = m_surface;
swap_info.minImageCount = nb_swap_images;
swap_info.imageFormat = m_surface_format;
swap_info.imageColorSpace = m_color_space;
swap_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
swap_info.preTransform = pre_transform;
swap_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
swap_info.imageArrayLayers = 1;
swap_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
swap_info.presentMode = swapchain_present_mode;
swap_info.oldSwapchain = old_swapchain;
swap_info.clipped = true;
swap_info.imageExtent.width = std::max(m_width, surface_descriptors.minImageExtent.width);
swap_info.imageExtent.height = std::max(m_height, surface_descriptors.minImageExtent.height);
createSwapchainKHR(dev, &swap_info, nullptr, &m_vk_swapchain);
if (old_swapchain)
{
if (!swapchain_images.empty())
{
swapchain_images.clear();
}
destroySwapchainKHR(dev, old_swapchain, nullptr);
}
init_swapchain_images(dev);
return true;
}
// Returns the flag chosen in the constructor from the driver vendor.
bool supports_automatic_wm_reports() const override
{
return m_wm_reports_flag;
}
// Acquires the next presentable image; 'semaphore' is signalled when the image is ready.
// Uses the device-level entry point loaded in the constructor, like every other WSI call here.
VkResult acquire_next_swapchain_image(VkSemaphore semaphore, u64 timeout, u32* result) override
{
	return acquireNextImageKHR(dev, m_vk_swapchain, timeout, semaphore, VK_NULL_HANDLE, result);
}
// Intentionally empty: this implementation records no per-frame work.
void end_frame(command_buffer& /*cmd*/, u32 /*index*/) override
{
}
// Queues the given swapchain image for presentation on the present queue, waiting on
// 'semaphore' first. Returns the result of the present call.
VkResult present(VkSemaphore semaphore, u32 image) override
{
	VkPresentInfoKHR info = {};
	info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;

	// Wait for rendering to finish before the image is shown
	info.waitSemaphoreCount = 1;
	info.pWaitSemaphores = &semaphore;

	info.swapchainCount = 1;
	info.pSwapchains = &m_vk_swapchain;
	info.pImageIndices = &image;

	return queuePresentKHR(vk_present_queue, &info);
}
// Returns the VkImage handle stored for the swapchain image at 'index'.
VkImage get_image(u32 index) override
{
return swapchain_images[index].value;
}
// WSI images are presented from PRESENT_SRC_KHR.
VkImageLayout get_optimal_present_layout() override
{
return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
}
};
}

View file

@ -0,0 +1,196 @@
#include "barriers.h"
#include "buffer_object.h"
#include "commands.h"
#include "device.h"
#include "sync.h"
#include "shared.h"
extern u64 get_system_time();
namespace vk
{
#ifdef _MSC_VER
extern "C" void _mm_pause();
#endif
// Creates an unsignalled fence on 'dev' and remembers the device for later calls.
// Creation failure is routed through CHECK_RESULT.
fence::fence(VkDevice dev)
{
owner = dev;
VkFenceCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
CHECK_RESULT(vkCreateFence(dev, &info, nullptr, &handle));
}
// Destroys the Vulkan fence if one was created.
fence::~fence()
{
	if (!handle)
	{
		return;
	}

	vkDestroyFence(owner, handle, nullptr);
	handle = VK_NULL_HANDLE;
}
// Resets the Vulkan fence and clears the 'flushed' flag.
void fence::reset()
{
vkResetFences(owner, 1, &handle);
flushed.release(false);
}
// Sets the 'flushed' flag, releasing any thread spinning in wait_flush().
void fence::signal_flushed()
{
flushed.release(true);
}
// Busy-waits until the 'flushed' flag is set, issuing a CPU pause hint on every iteration.
void fence::wait_flush()
{
while (!flushed)
{
#ifdef _MSC_VER
_mm_pause();
#else
__builtin_ia32_pause();
#endif
}
}
// True when the object holds a Vulkan fence handle; says nothing about the signalled state.
fence::operator bool() const
{
return (handle != VK_NULL_HANDLE);
}
// Creates a GPU->CPU signalling primitive on 'dev'.
// Non-AMD drivers get a real VkEvent. On AMD a 4-byte host-visible buffer is used instead:
// it starts out holding 0xCAFEBABE and is overwritten by signal().
event::event(const render_device& dev)
{
	m_device = dev;

	if (dev.gpu().get_driver_vendor() != driver_vendor::AMD)
	{
		VkEventCreateInfo info
		{
			.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
			.pNext = nullptr,
			.flags = 0
		};

		// A failed creation would leave neither backend initialized, so it must not go unnoticed
		CHECK_RESULT(vkCreateEvent(dev, &info, nullptr, &m_vk_event));
	}
	else
	{
		// Work around AMD's broken event signals
		m_buffer = std::make_unique<buffer>
		(
			dev,
			4,
			dev.get_memory_mapping().host_visible_coherent,
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
			VK_BUFFER_USAGE_TRANSFER_DST_BIT,
			0
		);

		// Keep the buffer mapped for the lifetime of the event; status() polls this word
		m_value = reinterpret_cast<u32*>(m_buffer->map(0, 4));
		*m_value = 0xCAFEBABE;
	}
}
// Releases whichever backend was created: the VkEvent, or the mapped fallback buffer.
// Does nothing when neither exists.
event::~event()
{
	if (m_vk_event) [[likely]]
	{
		vkDestroyEvent(m_device, m_vk_event, nullptr);
	}
	else if (m_buffer)
	{
		// The buffer was mapped in the constructor; unmap before releasing it
		m_buffer->unmap();
		m_buffer.reset();
		m_value = nullptr;
	}
}
// Records commands into `cmd` that signal this event once the given
// pipeline `stages` have completed.
void event::signal(const command_buffer& cmd, VkPipelineStageFlags stages)
{
	if (m_vk_event) [[likely]]
	{
		// Native event path
		vkCmdSetEvent(cmd, m_vk_event, stages);
		return;
	}

	// Buffer fallback: order a transfer after `stages`, then have that
	// transfer write the "signaled" marker that status() polls for.
	insert_execution_barrier(cmd, stages, VK_PIPELINE_STAGE_TRANSFER_BIT);
	vkCmdFillBuffer(cmd, m_buffer->value, 0, 4, 0xDEADBEEF);
}
// Queries the event state from the host.
// Returns the result of vkGetEventStatus for a native event; for the buffer
// fallback returns VK_EVENT_SET once the mapped word reads 0xDEADBEEF and
// VK_EVENT_RESET otherwise.
VkResult event::status() const
{
	if (m_vk_event) [[likely]]
	{
		return vkGetEventStatus(m_device, m_vk_event);
	}

	const bool marker_written = (*m_value == 0xDEADBEEF);
	if (marker_written)
	{
		return VK_EVENT_SET;
	}

	return VK_EVENT_RESET;
}
// Blocks until `pFence` is signaled.
//
// First waits for the fence's host-side flush flag. Then:
//  - timeout != 0: defers to vkWaitForFences with `timeout * 1000`. That API
//    takes nanoseconds, so `timeout` is effectively in microseconds. The
//    API's result (including VK_TIMEOUT) is returned as-is.
//  - timeout == 0: polls vkGetFenceStatus without a time limit. Any status
//    other than success / not-ready is passed to die_with_error and returned.
VkResult wait_for_fence(fence* pFence, u64 timeout)
{
	pFence->wait_flush();

	if (timeout)
	{
		return vkWaitForFences(*g_render_device, 1, &pFence->handle, VK_FALSE, timeout * 1000ull);
	}

	for (;;)
	{
		const auto status = vkGetFenceStatus(*g_render_device, pFence->handle);

		if (status == VK_SUCCESS)
		{
			return VK_SUCCESS;
		}

		if (status == VK_NOT_READY)
		{
			// Still pending, poll again
			continue;
		}

		die_with_error(status);
		return status;
	}
}
// Spins on the host until `pEvent` reports VK_EVENT_SET.
//
// With timeout == 0 the wait is unbounded. Otherwise the clock is started on
// the first poll that finds the event unset, and VK_TIMEOUT is returned once
// more than `timeout` units of get_system_time() have elapsed.
// NOTE(review): get_system_time() presumably returns microseconds - confirm.
// Any status other than set/reset is passed to die_with_error and returned.
VkResult wait_for_event(event* pEvent, u64 timeout)
{
	u64 start_time = 0;

	for (;;)
	{
		const auto status = pEvent->status();

		if (status == VK_EVENT_SET)
		{
			return VK_SUCCESS;
		}

		if (status != VK_EVENT_RESET)
		{
			die_with_error(status);
			return status;
		}

		if (timeout)
		{
			if (!start_time)
			{
				// First unsuccessful poll: start the clock and retry right away
				start_time = get_system_time();
				continue;
			}

			const u64 elapsed = get_system_time() - start_time;
			if (elapsed > timeout)
			{
				rsx_log.error("[vulkan] vk::wait_for_event has timed out!");
				return VK_TIMEOUT;
			}
		}

		// Spin-wait hint; yielding the thread was deliberately left disabled
		//std::this_thread::yield();
#ifdef _MSC_VER
		_mm_pause();
#else
		__builtin_ia32_pause();
#endif
	}
}
}

View file

@ -0,0 +1,46 @@
#pragma once
#include "../VulkanAPI.h"
#include "buffer_object.h"
#include "device.h"
#include "util/atomic.hpp"
namespace vk
{
class command_buffer;
// Owning wrapper around a VkFence plus a host-side "flushed" flag.
// The Vulkan fence is created in the constructor and destroyed in the
// destructor using the device stored in `owner`.
struct fence
{
// Host-side flag: set by signal_flushed(), cleared by reset(), and spun on by
// wait_flush().
// NOTE(review): presumably marks that the guarded work was submitted - confirm.
atomic_t<bool> flushed = false;
// Native fence handle; VK_NULL_HANDLE when no fence is held.
VkFence handle = VK_NULL_HANDLE;
// Device the fence was created on; used for reset and destruction.
VkDevice owner = VK_NULL_HANDLE;
// Creates the VkFence on `dev`.
fence(VkDevice dev);
// Destroys the VkFence if one is held.
~fence();
// Resets the VkFence and clears `flushed`.
void reset();
// Sets `flushed` to true.
void signal_flushed();
// Busy-waits until `flushed` is true.
void wait_flush();
// True when `handle` is not VK_NULL_HANDLE.
operator bool() const;
};
// GPU -> host signalling primitive.
// Backed by a native VkEvent, except on AMD drivers where a small mapped
// buffer is used instead (see the constructor in sync.cpp).
class event
{
// Device used to create, query and destroy the native event.
VkDevice m_device = VK_NULL_HANDLE;
// Native event handle; stays VK_NULL_HANDLE when the buffer fallback is used.
VkEvent m_vk_event = VK_NULL_HANDLE;
// Fallback storage: a 4-byte buffer, only allocated on the AMD path.
std::unique_ptr<buffer> m_buffer;
// Host mapping of m_buffer. Holds 0xCAFEBABE after construction and
// 0xDEADBEEF once the GPU has executed the signal.
volatile u32* m_value = nullptr;
public:
// Creates the native event or the fallback buffer depending on the driver vendor.
event(const render_device& dev);
// Destroys the native event, or unmaps and frees the fallback buffer.
~event();
// Records the signal operation into `cmd`, to execute after `stages`.
void signal(const command_buffer& cmd, VkPipelineStageFlags stages);
// Returns VK_EVENT_SET / VK_EVENT_RESET (or an error from vkGetEventStatus).
VkResult status() const;
};
// Waits for `pFence` to be signaled. A timeout of 0 polls without a time
// limit; otherwise the value is multiplied by 1000 and handed to
// vkWaitForFences.
VkResult wait_for_fence(fence* pFence, u64 timeout = 0ull);
// Spins until `pEvent` is set. A timeout of 0 waits without a time limit;
// otherwise VK_TIMEOUT is returned once the timeout has elapsed, measured
// with get_system_time().
VkResult wait_for_event(event* pEvent, u64 timeout = 0ull);
}

View file

@ -40,20 +40,26 @@
<ClInclude Include="Emu\RSX\VK\VKShaderInterpreter.h" />
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\buffer_view.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\buffer_object.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\chip_class.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\command_pool.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\fence.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\graphics_pipeline_state.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\memory_block.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\mem_allocator.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\physical_device.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\commands.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\data_heap.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\descriptors.hpp" />
<ClInclude Include="Emu\RSX\VK\vkutils\barriers.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\framebuffer_object.hpp" />
<ClInclude Include="Emu\RSX\VK\vkutils\image.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\image_helpers.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\scratch.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\swapchain.hpp" />
<ClInclude Include="Emu\RSX\VK\vkutils\sync.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\graphics_pipeline_state.hpp" />
<ClInclude Include="Emu\RSX\VK\vkutils\memory.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\device.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\pipeline_binding_table.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\query_pool.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\render_device.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\query_pool.hpp" />
<ClInclude Include="Emu\RSX\VK\vkutils\sampler.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\shared.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\supported_extensions.h" />
<ClInclude Include="Emu\RSX\VK\vkutils\instance.hpp" />
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
</ItemGroup>
@ -76,14 +82,17 @@
<ClCompile Include="Emu\RSX\VK\VKResourceManager.cpp" />
<ClCompile Include="Emu\RSX\VK\VKShaderInterpreter.cpp" />
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\buffer_view.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\barriers.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\buffer_object.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\chip_class.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\command_pool.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\fence.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\memory_block.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\mem_allocator.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\physical_device.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\render_device.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\commands.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\data_heap.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\image.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\image_helpers.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\scratch.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\sync.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\memory.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\device.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\sampler.cpp" />
<ClCompile Include="Emu\RSX\VK\vkutils\shared.cpp" />
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />

View file

@ -26,31 +26,40 @@
<ClCompile Include="Emu\RSX\VK\vkutils\chip_class.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\fence.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\mem_allocator.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\memory_block.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\physical_device.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\sampler.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\shared.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\buffer_view.cpp">
<ClCompile Include="Emu\RSX\VK\vkutils\buffer_object.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\render_device.cpp">
<ClCompile Include="Emu\RSX\VK\vkutils\commands.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\command_pool.cpp">
<ClCompile Include="Emu\RSX\VK\vkutils\sync.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\memory.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\device.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\image.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\data_heap.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\barriers.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\scratch.cpp">
<Filter>vkutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\vkutils\image_helpers.cpp">
<Filter>vkutils</Filter>
</ClCompile>
</ItemGroup>
@ -81,43 +90,61 @@
<ClInclude Include="Emu\RSX\VK\vkutils\chip_class.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\fence.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\graphics_pipeline_state.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\mem_allocator.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\memory_block.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\physical_device.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\pipeline_binding_table.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\query_pool.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\sampler.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\shared.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\supported_extensions.h">
<ClInclude Include="Emu\RSX\VK\vkutils\buffer_object.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\buffer_view.h">
<ClInclude Include="Emu\RSX\VK\vkutils\commands.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\render_device.h">
<ClInclude Include="Emu\RSX\VK\vkutils\sync.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\command_pool.h">
<ClInclude Include="Emu\RSX\VK\vkutils\graphics_pipeline_state.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\device.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\instance.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\memory.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\query_pool.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\framebuffer_object.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\image.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\swapchain.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\descriptors.hpp">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\data_heap.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\barriers.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\scratch.h">
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\vkutils\image_helpers.h">
<Filter>vkutils</Filter>
</ClInclude>
</ItemGroup>

View file

@ -5,13 +5,14 @@
#include "Utilities/Thread.h"
#if defined(_WIN32) || defined(HAVE_VULKAN)
#include "Emu/RSX/VK/VKHelpers.h"
#include "Emu/RSX/VK/vkutils/instance.hpp"
#endif
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include <util/logs.hpp>
LOG_CHANNEL(cfg_log, "CFG");
@ -39,11 +40,11 @@ render_creator::render_creator(QObject *parent) : QObject(parent)
{
thread_ctrl::set_native_priority(-1);
vk::context device_enum_context;
if (device_enum_context.createInstance("RPCS3", true))
vk::instance device_enum_context;
if (device_enum_context.create("RPCS3", true))
{
device_enum_context.makeCurrentInstance();
std::vector<vk::physical_device>& gpus = device_enum_context.enumerateDevices();
device_enum_context.bind();
std::vector<vk::physical_device>& gpus = device_enum_context.enumerate_devices();
if (!gpus.empty())
{