From d287ba44ce489c745c0fd5b90f2c3cbd78827594 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 21 Feb 2016 18:49:27 +0300 Subject: [PATCH 01/13] Add vulkan GS backend to project --- rpcs3/Emu/RSX/GSRender.h | 3 ++- rpcs3/config.h | 7 ++++++- rpcs3/rpcs3.cpp | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GSRender.h b/rpcs3/Emu/RSX/GSRender.h index d82048d8b9..2ebffee14a 100644 --- a/rpcs3/Emu/RSX/GSRender.h +++ b/rpcs3/Emu/RSX/GSRender.h @@ -36,7 +36,8 @@ enum class frame_type { Null, OpenGL, - DX12 + DX12, + Vulkan }; class GSRender : public rsx::thread diff --git a/rpcs3/config.h b/rpcs3/config.h index d1ffdfd0db..bb88f6547d 100644 --- a/rpcs3/config.h +++ b/rpcs3/config.h @@ -50,7 +50,8 @@ enum class rsx_renderer_type { Null, OpenGL, - DX12 + DX12, + Vulkan }; enum class rsx_aspect_ratio @@ -93,6 +94,7 @@ namespace convert case rsx_renderer_type::Null: return "Null"; case rsx_renderer_type::OpenGL: return "OpenGL"; case rsx_renderer_type::DX12: return "DX12"; + case rsx_renderer_type::Vulkan: return "Vulkan"; } return "Unknown"; @@ -113,6 +115,9 @@ namespace convert if (value == "DX12") return rsx_renderer_type::DX12; + if (value == "Vulkan") + return rsx_renderer_type::Vulkan; + return rsx_renderer_type::Null; } }; diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index ef79af2391..899bea4107 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -32,6 +32,7 @@ #include "Emu/RSX/Null/NullGSRender.h" #include "Emu/RSX/GL/GLGSRender.h" +#include "Emu/RSX/VK/VKGSRender.h" #include "Emu/Audio/Null/NullAudioThread.h" #include "Emu/Audio/AL/OpenALThread.h" #ifdef _MSC_VER @@ -129,6 +130,7 @@ bool Rpcs3App::OnInit() case frame_type::OpenGL: return std::make_unique(); case frame_type::DX12: return std::make_unique("DirectX 12"); case frame_type::Null: return std::make_unique("Null"); + case frame_type::Vulkan: return std::make_unique("Vulkan"); } throw EXCEPTION("Invalid Frame Type"); @@ -142,6 +144,7 @@ bool Rpcs3App::OnInit() case 
rsx_renderer_type::OpenGL: return std::make_shared(); #ifdef _MSC_VER case rsx_renderer_type::DX12: return std::make_shared(); + case rsx_renderer_type::Vulkan: return std::make_shared<>(VKGSRender); #endif default: throw EXCEPTION("Invalid GS Renderer %d", (int)mode); } From 3b6e3fb3b43a8a1b4eb182c3fbc8c2392f4ab24d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 21 Feb 2016 18:50:49 +0300 Subject: [PATCH 02/13] Rework vertex upload code and fix indexed renders Rebase on current master; Refactor vertex upload code Fix build; Minor fixes Start preparations for merge Fix generic indexed drawing bugs Define WIN32_KHR only for windows Remove linking against vulkan-1.lib --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 197 +++- rpcs3/Emu/RSX/Common/TextureUtils.h | 9 +- rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp | 291 +++++ rpcs3/Emu/RSX/VK/VKCommonDecompiler.h | 20 + rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 321 ++++++ rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 69 ++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 1058 +++++++++++++++++- rpcs3/Emu/RSX/VK/VKGSRender.h | 98 +- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 285 +++++ rpcs3/Emu/RSX/VK/VKHelpers.h | 1314 +++++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 47 + rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 805 ++++++++++++++ rpcs3/Emu/RSX/VK/VKRenderTargets.h | 118 ++ rpcs3/Emu/RSX/VK/VKTexture.cpp | 560 ++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 240 +++++ rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 566 ++++++++++ rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 301 ++++++ rpcs3/Emu/RSX/VK/VKVertexProgram.h | 58 + rpcs3/Emu/RSX/VK/VulkanAPI.cpp | 1 + rpcs3/Emu/RSX/VK/VulkanAPI.h | 16 + rpcs3/Gui/SettingsDialog.cpp | 2 + rpcs3/VKGSRender.vcxproj | 264 ++--- rpcs3/VKGSRender.vcxproj.filters | 84 +- rpcs3/rpcs3.cpp | 2 +- rpcs3/rpcs3.vcxproj | 12 +- 25 files changed, 6545 insertions(+), 193 deletions(-) create mode 100644 rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKCommonDecompiler.h create mode 100644 
rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKFragmentProgram.h create mode 100644 rpcs3/Emu/RSX/VK/VKHelpers.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKHelpers.h create mode 100644 rpcs3/Emu/RSX/VK/VKProgramBuffer.h create mode 100644 rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKRenderTargets.h create mode 100644 rpcs3/Emu/RSX/VK/VKTexture.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKTextureCache.h create mode 100644 rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKVertexProgram.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKVertexProgram.h create mode 100644 rpcs3/Emu/RSX/VK/VulkanAPI.cpp create mode 100644 rpcs3/Emu/RSX/VK/VulkanAPI.h diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index f5f66b8dae..d504d2ee03 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -57,26 +57,26 @@ struct copy_unmodified_block_swizzled }; /** - * Texture upload template. - * - * Source textures are stored as following (for power of 2 textures): - * - For linear texture every mipmap level share rowpitch (which is the one of mipmap 0). This means that for non 0 mipmap there's padding between row. - * - For swizzled texture row pitch is texture width X pixel/block size. There's not padding between row. - * - There is no padding between 2 mipmap levels. This means that next mipmap level starts at offset rowpitch X row count - * - Cubemap images are 128 bytes aligned. - * - * The template iterates over all depth (including cubemap) and over all mipmaps. - * The alignment is 256 for mipmap levels and 512 for depth (TODO: make this customisable for Vulkan ?) 
- * The template takes a struct with a "copy_mipmap_level" static function that copy the given mipmap level and returns the offset to add to the src buffer for next - * mipmap level (to allow same code for packed/non packed texels) - * Sometimes texture provides a pitch even if texture is swizzled (and then packed) and in such case it's ignored. It's passed via suggested_pitch and is used only if padded_row is false. - */ +* Texture upload template. +* +* Source textures are stored as following (for power of 2 textures): +* - For linear texture every mipmap level share rowpitch (which is the one of mipmap 0). This means that for non 0 mipmap there's padding between row. +* - For swizzled texture row pitch is texture width X pixel/block size. There's not padding between row. +* - There is no padding between 2 mipmap levels. This means that next mipmap level starts at offset rowpitch X row count +* - Cubemap images are 128 bytes aligned. +* +* The template iterates over all depth (including cubemap) and over all mipmaps. +* The alignment is 256 for mipmap levels and 512 for depth (DX12), varies for vulkan +* The template takes a struct with a "copy_mipmap_level" static function that copy the given mipmap level and returns the offset to add to the src buffer for next +* mipmap level (to allow same code for packed/non packed texels) +* Sometimes texture provides a pitch even if texture is swizzled (and then packed) and in such case it's ignored. It's passed via suggested_pitch and is used only if padded_row is false. 
+*/ template -std::vector copy_texture_data(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes) +std::vector copy_texture_data(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes, size_t alignment) { /** - * Note about size type: RSX texture width is stored in a 16 bits int and pitch is stored in a 20 bits int. - */ + * Note about size type: RSX texture width is stored in a 16 bits int and pitch is stored in a 20 bits int. + */ // <= 128 so fits in u8 u8 block_size_in_bytes = sizeof(DST_TYPE); @@ -92,7 +92,7 @@ std::vector copy_texture_data(gsl::span dst, const SR for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) { // since mip_level is up to 16 bits needs at least 17 bits. - u32 dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, 256) / block_size_in_bytes; + u32 dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, alignment) / block_size_in_bytes; MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -118,6 +118,44 @@ std::vector copy_texture_data(gsl::span dst, const SR return Result; } +/** + * Copy a single mipmap level starting at a given offset with a given rowpitch alignment + */ + +template +void copy_single_mipmap_layer(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u16 mipmap_index, u16 layer_index, u32 suggested_pitch_in_bytes, u32 dst_pitch) +{ + u8 block_size_in_bytes = sizeof(DST_TYPE); + size_t offsetInSrc = 0; + + u16 texture_height_in_block = (height_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + u16 texture_width_in_block = (width_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + + for (unsigned layer = 0; layer <= layer_index; layer++) + { + u16 miplevel_height_in_block = 
texture_height_in_block, miplevel_width_in_block = texture_width_in_block; + for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) + { + u32 src_pitch_in_block = padded_row ? suggested_pitch_in_bytes / block_size_in_bytes : miplevel_width_in_block; + u32 dst_pitch_in_block = dst_pitch / block_size_in_bytes; + const SRC_TYPE *src_with_offset = reinterpret_cast(reinterpret_cast(src) + offsetInSrc); + + if (mip_level == mipmap_index && + layer == layer_index) + { + T::copy_mipmap_level(dst.subspan(0, dst_pitch_in_block * depth * miplevel_height_in_block), src_with_offset, miplevel_height_in_block, miplevel_width_in_block, depth, dst_pitch_in_block, src_pitch_in_block); + break; + } + + offsetInSrc += miplevel_height_in_block * src_pitch_in_block * block_size_in_bytes * depth; + miplevel_height_in_block = MAX2(miplevel_height_in_block / 2, 1); + miplevel_width_in_block = MAX2(miplevel_width_in_block / 2, 1); + } + + offsetInSrc = align(offsetInSrc, 128); + } +} + /** * A texture is stored as an array of blocks, where a block is a pixel for standard texture * but is a structure containing several pixels for compressed format @@ -202,7 +240,7 @@ size_t get_texture_block_edge(u32 format) } -size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement, size_t mipmapAlignment) { size_t w = texture.width(), h = texture.height(), d = MAX2(texture.depth(), 1); @@ -218,7 +256,7 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi for (unsigned mipmap = 0; mipmap < texture.mipmap(); ++mipmap) { size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement); - result += align(rowPitch * heightInBlocks * d, 512); + result += align(rowPitch * heightInBlocks * d, mipmapAlignment); heightInBlocks = MAX2(heightInBlocks / 2, 1); widthInBlocks = MAX2(widthInBlocks / 2, 1); } @@ -226,7 +264,7 @@ size_t 
get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi return result * (texture.cubemap() ? 6 : 1); } -std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement) +std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignment) { u16 w = texture.width(), h = texture.height(); u16 depth; @@ -262,45 +300,132 @@ std::vector upload_placed_texture(gsl::span mapped_b { case CELL_GCM_TEXTURE_A8R8G8B8: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return 
copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), 4 * w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), 4 * w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_COMPRESSED_DXT1: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_COMPRESSED_DXT23: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_COMPRESSED_DXT45: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), 
texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_B8: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); - case CELL_GCM_TEXTURE_DEPTH24_D8: // Opaque type ; ATM do not copy anything - return std::vector(); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); } throw EXCEPTION("Wrong format %d", format); } +/** + * Upload texture mipmaps where alignment and offset information is provided manually + */ +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info) +{ + u16 w = texture.width(), h = texture.height(); + u16 depth; + u8 layer; + + if (texture.dimension() == 1) + { + depth = 1; + layer = 1; + h = 1; + } + else if (texture.dimension() == 2) + { + depth = 1; + layer = texture.cubemap() ? 
6 : 1; + } + else if (texture.dimension() == 3) + { + depth = texture.depth(); + layer = 1; + } + else + throw EXCEPTION("Unsupported texture dimension %d", texture.dimension()); + + int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + const u32 texaddr = rsx::get_address(texture.offset(), texture.location()); + auto pixels = vm::ps3::_ptr(texaddr); + bool is_swizzled = !(texture.format() & CELL_GCM_TEXTURE_LN); + + //TODO: Layers greater than 0 + for (u32 mip_level = 0; mip_level < texture.mipmap(); ++mip_level) + { + gsl::span mapped_buffer = dst_buffer.subspan(alignment_offset_info[mip_level].first); + + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: 
+ if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_B8: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + default: + throw EXCEPTION("Wrong format %d", format); + } + } +} + size_t get_texture_size(const rsx::texture &texture) { size_t w = texture.width(), h 
= texture.height(); diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 34bff63c9c..b327faa902 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -15,7 +15,7 @@ struct MipmapLevelInfo * Get size to store texture in a linear fashion. * Storage is assumed to use a rowPitchAlignement boundary for every row of texture. */ -size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement); +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement, size_t mipmapAlignment=512); /** * Write texture data to textureData. @@ -24,6 +24,13 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi */ std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement); +/** +* Upload texture mipmaps where alignment and offset information is provided manually. +* alignment_offset info is an array of N mipmaps providing the offset into the data block and row-pitch alignment of each +* mipmap level individually. 
+*/ +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info); + /** * Get number of bytes occupied by texture in RSX mem */ diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp new file mode 100644 index 0000000000..2d00f5d697 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -0,0 +1,291 @@ +#include "stdafx.h" +#include "VKCommonDecompiler.h" +#include "../VulKan/glslang/SPIRV/GlslangToSpv.h" + +namespace vk +{ + std::string getFloatTypeNameImpl(size_t elementCount) + { + switch (elementCount) + { + default: + abort(); + case 1: + return "float"; + case 2: + return "vec2"; + case 3: + return "vec3"; + case 4: + return "vec4"; + } + } + + std::string getFunctionImpl(FUNCTION f) + { + switch (f) + { + default: + abort(); + case FUNCTION::FUNCTION_DP2: + return "vec4(dot($0.xy, $1.xy))"; + case FUNCTION::FUNCTION_DP2A: + return ""; + case FUNCTION::FUNCTION_DP3: + return "vec4(dot($0.xyz, $1.xyz))"; + case FUNCTION::FUNCTION_DP4: + return "vec4(dot($0, $1))"; + case FUNCTION::FUNCTION_DPH: + return "vec4(dot(vec4($0.xyz, 1.0), $1))"; + case FUNCTION::FUNCTION_SFL: + return "vec4(0., 0., 0., 0.)"; + case FUNCTION::FUNCTION_STR: + return "vec4(1., 1., 1., 1.)"; + case FUNCTION::FUNCTION_FRACT: + return "fract($0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D: + return "texture($t, $0.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_PROJ: + return "textureProj($t, $0.x, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_LOD: + return "textureLod($t, $0.x, $1)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D: + return "texture($t, $0.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ: + return "textureProj($t, $0.xyz, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD: + return "textureLod($t, $0.xy, $1)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE: + return "texture($t, $0.xyz)"; + case 
FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_PROJ: + return "textureProj($t, $0.xyzw, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_LOD: + return "textureLod($t, $0.xyz, $1)"; + case FUNCTION::FUNCTION_DFDX: + return "dFdx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "dFdy($0)"; + } + } + + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1) + { + switch (f) + { + case COMPARE::FUNCTION_SEQ: + return "equal(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGE: + return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGT: + return "greaterThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLE: + return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLT: + return "lessThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SNE: + return "notEqual(" + Op0 + ", " + Op1 + ")"; + } + throw EXCEPTION("Unknow compare function"); + } + + void insert_glsl_legacy_function(std::ostream& OS) + { + OS << "vec4 divsq_legacy(vec4 num, vec4 denum)\n"; + OS << "{\n"; + OS << " return num / sqrt(max(denum.xxxx, 1.E-10));\n"; + OS << "}\n"; + + OS << "vec4 rcp_legacy(vec4 denum)\n"; + OS << "{\n"; + OS << " return 1. / denum;\n"; + OS << "}\n"; + + OS << "vec4 rsq_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return float(1.0 / sqrt(max(val.x, 1.E-10))).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 log2_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return log2(max(val.x, 1.E-10)).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 lit_legacy(vec4 val)"; + OS << "{\n"; + OS << " vec4 clamped_val = val;\n"; + OS << " clamped_val.x = max(val.x, 0.);\n"; + OS << " clamped_val.y = max(val.y, 0.);\n"; + OS << " vec4 result;\n"; + OS << " result.x = 1.;\n"; + OS << " result.w = 1.;\n"; + OS << " result.y = clamped_val.x;\n"; + OS << " result.z = clamped_val.x > 0. ? 
exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.;\n"; + OS << " return result;\n"; + OS << "}\n\n"; + } + + void init_default_resources(TBuiltInResource &rsc) /* assigns the fixed resource limits that are handed to the shader parser */ + { + rsc.maxLights = 32; + rsc.maxClipPlanes = 6; + rsc.maxTextureUnits = 32; + rsc.maxTextureCoords = 32; + rsc.maxVertexAttribs = 64; + rsc.maxVertexUniformComponents = 4096; + rsc.maxVaryingFloats = 64; + rsc.maxVertexTextureImageUnits = 32; + rsc.maxCombinedTextureImageUnits = 80; + rsc.maxTextureImageUnits = 32; + rsc.maxFragmentUniformComponents = 4096; + rsc.maxDrawBuffers = 32; + rsc.maxVertexUniformVectors = 128; + rsc.maxVaryingVectors = 8; + rsc.maxFragmentUniformVectors = 16; + rsc.maxVertexOutputVectors = 16; + rsc.maxFragmentInputVectors = 15; + rsc.minProgramTexelOffset = -8; /* lower bound of the texel offset range; the upper bound is set on the next line */ + rsc.maxProgramTexelOffset = 7; + rsc.maxClipDistances = 8; + rsc.maxComputeWorkGroupCountX = 65535; + rsc.maxComputeWorkGroupCountY = 65535; + rsc.maxComputeWorkGroupCountZ = 65535; + rsc.maxComputeWorkGroupSizeX = 1024; + rsc.maxComputeWorkGroupSizeY = 1024; + rsc.maxComputeWorkGroupSizeZ = 64; + rsc.maxComputeUniformComponents = 1024; + rsc.maxComputeTextureImageUnits = 16; + rsc.maxComputeImageUniforms = 8; + rsc.maxComputeAtomicCounters = 8; + rsc.maxComputeAtomicCounterBuffers = 1; + rsc.maxVaryingComponents = 60; + rsc.maxVertexOutputComponents = 64; + rsc.maxGeometryInputComponents = 64; + rsc.maxGeometryOutputComponents = 128; + rsc.maxFragmentInputComponents = 128; + rsc.maxImageUnits = 8; + rsc.maxCombinedImageUnitsAndFragmentOutputs = 8; + rsc.maxCombinedShaderOutputResources = 8; + rsc.maxImageSamples = 0; + rsc.maxVertexImageUniforms = 0; + rsc.maxTessControlImageUniforms = 0; + rsc.maxTessEvaluationImageUniforms = 0; + rsc.maxGeometryImageUniforms = 0; + rsc.maxFragmentImageUniforms = 8; + rsc.maxCombinedImageUniforms = 8; + rsc.maxGeometryTextureImageUnits = 16; + rsc.maxGeometryOutputVertices = 256; + rsc.maxGeometryTotalOutputComponents = 1024; + rsc.maxGeometryUniformComponents = 1024; + 
rsc.maxGeometryVaryingComponents = 64; + rsc.maxTessControlInputComponents = 128; + rsc.maxTessControlOutputComponents = 128; + rsc.maxTessControlTextureImageUnits = 16; + rsc.maxTessControlUniformComponents = 1024; + rsc.maxTessControlTotalOutputComponents = 4096; + rsc.maxTessEvaluationInputComponents = 128; + rsc.maxTessEvaluationOutputComponents = 128; + rsc.maxTessEvaluationTextureImageUnits = 16; + rsc.maxTessEvaluationUniformComponents = 1024; + rsc.maxTessPatchComponents = 120; + rsc.maxPatchVertices = 32; + rsc.maxTessGenLevel = 64; + rsc.maxViewports = 16; + rsc.maxVertexAtomicCounters = 0; + rsc.maxTessControlAtomicCounters = 0; + rsc.maxTessEvaluationAtomicCounters = 0; + rsc.maxGeometryAtomicCounters = 0; + rsc.maxFragmentAtomicCounters = 8; + rsc.maxCombinedAtomicCounters = 8; + rsc.maxAtomicCounterBindings = 1; + rsc.maxVertexAtomicCounterBuffers = 0; + rsc.maxTessControlAtomicCounterBuffers = 0; + rsc.maxTessEvaluationAtomicCounterBuffers = 0; + rsc.maxGeometryAtomicCounterBuffers = 0; + rsc.maxFragmentAtomicCounterBuffers = 1; + rsc.maxCombinedAtomicCounterBuffers = 1; + rsc.maxAtomicCounterBufferSize = 16384; + rsc.maxTransformFeedbackBuffers = 4; + rsc.maxTransformFeedbackInterleavedComponents = 64; + rsc.maxCullDistances = 8; + rsc.maxCombinedClipAndCullDistances = 8; + rsc.maxSamples = 4; + + rsc.limits.nonInductiveForLoops = 1; + rsc.limits.whileLoops = 1; + rsc.limits.doWhileLoops = 1; + rsc.limits.generalUniformIndexing = 1; + rsc.limits.generalAttributeMatrixVectorIndexing = 1; + rsc.limits.generalVaryingIndexing = 1; + rsc.limits.generalSamplerIndexing = 1; + rsc.limits.generalVariableIndexing = 1; + rsc.limits.generalConstantMatrixVectorIndexing = 1; + } + + static const varying_register_t varying_regs[] = + { + { "diff_color", 0 }, + { "tc0", 1 }, + { "tc1", 2 }, + { "tc2", 3 }, + { "tc3", 4 }, + { "tc4", 5 }, + { "tc5", 6 }, + { "tc6", 7 }, + { "tc7", 8 }, + { "tc8", 9 }, + { "tc9", 10 }, + { "front_diff_color", 11 }, + { 
"front_spec_color", 12 }, + { "spec_color", 13 }, + { "fog_c", 14 }, + { "fogc", 14 } + }; + + const varying_register_t & get_varying_register(const std::string & name) + { + for (const auto&t : varying_regs) + { + if (t.name == name) + return t; + } + + throw EXCEPTION("Unknown register name: %s", name); + } + + bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector& spv) + { + EShLanguage lang = (domain == glsl::glsl_fragment_program) ? EShLangFragment : EShLangVertex; + + glslang::InitializeProcess(); + glslang::TProgram program; + glslang::TShader shader_object(lang); + + bool success = false; + const char *shader_text = shader.data(); + + TBuiltInResource rsc; + init_default_resources(rsc); + + shader_object.setStrings(&shader_text, 1); + + EShMessages msg = (EShMessages)(EShMsgVulkanRules | EShMsgSpvRules); + if (shader_object.parse(&rsc, 400, EProfile::ECoreProfile, false, true, msg)) + { + program.addShader(&shader_object); + success = program.link(EShMsgVulkanRules); + if (success) + { + glslang::TIntermediate* bytes = program.getIntermediate(lang); + glslang::GlslangToSpv(*bytes, spv); + } + } + else + { + LOG_ERROR(RSX, shader_object.getInfoLog()); + LOG_ERROR(RSX, shader_object.getInfoDebugLog()); + } + + glslang::FinalizeProcess(); + return success; + } +} diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h new file mode 100644 index 0000000000..b5b72f70b6 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -0,0 +1,20 @@ +#pragma once +#include "../Common/ShaderParam.h" +#include "VKHelpers.h" + +namespace vk +{ + struct varying_register_t + { + std::string name; + int reg_location; + }; + + std::string getFloatTypeNameImpl(size_t elementCount); + std::string getFunctionImpl(FUNCTION f); + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1); + void insert_glsl_legacy_function(std::ostream& OS); + + const varying_register_t& 
get_varying_register(const std::string& name); + bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector &spv); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp new file mode 100644 index 0000000000..750df896cd --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -0,0 +1,321 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "VKFragmentProgram.h" + +#include "VKCommonDecompiler.h" +#include "VKHelpers.h" +#include "../GCM.h" + +std::string VKFragmentDecompilerThread::getFloatTypeName(size_t elementCount) +{ + return vk::getFloatTypeNameImpl(elementCount); +} + +std::string VKFragmentDecompilerThread::getFunction(FUNCTION f) +{ + return vk::getFunctionImpl(f); +} + +std::string VKFragmentDecompilerThread::saturate(const std::string & code) +{ + return "clamp(" + code + ", 0., 1.)"; +} + +std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + return vk::compareFunctionImpl(f, Op0, Op1); +} + +void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) +{ + OS << "#version 420" << std::endl; + OS << "#extension GL_ARB_separate_shader_objects: enable" << std::endl << std::endl; + + OS << "layout(std140, set=1, binding = 0) uniform ScaleOffsetBuffer" << std::endl; + OS << "{" << std::endl; + OS << " mat4 scaleOffsetMat;" << std::endl; + OS << " float fog_param0;" << std::endl; + OS << " float fog_param1;" << std::endl; + OS << "};" << std::endl << std::endl; + + vk::glsl::program_input in; + in.location = 0; + in.domain = vk::glsl::glsl_fragment_program; + in.name = "ScaleOffsetBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +void VKFragmentDecompilerThread::insertIntputs(std::stringstream & OS) +{ + for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem& PI : PT.items) + { + 
const vk::varying_register_t ® = vk::get_varying_register(PI.name); + + std::string var_name = PI.name; + if (var_name == "fogc") + var_name = "fog_c"; + + OS << "layout(location=" << reg.reg_location << ") in " << PT.type << " " << var_name << ";" << std::endl; + } + } +} + +void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) +{ + const std::pair table[] = + { + { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + OS << "layout(location=" << i << ") " << "out vec4 " << table[i].first << ";" << std::endl; + } +} + +void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) +{ + int location = 2; + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type != "sampler1D" && + PT.type != "sampler2D" && + PT.type != "sampler3D" && + PT.type != "samplerCube") + continue; + + for (const ParamItem& PI : PT.items) + { + std::string samplerType = PT.type; + int index = atoi(&PI.name.data()[3]); + + if (m_prog.unnormalized_coords & (1 << index)) + samplerType = "sampler2DRect"; + + vk::glsl::program_input in; + in.location = location; + in.domain = vk::glsl::glsl_fragment_program; + in.name = PI.name; + in.type = vk::glsl::input_type_texture; + + inputs.push_back(in); + + OS << "layout(set=1, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";" << std::endl; + } + } + + OS << "layout(std140, set=1, binding = 1) uniform FragmentConstantsBuffer" << std::endl; + OS << "{" << std::endl; + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type == "sampler1D" || + PT.type == "sampler2D" 
|| + PT.type == "sampler3D" || + PT.type == "samplerCube") + continue; + + for (const ParamItem& PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; + } + + // A dummy value otherwise it's invalid to create an empty uniform buffer + OS << " vec4 void_value;" << std::endl; + OS << "};" << std::endl; + + vk::glsl::program_input in; + in.location = 1; + in.domain = vk::glsl::glsl_fragment_program; + in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +namespace vk +{ + // Note: It's not clear whether fog is computed per pixel or per vertex. + // But it makes more sense to compute exp of interpoled value than to interpolate exp values. + void insert_fog_declaration(std::stringstream & OS, rsx::fog_mode mode) + { + switch (mode) + { + case rsx::fog_mode::linear: + OS << " vec4 fogc = vec4(fog_param1 * fog_c.x + (fog_param0 - 1.), fog_param1 * fog_c.x + (fog_param0 - 1.), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential: + OS << " vec4 fogc = vec4(11.084 * (fog_param1 * fog_c.x + fog_param0 - 1.5), exp(11.084 * (fog_param1 * fog_c.x + fog_param0 - 1.5)), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential2: + OS << " vec4 fogc = vec4(4.709 * (fog_param1 * fog_c.x + fog_param0 - 1.5), exp(-pow(4.709 * (fog_param1 * fog_c.x + fog_param0 - 1.5)), 2.), 0., 0.);\n"; + return; + case rsx::fog_mode::linear_abs: + OS << " vec4 fogc = vec4(fog_param1 * abs(fog_c.x) + (fog_param0 - 1.), fog_param1 * abs(fog_c.x) + (fog_param0 - 1.), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential_abs: + OS << " vec4 fogc = vec4(11.084 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5), exp(11.084 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5)), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential2_abs: + OS << " vec4 fogc = vec4(4.709 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5), exp(-pow(4.709 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5)), 2.), 0., 0.);\n"; + return; + } + } 
+} + +void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) +{ + vk::insert_glsl_legacy_function(OS); + + OS << "void main ()" << std::endl; + OS << "{" << std::endl; + + for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) + { + for (const ParamItem& PI : PT.items) + { + OS << " " << PT.type << " " << PI.name; + if (!PI.value.empty()) + OS << " = " << PI.value; + OS << ";" << std::endl; + } + } + + OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; + + // search if there is fogc in inputs + for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem& PI : PT.items) + { + if (PI.name == "fogc") + { + vk::insert_fog_declaration(OS, m_prog.fog_equation); + return; + } + } + } +} + +void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) +{ + const std::pair table[] = + { + { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + OS << " " << table[i].first << " = " << table[i].second << ";" << std::endl; + } + + if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + { + { + /** Note: Naruto Shippuden : Ultimate Ninja Storm 2 sets CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS in a shader + * but it writes depth in r1.z and not h2.z. + * Maybe there's a different flag for depth ? + */ + //OS << ((m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? 
"\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h0.z;\n") << std::endl; + OS << " gl_FragDepth = r1.z;\n"; + } + } + + + OS << "}" << std::endl; +} + +void VKFragmentDecompilerThread::Task() +{ + m_shader = Decompile(); + vk_prog->SetInputs(inputs); +} + +VKFragmentProgram::VKFragmentProgram() +{ +} + +VKFragmentProgram::~VKFragmentProgram() +{ + Delete(); +} + +void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) +{ + u32 size; + VKFragmentDecompilerThread decompiler(shader, parr, prog, size, *this); + decompiler.Task(); + + for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM]) + { + for (const ParamItem& PI : PT.items) + { + if (PT.type == "sampler2D") + continue; + size_t offset = atoi(PI.name.c_str() + 2); + FragmentConstantOffsetCache.push_back(offset); + } + } +} + +void VKFragmentProgram::Compile() +{ + fs::file(fs::get_config_dir() + "FragmentProgram.frag", fom::rewrite).write(shader); + + std::vector spir_v; + if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_fragment_program, spir_v)) + throw EXCEPTION("Failed to compile fragment shader"); + + //Create the object and compile + VkShaderModuleCreateInfo fs_info; + fs_info.codeSize = spir_v.size() * sizeof(u32); + fs_info.pNext = nullptr; + fs_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + fs_info.pCode = (uint32_t*)spir_v.data(); + fs_info.flags = 0; + + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkCreateShaderModule(dev, &fs_info, nullptr, &handle); + + id = (u32)(handle); +} + +void VKFragmentProgram::Delete() +{ + shader.clear(); + + if (handle) + { + if (Emu.IsStopped()) + { + LOG_WARNING(RSX, "VKFragmentProgram::Delete(): vkDestroyShaderModule(0x%X) avoided", handle); + } + else + { + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkDestroyShaderModule(dev, handle, NULL); + handle = nullptr; + } + } +} + +void VKFragmentProgram::SetInputs(std::vector& inputs) +{ + for (auto &it : inputs) + { + uniforms.push_back(it); + } +} diff --git 
a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h new file mode 100644 index 0000000000..c7fa7b922d --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -0,0 +1,69 @@ +#pragma once +#include "../Common/FragmentProgramDecompiler.h" +#include "Emu/RSX/RSXFragmentProgram.h" +#include "Utilities/Thread.h" +#include "VulkanAPI.h" +#include "../VK/VKHelpers.h" + +struct VKFragmentDecompilerThread : public FragmentProgramDecompiler +{ + std::string& m_shader; + ParamArray& m_parrDummy; + std::vector inputs; + class VKFragmentProgram *vk_prog; +public: + VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) + : FragmentProgramDecompiler(prog, size) + , m_shader(shader) + , m_parrDummy(parr) + , vk_prog(&dst) + { + } + + void Task(); + const std::vector& get_inputs() { return inputs; } +protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getFunction(FUNCTION) override; + virtual std::string saturate(const std::string &code) override; + virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; + + virtual void insertHeader(std::stringstream &OS) override; + virtual void insertIntputs(std::stringstream &OS) override; + virtual void insertOutputs(std::stringstream &OS) override; + virtual void insertConstants(std::stringstream &OS) override; + virtual void insertMainStart(std::stringstream &OS) override; + virtual void insertMainEnd(std::stringstream &OS) override; +}; + +/** Storage for an Fragment Program in the process of of recompilation. + * This class calls OpenGL functions and should only be used from the RSX/Graphics thread. 
+ */ +class VKFragmentProgram +{ +public: + VKFragmentProgram(); + ~VKFragmentProgram(); + + ParamArray parr; + VkShaderModule handle = nullptr; + int id; + std::string shader; + std::vector FragmentConstantOffsetCache; + + std::vector uniforms; + void SetInputs(std::vector& uniforms); + /** + * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + * @param td texture dimensions of input textures + */ + void Decompile(const RSXFragmentProgram& prog); + + /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ + void Compile(); + +private: + /** Deletes the shader and any stored information */ + void Delete(); +}; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 869b4116d3..ae9fbacb6d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,10 +1,1062 @@ #include "stdafx.h" -#ifndef __APPLE__ +#include "Utilities/rPlatform.h" // only for rImage +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/state.h" #include "VKGSRender.h" +#include "../rsx_methods.h" +#include "../Common/BufferUtils.h" -VKGSRender::VKGSRender() +namespace { + u32 get_max_depth_value(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 0xFFFF; + case rsx::surface_depth_format::z24s8: return 0xFFFFFF; + } + throw EXCEPTION("Unknow depth format"); + } + u8 get_pixel_size(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 2; + case rsx::surface_depth_format::z24s8: return 4; + } + throw EXCEPTION("Unknow depth format"); + } } -#endif \ No newline at end of file +namespace vk +{ + VkCompareOp compare_op(u32 gl_name) + { + switch (gl_name) + { + case CELL_GCM_GREATER: + return VK_COMPARE_OP_GREATER; + case CELL_GCM_LESS: + return VK_COMPARE_OP_LESS; 
+ case CELL_GCM_LEQUAL: + return VK_COMPARE_OP_LESS_OR_EQUAL; + case CELL_GCM_GEQUAL: + return VK_COMPARE_OP_EQUAL; + case CELL_GCM_EQUAL: + return VK_COMPARE_OP_EQUAL; + case CELL_GCM_ALWAYS: + return VK_COMPARE_OP_ALWAYS; + default: + throw EXCEPTION("Unsupported compare op: 0x%X", gl_name); + } + } + + VkFormat get_compatible_surface_format(rsx::surface_color_format color_format) + { + switch (color_format) + { + case rsx::surface_color_format::r5g6b5: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + + case rsx::surface_color_format::a8r8g8b8: + return VK_FORMAT_B8G8R8A8_UNORM; + + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + LOG_ERROR(RSX, "Format 0x%X may be buggy.", color_format); + return VK_FORMAT_B8G8R8A8_UNORM; + + case rsx::surface_color_format::w16z16y16x16: + return VK_FORMAT_R16G16B16A16_SFLOAT; + + case rsx::surface_color_format::w32z32y32x32: + return VK_FORMAT_R32G32B32A32_SFLOAT; + + case rsx::surface_color_format::b8: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::x32: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::a8b8g8r8: + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); + return VK_FORMAT_B8G8R8A8_UNORM; + } + } + + VkFormat get_compatible_depth_surface_format(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; + case rsx::surface_depth_format::z24s8: return VK_FORMAT_D16_UNORM; + } + throw EXCEPTION("Invalid format (0x%x)", format); + } + + std::vector get_draw_buffers(rsx::surface_target fmt) + { + switch (fmt) + { + case rsx::surface_target::none: + return{}; + case rsx::surface_target::surface_a: + return{ 0 }; + case 
rsx::surface_target::surface_b: + return{ 1 }; + case rsx::surface_target::surfaces_a_b: + return{ 0, 1 }; + case rsx::surface_target::surfaces_a_b_c: + return{ 0, 1, 2 }; + case rsx::surface_target::surfaces_a_b_c_d: + return{ 0, 1, 2, 3 }; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", fmt); + return{}; + } + } + + VkBlendFactor get_blend_factor(u16 factor) + { + switch (factor) + { + case CELL_GCM_ONE: return VK_BLEND_FACTOR_ONE; + case CELL_GCM_ZERO: return VK_BLEND_FACTOR_ZERO; + case CELL_GCM_SRC_ALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return VK_BLEND_FACTOR_DST_ALPHA; + case CELL_GCM_SRC_COLOR: return VK_BLEND_FACTOR_SRC_COLOR; + case CELL_GCM_DST_COLOR: return VK_BLEND_FACTOR_DST_COLOR; + case CELL_GCM_CONSTANT_COLOR: return VK_BLEND_FACTOR_CONSTANT_COLOR; + case CELL_GCM_CONSTANT_ALPHA: return VK_BLEND_FACTOR_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + default: + throw EXCEPTION("Unknown blend factor 0x%X", factor); + } + }; + + VkBlendOp get_blend_op(u16 op) + { + switch (op) + { + case CELL_GCM_FUNC_ADD: return VK_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return VK_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; + default: + throw EXCEPTION("Unknown blend op: 0x%X", op); + } + } +} + +VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) +{ + shaders_cache.load(rsx::shader_language::glsl); + + HINSTANCE hInstance = NULL; + HWND hWnd = (HWND)m_frame->handle(); + + 
m_thread_context.createInstance("RPCS3"); + m_thread_context.makeCurrentInstance(1); + m_thread_context.enable_debugging(); + + std::vector& gpus = m_thread_context.enumerateDevices(); + m_swap_chain = m_thread_context.createSwapChain(hInstance, hWnd, gpus[0]); + + m_device = (vk::render_device *)(&m_swap_chain->get_device()); + + vk::set_current_thread_ctx(m_thread_context); + vk::set_current_renderer(m_swap_chain->get_device()); + + m_swap_chain->init_swapchain(m_frame->client_size().width, m_frame->client_size().height); + + //create command buffer... + m_command_buffer_pool.create((*m_device)); + m_command_buffer.create(m_command_buffer_pool); + + VkCommandBufferInheritanceInfo inheritance_info; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + inheritance_info.pNext = nullptr; + inheritance_info.renderPass = VK_NULL_HANDLE; + inheritance_info.subpass = 0; + inheritance_info.framebuffer = VK_NULL_HANDLE; + inheritance_info.occlusionQueryEnable = VK_FALSE; + inheritance_info.queryFlags = 0; + inheritance_info.pipelineStatistics = 0; + + VkCommandBufferBeginInfo begin_infos; + begin_infos.flags = 0; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.pNext = nullptr; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); + + for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) + { + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_IMAGE_ASPECT_COLOR_BIT); + } + + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); + execute_command_buffer(false); + + m_scale_offset_buffer.create((*m_device), 128); + m_vertex_constants_buffer.create((*m_device), 512 * 16); + m_fragment_constants_buffer.create((*m_device), 512 * 16); + m_index_buffer.create((*m_device), 65536, VK_FORMAT_R16_UINT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); +} + 
+VKGSRender::~VKGSRender() +{ + if (m_submit_fence) + { + vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, 1000000L); + vkDestroyFence((*m_device), m_submit_fence, nullptr); + m_submit_fence = nullptr; + } + + if (m_present_semaphore) + { + vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); + m_present_semaphore = nullptr; + } + + vk::destroy_global_resources(); + + //TODO: Properly destroy shader modules instead of calling clear... + m_prog_buffer.clear(); + + m_scale_offset_buffer.destroy(); + m_vertex_constants_buffer.destroy(); + m_fragment_constants_buffer.destroy(); + m_index_buffer.destroy(); + + if (m_render_pass) + destroy_render_pass(); + + m_command_buffer.destroy(); + m_command_buffer_pool.destroy(); + + m_swap_chain->destroy(); + + m_thread_context.close(); + delete m_swap_chain; +} + +bool VKGSRender::on_access_violation(u32 address, bool is_writing) +{ + if (is_writing) + return m_texture_cache.invalidate_address(address); + + return false; +} + +void VKGSRender::begin() +{ + rsx::thread::begin(); + + //TODO: Fence sync, ring-buffers, etc + //CHECK_RESULT(vkDeviceWaitIdle((*m_device))); + + if (!load_program()) + return; + + if (!recording) + begin_command_buffer_recording(); + + init_buffers(); + + m_program->set_draw_buffer_count(m_draw_buffers_count); + + u32 color_mask = rsx::method_registers[NV4097_SET_COLOR_MASK]; + bool color_mask_b = !!(color_mask & 0xff); + bool color_mask_g = !!((color_mask >> 8) & 0xff); + bool color_mask_r = !!((color_mask >> 16) & 0xff); + bool color_mask_a = !!((color_mask >> 24) & 0xff); + + VkColorComponentFlags mask = 0; + if (color_mask_a) mask |= VK_COLOR_COMPONENT_A_BIT; + if (color_mask_b) mask |= VK_COLOR_COMPONENT_B_BIT; + if (color_mask_g) mask |= VK_COLOR_COMPONENT_G_BIT; + if (color_mask_r) mask |= VK_COLOR_COMPONENT_R_BIT; + + VkColorComponentFlags color_masks[4] = { mask }; + + u8 render_targets[] = { 0, 1, 2, 3 }; + m_program->set_color_mask(m_draw_buffers_count, render_targets, 
color_masks); + + //TODO stencil mask + m_program->set_depth_write_mask(rsx::method_registers[NV4097_SET_DEPTH_MASK]); + + if (rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]) + { + m_program->set_depth_test_enable(VK_TRUE); + m_program->set_depth_compare_op(vk::compare_op(rsx::method_registers[NV4097_SET_DEPTH_FUNC])); + } + else + m_program->set_depth_test_enable(VK_FALSE); + + if (rsx::method_registers[NV4097_SET_BLEND_ENABLE]) + { + u32 sfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR]; + u32 dfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR]; + + VkBlendFactor sfactor_rgb = vk::get_blend_factor(sfactor); + VkBlendFactor sfactor_a = vk::get_blend_factor(sfactor >> 16); + VkBlendFactor dfactor_rgb = vk::get_blend_factor(dfactor); + VkBlendFactor dfactor_a = vk::get_blend_factor(dfactor >> 16); + + //TODO: Separate target blending + + VkBool32 blend_state = VK_TRUE; + + m_program->set_blend_state(m_draw_buffers_count, render_targets, blend_state); + m_program->set_blend_func(m_draw_buffers_count, render_targets, sfactor_rgb, dfactor_rgb, sfactor_a, dfactor_a); + + u32 equation = rsx::method_registers[NV4097_SET_BLEND_EQUATION]; + VkBlendOp equation_rgb = vk::get_blend_op(equation); + VkBlendOp equation_a = vk::get_blend_op(equation >> 16); + + m_program->set_blend_op(m_draw_buffers_count, render_targets, equation_rgb, equation_a); + } + else + { + VkBool32 blend_state = VK_FALSE; + m_program->set_blend_state(m_draw_buffers_count, render_targets, blend_state); + } + + if (rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) + { + if (rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFF && + rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFFFFFF) + { + LOG_ERROR(RSX, "Custom primitive restart index 0x%X. 
Should rewrite index buffer with proper value!", rsx::method_registers[NV4097_SET_RESTART_INDEX]); + } + + LOG_ERROR(RSX, "Primitive restart enabled!"); + m_program->set_primitive_restart(VK_TRUE); + } + else + m_program->set_primitive_restart(VK_FALSE); + + u32 line_width = rsx::method_registers[NV4097_SET_LINE_WIDTH]; + float actual_line_width = (line_width >> 3) + (line_width & 7) / 8.f; + + vkCmdSetLineWidth(m_command_buffer, actual_line_width); + + //TODO: Set up other render-state parameters into the program pipeline + + VkRenderPassBeginInfo rp_begin; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_render_pass; + rp_begin.framebuffer = m_framebuffer; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_frame->client_size().width; + rp_begin.renderArea.extent.height = m_frame->client_size().height; + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = nullptr; + + vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + m_draw_calls++; +} + +namespace +{ + bool normalize(rsx::vertex_base_type type) + { + switch (type) + { + case rsx::vertex_base_type::s1: + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::cmp: + return true; + case rsx::vertex_base_type::f: + case rsx::vertex_base_type::sf: + case rsx::vertex_base_type::ub256: + case rsx::vertex_base_type::s32k: + return false; + } + throw EXCEPTION("unknown vertex type"); + } +} + +void VKGSRender::end() +{ + vk::texture *texture0 = nullptr; + for (int i = 0; i < rsx::limits::textures_count; ++i) + { + if (m_program->has_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i))) + { + if (!textures[i].enabled()) + { + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i)); + continue; + } + + vk::texture &tex = (texture0)? 
(*texture0): m_texture_cache.upload_texture(m_command_buffer, textures[i], m_rtts); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i), tex); + texture0 = &tex; + } + } + + auto &upload_info = upload_vertex_data(); + + m_program->set_primitive_topology(std::get<0>(upload_info)); + m_program->use(m_command_buffer, m_render_pass, 0); + + if (!std::get<1>(upload_info)) + vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0); + else + { + VkIndexType &index_type = std::get<3>(upload_info); + u32 &index_count = std::get<2>(upload_info); + + vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer, 0, index_type); + vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); + } + + vkCmdEndRenderPass(m_command_buffer); + + m_texture_cache.flush(m_command_buffer); + + end_command_buffer_recording(); + execute_command_buffer(false); + + //Finish() + vkDeviceWaitIdle((*m_device)); + + rsx::thread::end(); +} + +void VKGSRender::set_viewport() +{ + u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; + u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; + + u16 viewport_x = viewport_horizontal & 0xffff; + u16 viewport_y = viewport_vertical & 0xffff; + u16 viewport_w = viewport_horizontal >> 16; + u16 viewport_h = viewport_vertical >> 16; + + u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; + u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; + u16 scissor_x = scissor_horizontal; + u16 scissor_w = scissor_horizontal >> 16; + u16 scissor_y = scissor_vertical; + u16 scissor_h = scissor_vertical >> 16; + +// u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; +// rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); + + VkViewport viewport; + viewport.x = viewport_x; + viewport.y = viewport_y; + viewport.width = viewport_w; + viewport.height = viewport_h; + viewport.minDepth = 0.f; + 
viewport.maxDepth = 1.f; + + vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); + + VkRect2D scissor; + scissor.extent.height = scissor_h; + scissor.extent.width = scissor_w; + scissor.offset.x = scissor_x; + scissor.offset.y = scissor_y; + + vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); +} + +void VKGSRender::on_init_thread() +{ + GSRender::on_init_thread(); + + for (auto &attrib_buffer : m_attrib_buffers) + { + attrib_buffer.create((*m_device), 65536, VK_FORMAT_R8_UNORM, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + + u8 *data = static_cast(attrib_buffer.map(0, 65536)); + memset(data, 0, 65536); + attrib_buffer.unmap(); + } +} + +void VKGSRender::on_exit() +{ + m_texture_cache.destroy(); + + for (auto &attrib_buffer : m_attrib_buffers) + { + attrib_buffer.destroy(); + } +} + +void VKGSRender::clear_surface(u32 mask) +{ + //TODO: Build clear commands into current renderpass descriptor set + if (!(mask & 0xF3)) return; + + if (m_current_present_image== 0xFFFF) return; + + bool was_recording = recording; + + if (!was_recording) + begin_command_buffer_recording(); + + init_buffers(); + + float depth_clear = 1.f; + u32 stencil_clear = 0.f; + + VkClearValue depth_stencil_clear_values, color_clear_values; + VkImageSubresourceRange depth_range = vk::default_image_subresource_range(); + depth_range.aspectMask = 0; + + if (mask & 0x1) + { + rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); + u32 max_depth_value = get_max_depth_value(surface_depth_format); + + u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; + float depth_clear = (float)clear_depth / max_depth_value; + + depth_range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + depth_stencil_clear_values.depthStencil.depth = depth_clear; + depth_stencil_clear_values.depthStencil.stencil = stencil_clear; + } + +/* if (mask & 0x2) + { + u8 clear_stencil = 
rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; + u32 stencil_mask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; + + //TODO set stencil mask + depth_range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + depth_stencil_clear_values.depthStencil.stencil = stencil_mask; + }*/ + + if (mask & 0xF0) + { + u32 clear_color = rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]; + u8 clear_a = clear_color >> 24; + u8 clear_r = clear_color >> 16; + u8 clear_g = clear_color >> 8; + u8 clear_b = clear_color; + + //TODO set color mask + /*VkBool32 clear_red = (VkBool32)!!(mask & 0x20); + VkBool32 clear_green = (VkBool32)!!(mask & 0x40); + VkBool32 clear_blue = (VkBool32)!!(mask & 0x80); + VkBool32 clear_alpha = (VkBool32)!!(mask & 0x10);*/ + + color_clear_values.color.float32[0] = (float)clear_r / 255; + color_clear_values.color.float32[1] = (float)clear_g / 255; + color_clear_values.color.float32[2] = (float)clear_b / 255; + color_clear_values.color.float32[3] = (float)clear_a / 255; + + VkImageSubresourceRange range = vk::default_image_subresource_range(); + + for (u32 i = 0; i < m_rtts.m_bound_render_targets.size(); ++i) + { + if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; + + VkImage color_image = (*std::get<1>(m_rtts.m_bound_render_targets[i])); + vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &color_clear_values.color, 1, &range); + } + } + + if (mask & 0x3) + vkCmdClearDepthStencilImage(m_command_buffer, (*std::get<1>(m_rtts.m_bound_depth_stencil)), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); + + if (!was_recording) + { + end_command_buffer_recording(); + execute_command_buffer(false); + } + + recording = was_recording; +} + +bool VKGSRender::do_method(u32 cmd, u32 arg) +{ + switch (cmd) + { + case NV4097_CLEAR_SURFACE: + clear_surface(arg); + return true; + default: + return false; + } +} + +void 
VKGSRender::init_render_pass(VkFormat surface_format, VkFormat depth_format, u8 num_draw_buffers, u8 *draw_buffers) +{ + //TODO: Create buffers as requested by the game. Render to swapchain for now.. + /* Describe a render pass and framebuffer attachments */ + VkAttachmentDescription attachments[2]; + memset(&attachments, 0, sizeof attachments); + + attachments[0].format = surface_format; + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; //Set to clear removes warnings about empty contents after flip; overwrites previous calls + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; //PRESENT_SRC_KHR?? + attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + attachments[1].format = VK_FORMAT_D16_UNORM; /* Depth buffer format. Should be more elegant than this */ + attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkAttachmentReference template_color_reference; + template_color_reference.attachment = VK_ATTACHMENT_UNUSED; + template_color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference depth_reference; + depth_reference.attachment = num_draw_buffers; + depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + //Fill in draw_buffers information... 
+ VkAttachmentDescription real_attachments[4]; + VkAttachmentReference color_references[4]; + + for (int i = 0; i < num_draw_buffers; ++i) + { + real_attachments[i] = attachments[0]; + + color_references[i] = template_color_reference; + color_references[i].attachment = (draw_buffers)? draw_buffers[i]: i; + } + + real_attachments[num_draw_buffers] = attachments[1]; + + VkSubpassDescription subpass; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.flags = 0; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = num_draw_buffers; + subpass.pColorAttachments = num_draw_buffers? color_references: nullptr; + subpass.pResolveAttachments = nullptr; + subpass.pDepthStencilAttachment = &depth_reference; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo rp_info; + rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + rp_info.pNext = NULL; + rp_info.attachmentCount = num_draw_buffers+1; + rp_info.pAttachments = real_attachments; + rp_info.subpassCount = 1; + rp_info.pSubpasses = &subpass; + rp_info.dependencyCount = 0; + rp_info.pDependencies = NULL; + rp_info.flags = 0; + + CHECK_RESULT(vkCreateRenderPass((*m_device), &rp_info, NULL, &m_render_pass)); +} + +void VKGSRender::destroy_render_pass() +{ + vkDestroyRenderPass((*m_device), m_render_pass, nullptr); + m_render_pass = nullptr; +} + +bool VKGSRender::load_program() +{ + RSXVertexProgram vertex_program = get_current_vertex_program(); + RSXFragmentProgram fragment_program = get_current_fragment_program(); + + //Load current program from buffer + m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); + + //TODO: Update constant buffers.. + //1. Update scale-offset matrix + //2. Update vertex constants + //3. 
Update fragment constants + u8 *buf = (u8*)m_scale_offset_buffer.map(0, VK_WHOLE_SIZE); + + //TODO: Add case for this in RSXThread + /** + * NOTE: While VK's coord system resembles GLs, the clip volume is no longer symetrical in z + * Its like D3D without the flip in y (depending on how you build the spir-v) + */ + { + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); + float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); + offset_x /= clip_w / 2.f; + + float scale_y = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); + float offset_y = ((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); + offset_y /= clip_h / 2.f; + + float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; + float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; + + float one = 1.f; + + stream_vector(buf, (u32&)scale_x, 0, 0, (u32&)offset_x); + stream_vector((char*)buf + 16, 0, (u32&)scale_y, 0, (u32&)offset_y); + stream_vector((char*)buf + 32, 0, 0, (u32&)scale_z, (u32&)offset_z); + stream_vector((char*)buf + 48, 0, 0, 0, (u32&)one); + } + + memset((char*)buf+64, 0, 8); +// memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); +// memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); + m_scale_offset_buffer.unmap(); + + buf = (u8*)m_vertex_constants_buffer.map(0, VK_WHOLE_SIZE); + fill_vertex_program_constants_data(buf); + m_vertex_constants_buffer.unmap(); + + size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + buf = (u8*)m_fragment_constants_buffer.map(0, fragment_constants_sz); + m_prog_buffer.fill_fragment_constans_buffer({ 
reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program); + m_fragment_constants_buffer.unmap(); + + m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_scale_offset_buffer); + m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_vertex_constants_buffer); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_scale_offset_buffer); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_fragment_constants_buffer); + + return true; +} + +static const u32 mr_color_offset[rsx::limits::color_buffers_count] = +{ + NV4097_SET_SURFACE_COLOR_AOFFSET, + NV4097_SET_SURFACE_COLOR_BOFFSET, + NV4097_SET_SURFACE_COLOR_COFFSET, + NV4097_SET_SURFACE_COLOR_DOFFSET +}; + +static const u32 mr_color_dma[rsx::limits::color_buffers_count] = +{ + NV4097_SET_CONTEXT_DMA_COLOR_A, + NV4097_SET_CONTEXT_DMA_COLOR_B, + NV4097_SET_CONTEXT_DMA_COLOR_C, + NV4097_SET_CONTEXT_DMA_COLOR_D +}; + +static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = +{ + NV4097_SET_SURFACE_PITCH_A, + NV4097_SET_SURFACE_PITCH_B, + NV4097_SET_SURFACE_PITCH_C, + NV4097_SET_SURFACE_PITCH_D +}; + +void VKGSRender::init_buffers(bool skip_reading) +{ + if (dirty_frame) + { + //Prepare surface for new frame + VkSemaphoreCreateInfo semaphore_info; + semaphore_info.flags = 0; + semaphore_info.pNext = nullptr; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore); + + VkFence nullFence = VK_NULL_HANDLE; + CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, nullFence, &m_current_present_image)); + + dirty_frame = false; + } + + prepare_rtts(); + + if (!skip_reading) + { + read_buffers(); + } + + set_viewport(); +} + +void VKGSRender::read_buffers() +{ +} + +void VKGSRender::write_buffers() +{ +} + +void VKGSRender::begin_command_buffer_recording() +{ + 
VkCommandBufferInheritanceInfo inheritance_info; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + inheritance_info.pNext = nullptr; + inheritance_info.renderPass = VK_NULL_HANDLE; + inheritance_info.subpass = 0; + inheritance_info.framebuffer = VK_NULL_HANDLE; + inheritance_info.occlusionQueryEnable = VK_FALSE; + inheritance_info.queryFlags = 0; + inheritance_info.pipelineStatistics = 0; + + VkCommandBufferBeginInfo begin_infos; + begin_infos.flags = 0; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.pNext = nullptr; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + if (m_submit_fence) + { + vkWaitForFences(*m_device, 1, &m_submit_fence, VK_TRUE, ~0ULL); + vkDestroyFence(*m_device, m_submit_fence, nullptr); + m_submit_fence = nullptr; + + CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); + } + + CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); + recording = true; +} + +void VKGSRender::end_command_buffer_recording() +{ + recording = false; + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); +} + +void VKGSRender::prepare_rtts() +{ + u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; + + if (!m_rtts_dirty) + return; + + m_rtts_dirty = false; + bool reconfigure_render_pass = true; + + if (m_surface.format != surface_format) + { + m_surface.unpack(surface_format); + reconfigure_render_pass = true; + } + + u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; + u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; + + u32 clip_width = clip_horizontal >> 16; + u32 clip_height = clip_vertical >> 16; + u32 clip_x = clip_horizontal; + u32 clip_y = clip_vertical; + + m_rtts.prepare_render_target(&m_command_buffer, + surface_format, + clip_horizontal, clip_vertical, + rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]), + get_color_surface_addresses(), get_zeta_surface_address(), + 
(*m_device), &m_command_buffer); + + //Bind created rtts as current fbo... + VkImageView attachments[5]; + std::vector draw_buffers = vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])); + + m_framebuffer.destroy(); + std::vector fbo_images; + + for (u8 index: draw_buffers) + { + vk::texture *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); + VkImageView as_image = (*raw); + fbo_images.push_back(as_image); + } + + if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) + { + vk::texture *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); + VkImageView depth_image = (*raw); + fbo_images.push_back(depth_image); + } + + if (reconfigure_render_pass) + { + //Create render pass with draw_buffers information + //Somewhat simliar to glDrawBuffers + + if (m_render_pass) + destroy_render_pass(); + + init_render_pass(vk::get_compatible_surface_format(m_surface.color_format), + vk::get_compatible_depth_surface_format(m_surface.depth_format), + draw_buffers.size(), + draw_buffers.data()); + } + + m_framebuffer.create((*m_device), m_render_pass, fbo_images.data(), fbo_images.size(), + clip_width, clip_height); + + m_draw_buffers_count = draw_buffers.size(); +} + +void VKGSRender::execute_command_buffer(bool wait) +{ + if (recording) + throw EXCEPTION("execute_command_buffer called before end_command_buffer_recording()!"); + + if (m_submit_fence) + throw EXCEPTION("Synchronization deadlock!"); + + VkFenceCreateInfo fence_info; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.flags = 0; + fence_info.pNext = nullptr; + + CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence)); + + VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + VkCommandBuffer cmd = m_command_buffer; + + VkSubmitInfo infos; + infos.commandBufferCount = 1; + infos.pCommandBuffers = &cmd; + infos.pNext = nullptr; + infos.pSignalSemaphores = nullptr; + infos.pWaitDstStageMask = 
&pipe_stage_flags; + infos.signalSemaphoreCount = 0; + infos.waitSemaphoreCount = 0; + infos.pWaitSemaphores = nullptr; + infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, m_submit_fence)); + CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue())); +} + +void VKGSRender::flip(int buffer) +{ + //LOG_NOTICE(Log::RSX, "flip(%d)", buffer); + u32 buffer_width = gcm_buffers[buffer].width; + u32 buffer_height = gcm_buffers[buffer].height; + u32 buffer_pitch = gcm_buffers[buffer].pitch; + + rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + + areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); + + coordi aspect_ratio; + if (1) //enable aspect ratio + { + sizei csize = m_frame->client_size(); + sizei new_size = csize; + + const double aq = (double)buffer_width / buffer_height; + const double rq = (double)new_size.width / new_size.height; + const double q = aq / rq; + + if (q > 1.0) + { + new_size.height = int(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; + } + else if (q < 1.0) + { + new_size.width = int(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; + } + + aspect_ratio.size = new_size; + } + else + { + aspect_ratio.size = m_frame->client_size(); + } + + //Check if anything is waiting in queue and submit it if possible.. 
+ if (m_submit_fence) + { + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); + + vkDestroyFence((*m_device), m_submit_fence, nullptr); + m_submit_fence = nullptr; + + CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); + } + + VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); + uint32_t next_image_temp = 0; + + VkPresentInfoKHR present; + present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present.pNext = nullptr; + present.swapchainCount = 1; + present.pSwapchains = &swap_chain; + present.pImageIndices = &m_current_present_image; + present.pWaitSemaphores = &m_present_semaphore; + present.waitSemaphoreCount = 1; + + if (m_render_pass) + { + begin_command_buffer_recording(); + + if (m_present_semaphore) + { + //Blit contents to screen.. + VkImage image_to_flip = nullptr; + + if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr) + image_to_flip = (*std::get<1>(m_rtts.m_bound_render_targets[0])); + else + image_to_flip = (*std::get<1>(m_rtts.m_bound_render_targets[1])); + + VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); + vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT); + } + else + { + //No draw call was issued! 
+ //TODO: Properly clear the background to rsx value + m_swap_chain->acquireNextImageKHR((*m_device), (*m_swap_chain), ~0ULL, VK_NULL_HANDLE, VK_NULL_HANDLE, &next_image_temp); + + VkImageSubresourceRange range = vk::default_image_subresource_range(); + VkClearColorValue clear_black = { 0 }; + vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(next_image_temp), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, &clear_black, 1, &range); + + present.pImageIndices = &next_image_temp; + present.waitSemaphoreCount = 0; + } + + end_command_buffer_recording(); + execute_command_buffer(false); + + CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); + CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue())); + + if (m_present_semaphore) + { + vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); + m_present_semaphore = nullptr; + } + } + + //Feed back damaged resources to the main texture cache for management... + m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); + m_rtts.invalidated_resources.clear(); + + m_draw_calls = 0; + dirty_frame = true; + m_frame->flip(m_context); +} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 70861d5415..d4d930fc6c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -1,8 +1,98 @@ #pragma once -#include -#include +#include "Emu/RSX/GSRender.h" +#include "VKHelpers.h" +#include "VKTextureCache.h" +#include "VKRenderTargets.h" -class VKGSRender +#define RSX_DEBUG 1 + +#include "VKProgramBuffer.h" +#include "../GCM.h" + +class VKGSRender : public GSRender { +private: + VKFragmentProgram m_fragment_prog; + VKVertexProgram m_vertex_prog; + + vk::glsl::program *m_program; + vk::context m_thread_context; + + rsx::surface_info m_surface; + + vk::buffer m_attrib_buffers[rsx::limits::vertex_count]; + + vk::texture_cache m_texture_cache; + rsx::vk_render_targets m_rtts; + +public: + //vk::fbo draw_fbo; + +private: + 
VKProgramBuffer m_prog_buffer; + + vk::render_device *m_device; + vk::swap_chain* m_swap_chain; + //buffer + + vk::buffer m_scale_offset_buffer; + vk::buffer m_vertex_constants_buffer; + vk::buffer m_fragment_constants_buffer; + + vk::buffer m_index_buffer; + + //Vulkan internals + u32 m_current_present_image = 0xFFFF; + VkSemaphore m_present_semaphore = nullptr; + + u32 m_current_sync_buffer_index = 0; + VkFence m_submit_fence = nullptr; + + vk::command_pool m_command_buffer_pool; + vk::command_buffer m_command_buffer; + bool recording = false; + bool dirty_frame = true; + + //Single render pass + VkRenderPass m_render_pass = nullptr; + + u32 m_draw_calls = 0; + + u8 m_draw_buffers_count = 0; + vk::framebuffer m_framebuffer; + +public: VKGSRender(); -}; \ No newline at end of file + ~VKGSRender(); + +private: + void clear_surface(u32 mask); + void init_render_pass(VkFormat surface_format, VkFormat depth_format, u8 num_draw_buffers, u8 *draw_buffers); + void destroy_render_pass(); + void execute_command_buffer(bool wait); + void begin_command_buffer_recording(); + void end_command_buffer_recording(); + + void prepare_rtts(); + + std::tuple + upload_vertex_data(); + +public: + bool load_program(); + void init_buffers(bool skip_reading = false); + void read_buffers(); + void write_buffers(); + void set_viewport(); + +protected: + void begin() override; + void end() override; + + void on_init_thread() override; + void on_exit() override; + bool do_method(u32 id, u32 arg) override; + void flip(int buffer) override; + + bool on_access_violation(u32 address, bool is_writing) override; +}; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp new file mode 100644 index 0000000000..001265384f --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -0,0 +1,285 @@ +#include "stdafx.h" +#include "VKHelpers.h" + +namespace vk +{ + context *g_current_vulkan_ctx = nullptr; + render_device g_current_renderer; + + buffer g_null_buffer; + texture 
g_null_texture; + + VkSampler g_null_sampler = nullptr; + VkImageView g_null_image_view = nullptr; + + VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) + { + return realloc(pOriginal, size); + } + + VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) + { + return _aligned_malloc(size, alignment); + } + + VKAPI_ATTR void VKAPI_CALL mem_free(void *pUserData, void *pMemory) + { + _aligned_free(pMemory); + } + + VkFormat get_compatible_sampler_format(u32 format, VkComponentMapping& swizzle, u8 swizzle_mask) + { + u8 remap_a = swizzle_mask & 0x3; + u8 remap_r = (swizzle_mask >> 2) & 0x3; + u8 remap_g = (swizzle_mask >> 4) & 0x3; + u8 remap_b = (swizzle_mask >> 6) & 0x3; + + VkComponentSwizzle map_table[] = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; + + VkComponentMapping remapped; + remapped.a = map_table[remap_a]; + remapped.b = map_table[remap_b]; + remapped.g = map_table[remap_g]; + remapped.r = map_table[remap_r]; + + swizzle = default_component_map(); + + switch (format) + { + case CELL_GCM_TEXTURE_B8: + { + swizzle = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; + return VK_FORMAT_R8_UNORM; + } + case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_B4G4R4A4_UNORM_PACK16; + case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A8R8G8B8: + { + swizzle = remapped; + return VK_FORMAT_B8G8R8A8_UNORM; + } + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + { + return VK_FORMAT_BC3_UNORM_BLOCK; + } + case CELL_GCM_TEXTURE_G8B8: + { + swizzle 
= { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }; + return VK_FORMAT_R8G8_UNORM; + } + case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; //Expand, discard high bit? + case CELL_GCM_TEXTURE_DEPTH24_D8: return VK_FORMAT_R32_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_R16_SFLOAT; + case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: + { + swizzle.a = VK_COMPONENT_SWIZZLE_ONE; + return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + } + case CELL_GCM_TEXTURE_D8R8G8B8: + { + swizzle = remapped; + swizzle.a = VK_COMPONENT_SWIZZLE_ONE; + return VK_FORMAT_B8G8R8A8_UNORM; + } + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; //Expand + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; //Expand + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + break; + } + throw EXCEPTION("Invalid or unsupported texture format (0x%x)", format); + } + + VkAllocationCallbacks default_callbacks() + { + VkAllocationCallbacks callbacks; + callbacks.pfnAllocation = vk::mem_alloc; + callbacks.pfnFree = vk::mem_free; + callbacks.pfnReallocation = vk::mem_realloc; + + 
return callbacks; + } + + VkBuffer null_buffer() + { + if (g_null_buffer.size()) + return g_null_buffer; + + g_null_buffer.create(g_current_renderer, 32, VK_FORMAT_R32_SFLOAT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + return g_null_buffer; + } + + VkSampler null_sampler() + { + if (g_null_sampler) + return g_null_sampler; + + VkSamplerCreateInfo sampler_info; + memset(&sampler_info, 0, sizeof(sampler_info)); + + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.pNext = nullptr; + sampler_info.unnormalizedCoordinates = VK_FALSE; + sampler_info.mipLodBias = 0; + sampler_info.maxAnisotropy = 0; + sampler_info.magFilter = VK_FILTER_NEAREST; + sampler_info.minFilter = VK_FILTER_NEAREST; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + vkCreateSampler(g_current_renderer, &sampler_info, nullptr, &g_null_sampler); + return g_null_sampler; + } + + VkImageView null_image_view() + { + if (g_null_image_view) + return g_null_image_view; + + g_null_texture.create(g_current_renderer, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT, 4, 4); + g_null_image_view = g_null_texture; + return g_null_image_view; + } + + VkBufferView null_buffer_view() + { + if (g_null_buffer.size()) + return g_null_buffer; + + g_null_buffer.create(g_current_renderer, 32, VK_FORMAT_R32_SFLOAT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + return g_null_buffer; + } + + void destroy_global_resources() + { + g_null_buffer.destroy(); + g_null_texture.destroy(); + + if (g_null_sampler) + vkDestroySampler(g_current_renderer, g_null_sampler, nullptr); + + 
g_null_sampler = nullptr; + g_null_image_view = nullptr; + } + + void set_current_thread_ctx(const vk::context &ctx) + { + g_current_vulkan_ctx = (vk::context *)&ctx; + } + + context *get_current_thread_ctx() + { + return g_current_vulkan_ctx; + } + + vk::render_device *get_current_renderer() + { + return &g_current_renderer; + } + + void set_current_renderer(const vk::render_device &device) + { + g_current_renderer = device; + } + + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageAspectFlags aspect_flags) + { + //Prepare an image to match the new layout.. + VkImageSubresourceRange range = default_image_subresource_range(); + range.aspectMask = aspect_flags; + + VkImageMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.newLayout = new_layout; + barrier.oldLayout = current_layout; + barrier.image = image; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange = range; + + switch (new_layout) + { + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; + } + + switch (current_layout) + { + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + 
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; + } + + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + } + + VKAPI_ATTR VkBool32 VKAPI_CALL dbgFunc(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData) + { + if (msgFlags & VK_DEBUG_REPORT_ERROR_BIT_EXT) + { + LOG_ERROR(RSX, "ERROR: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg); + } + else if (msgFlags & VK_DEBUG_REPORT_WARNING_BIT_EXT) + { + LOG_WARNING(RSX, "WARNING: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg); + } + else + { + return false; + } + + //Let the app crash.. + return false; + } + + VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData) + { + DebugBreak(); + + return false; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h new file mode 100644 index 0000000000..6bb3cf692a --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -0,0 +1,1314 @@ +#pragma once + +#include "stdafx.h" +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "VulkanAPI.h" +#include "../GCM.h" + +//Set to 9 to enable all debug layers. Will cause significant slowdowns. 
Eventually to be replaced with GUI checkbox +#define VK_ENABLED_LAYER_COUNT 0 + +namespace rsx +{ + class texture; +} + +namespace vk +{ +#define CHECK_RESULT(expr) { VkResult __res = expr; if(__res != VK_SUCCESS) throw EXCEPTION("Assertion failed! Result is %Xh", __res); } + + VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + VKAPI_ATTR void VKAPI_CALL mem_free(void *pUserData, void *pMemory); + + VKAPI_ATTR VkBool32 VKAPI_CALL dbgFunc(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData); + + VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, + void *pUserData); + + //VkAllocationCallbacks default_callbacks(); + + class context; + class render_device; + class swap_chain_image; + class physical_device; + class command_buffer; + + vk::context *get_current_thread_ctx(); + void set_current_thread_ctx(const vk::context &ctx); + + vk::render_device *get_current_renderer(); + void set_current_renderer(const vk::render_device &device); + + VkComponentMapping default_component_map(); + VkImageSubresource default_image_subresource(); + VkImageSubresourceRange default_image_subresource_range(); + + VkBuffer null_buffer(); + VkSampler null_sampler(); + VkImageView null_image_view(); + VkBufferView null_buffer_view(); + + void destroy_global_resources(); + + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageAspectFlags aspect_flags); + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, 
VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); + void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect); + + VkFormat get_compatible_sampler_format(u32 format, VkComponentMapping& mapping, u8 swizzle_mask=0); + VkFormat get_compatible_surface_format(rsx::surface_color_format color_format); + VkFormat get_compatible_depth_surface_format(rsx::surface_depth_format depth_format); + + class physical_device + { + VkPhysicalDevice dev = nullptr; + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties memory_properties; + std::vector queue_props; + + public: + + physical_device() {} + ~physical_device() {} + + void set_device(VkPhysicalDevice pdev) + { + dev = pdev; + vkGetPhysicalDeviceProperties(pdev, &props); + vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties); + } + + std::string name() + { + return props.deviceName; + } + + uint32_t get_queue_count() + { + if (queue_props.size()) + return queue_props.size(); + + uint32_t count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr); + + return count; + } + + VkQueueFamilyProperties get_queue_properties(uint32_t queue) + { + if (!queue_props.size()) + { + uint32_t count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr); + + queue_props.resize(count); + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, queue_props.data()); + } + + if (queue >= queue_props.size()) throw EXCEPTION("Undefined trap"); + return queue_props[queue]; + } + + VkPhysicalDeviceMemoryProperties get_memory_properties() + { + return memory_properties; + } + + operator VkPhysicalDevice() + { + return dev; + } + }; + + class render_device + { + vk::physical_device *pgpu; + VkDevice dev; + + public: + + render_device() + { + dev = nullptr; + pgpu = nullptr; + } + + 
render_device(vk::physical_device &pdev, uint32_t graphics_queue_idx) + { + VkResult err; + + float queue_priorities[1] = { 0.f }; + pgpu = &pdev; + + VkDeviceQueueCreateInfo queue; + queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue.pNext = NULL; + queue.queueFamilyIndex = graphics_queue_idx; + queue.queueCount = 1; + queue.pQueuePriorities = queue_priorities; + + //Set up instance information + const char *requested_extensions[] = + { + "VK_KHR_swapchain" + }; + + const char *validation_layers[] = + { + "VK_LAYER_LUNARG_threading", "VK_LAYER_LUNARG_mem_tracker", + "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_draw_state", + "VK_LAYER_LUNARG_param_checker", "VK_LAYER_LUNARG_swapchain", + "VK_LAYER_LUNARG_device_limits", "VK_LAYER_LUNARG_image", + "VK_LAYER_GOOGLE_unique_objects", + }; + + VkDeviceCreateInfo device; + device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device.pNext = NULL; + device.queueCreateInfoCount = 1; + device.pQueueCreateInfos = &queue; + device.enabledLayerCount = VK_ENABLED_LAYER_COUNT; + device.ppEnabledLayerNames = validation_layers; + device.enabledExtensionCount = 1; + device.ppEnabledExtensionNames = requested_extensions; + device.pEnabledFeatures = nullptr; + + err = vkCreateDevice(*pgpu, &device, nullptr, &dev); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + } + + ~render_device() + { + } + + void destroy() + { + if (dev && pgpu) + { + vkDestroyDevice(dev, nullptr); + dev = nullptr; + } + } + + bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32 *type_index) + { + VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties(); + + for (uint32_t i = 0; i < 32; i++) + { + if ((typeBits & 1) == 1) + { + if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask) + { + *type_index = i; + return true; + } + } + + typeBits >>= 1; + } + + return false; + } + + vk::physical_device& gpu() + { + return *pgpu; + } + + operator VkDevice() + { + return dev; + } + 
}; + + class memory_block + { + VkDeviceMemory vram = nullptr; + vk::render_device *owner = nullptr; + u32 vram_block_sz = 0; + + public: + memory_block() {} + ~memory_block() {} + + void allocate_from_pool(vk::render_device &device, u32 block_sz, u32 typeBits) + { + if (vram) + destroy(); + + u32 typeIndex = 0; + + owner = (vk::render_device*)&device; + VkDevice dev = (VkDevice)(*owner); + + if (!owner->get_compatible_memory_type(typeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &typeIndex)) + throw EXCEPTION("Could not find suitable memory type!"); + + VkMemoryAllocateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + infos.pNext = nullptr; + infos.allocationSize = block_sz; + infos.memoryTypeIndex = typeIndex; + + CHECK_RESULT(vkAllocateMemory(dev, &infos, nullptr, &vram)); + vram_block_sz = block_sz; + } + + void destroy() + { + VkDevice dev = (VkDevice)(*owner); + vkFreeMemory(dev, vram, nullptr); + + owner = nullptr; + vram = nullptr; + vram_block_sz = 0; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkDeviceMemory() + { + return vram; + } + }; + + class texture + { + VkImageView m_view = nullptr; + VkSampler m_sampler = nullptr; + VkImage m_image_contents = nullptr; + VkMemoryRequirements m_memory_layout; + VkFormat m_internal_format; + VkImageUsageFlags m_flags; + VkImageAspectFlagBits m_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImageViewType m_view_type = VK_IMAGE_VIEW_TYPE_2D; + VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR; + + vk::memory_block vram_allocation; + vk::render_device *owner = nullptr; + + u32 m_width; + u32 m_height; + u32 m_mipmaps; + + vk::texture *staging_texture = nullptr; + bool ready = false; + + VkSamplerAddressMode vk_wrap_mode(u32 gcm_wrap_mode); + float max_aniso(u32 gcm_aniso); + void sampler_setup(rsx::texture& tex, VkImageViewType type, VkComponentMapping 
swizzle); + + public: + texture(vk::swap_chain_image &img); + texture() {} + ~texture() {} + + void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height); + void destroy(); + + void init(rsx::texture &tex, vk::command_buffer &cmd, bool ignore_checks); + void init(rsx::texture &tex, vk::command_buffer &cmd); + void flush(vk::command_buffer & cmd); + + //Fill with debug color 0xFF + void init_debug(); + + void change_layout(vk::command_buffer &cmd, VkImageLayout new_layout); + VkImageLayout get_layout(); + + const u32 width(); + const u32 height(); + const u16 mipmaps(); + const VkFormat get_format(); + + operator VkSampler(); + operator VkImageView(); + operator VkImage(); + }; + + class buffer + { + VkBufferView m_view = nullptr; + VkBuffer m_buffer = nullptr; + VkMemoryRequirements m_memory_layout; + VkFormat m_internal_format = VK_FORMAT_UNDEFINED; + VkBufferUsageFlagBits m_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + VkBufferCreateFlags m_flags = 0; + + vk::render_device *owner; + vk::memory_block vram; + u32 m_size = 0; + + bool viewable = false; + + public: + buffer() {} + ~buffer() {} + + void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits 
usage, VkBufferCreateFlags flags) + { + //Creates the buffer, allocates and binds its memory, then calls set_format to build a texel view when usage and format allow one. + if (m_buffer) throw EXCEPTION("Buffer create called on an existing buffer!"); + + owner = &dev; + + VkBufferCreateInfo infos; + infos.pNext = nullptr; + infos.size = size; + infos.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + infos.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + infos.flags = flags; + infos.usage = usage; + infos.pQueueFamilyIndices = nullptr; + infos.queueFamilyIndexCount = 0; + + CHECK_RESULT(vkCreateBuffer(dev, &infos, nullptr, &m_buffer)); + + //Allocate vram for this buffer + vkGetBufferMemoryRequirements(dev, m_buffer, &m_memory_layout); + vram.allocate_from_pool(dev, m_memory_layout.size, m_memory_layout.memoryTypeBits); + + viewable = !!(usage & (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)); + + //Bind buffer memory + vkBindBufferMemory(dev, m_buffer, vram, 0); + + m_size = m_memory_layout.size; + m_usage = usage; + m_flags = flags; + + set_format(format); + } + + void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage) + { + create(dev, size, format, usage, 0); + } + + void create(vk::render_device &dev, u32 size, VkFormat format) + { + create(dev, size, format, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); + } + + void create(vk::render_device &dev, u32 size) + { + create(dev, size, VK_FORMAT_UNDEFINED); + } + + void *map(u32 offset, u64 size) + { + void *data = nullptr; + if (size == VK_WHOLE_SIZE) + size = m_memory_layout.size; + + CHECK_RESULT(vkMapMemory((*owner), vram, offset, size, 0, &data)); + return data; + } + + void unmap() + { + vkUnmapMemory((*owner), vram); + } + + void sub_data(u32 offset, u32 size, void *data) + { + //Copies size bytes from data into the buffer at offset, reallocating the buffer when it is too small. + //A null data pointer only reserves capacity. Reallocation discards the previous contents. + //TODO: Synchronization + if (!data && (m_size < size)) + { + vk::render_device *pdev = owner; + //destroy() resets m_internal_format, so capture it first or the new buffer loses its texel view + const VkFormat old_format = m_internal_format; + + destroy(); + create((*pdev), size, old_format, m_usage, m_flags); + } + + if (!data) return; + if ((offset + size) > m_size) + { + vk::render_device *tmp_owner = owner; + const VkFormat old_format = m_internal_format; + + destroy(); + //The new buffer must cover the whole written range, not just the payload length + create((*tmp_owner), 
offset + size, old_format, m_usage, m_flags); + } + + u8 *dst = (u8*)map(offset, size); + u8 *src = (u8*)data; + + memcpy(dst, src, size); + unmap(); + } + + void destroy() + { + if (!owner) return; + + vkDestroyBufferView((*owner), m_view, nullptr); + vkDestroyBuffer((*owner), m_buffer, nullptr); + vram.destroy(); + + owner = nullptr; + m_view = nullptr; + m_buffer = nullptr; + m_internal_format = VK_FORMAT_UNDEFINED; + } + + void set_format(VkFormat format) + { + if (m_internal_format == format || format == VK_FORMAT_UNDEFINED || !viewable) + return; + + if (m_view) + { + vkDestroyBufferView((*owner), m_view, nullptr); + m_view = nullptr; + } + + VkFormatProperties format_properties; + vk::physical_device dev = owner->gpu(); + vkGetPhysicalDeviceFormatProperties(dev, format, &format_properties); + + if (!(format_properties.bufferFeatures & VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) + throw EXCEPTION("Can't map view to requested format"); + + VkBufferViewCreateInfo view_info; + view_info.buffer = m_buffer; + view_info.flags = 0; + view_info.format = format; + view_info.offset = 0; + view_info.pNext = nullptr; + view_info.range = m_size; + view_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + + CHECK_RESULT(vkCreateBufferView((*owner), &view_info, nullptr, &m_view)); + + m_internal_format = format; + } + + u32 size() + { + return m_size; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkBuffer() + { + return m_buffer; + } + + operator VkBufferView() + { + if (!viewable) + throw EXCEPTION("Invalid usage! 
Buffer cannot be viewed as texels."); + + return m_view; + } + }; + + class framebuffer + { + VkFramebuffer m_vk_framebuffer = nullptr; + vk::render_device *owner = nullptr; + + public: + framebuffer() {} + ~framebuffer() {} + + void create(vk::render_device &dev, VkRenderPass pass, VkImageView *attachments, u32 nb_attachments, u32 width, u32 height) + { + VkFramebufferCreateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + infos.flags = 0; + infos.width = width; + infos.height = height; + infos.pNext = nullptr; + infos.attachmentCount = nb_attachments; + infos.pAttachments = attachments; + infos.renderPass = pass; + infos.layers = 1; + + vkCreateFramebuffer(dev, &infos, nullptr, &m_vk_framebuffer); + owner = &dev; + } + + void destroy() + { + if (!owner) return; + + vkDestroyFramebuffer((*owner), m_vk_framebuffer, nullptr); + owner = nullptr; + } + + operator VkFramebuffer() const + { + return m_vk_framebuffer; + } + }; + + class swap_chain_image + { + VkImageView view = nullptr; + VkImage image = nullptr; + VkFormat internal_format; + vk::render_device *owner = nullptr; + + public: + swap_chain_image() {} + + void create(vk::render_device &dev, VkImage &swap_image, VkFormat format) + { + VkImageViewCreateInfo color_image_view; + + color_image_view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + color_image_view.pNext = nullptr; + color_image_view.format = format; + + color_image_view.components.r = VK_COMPONENT_SWIZZLE_R; + color_image_view.components.g = VK_COMPONENT_SWIZZLE_G; + color_image_view.components.b = VK_COMPONENT_SWIZZLE_B; + color_image_view.components.a = VK_COMPONENT_SWIZZLE_A; + + color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + color_image_view.subresourceRange.baseMipLevel = 0; + color_image_view.subresourceRange.levelCount = 1; + color_image_view.subresourceRange.baseArrayLayer = 0; + color_image_view.subresourceRange.layerCount = 1; + + color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; + 
color_image_view.flags = 0; + + color_image_view.image = swap_image; + vkCreateImageView(dev, &color_image_view, nullptr, &view); + + image = swap_image; + internal_format = format; + owner = &dev; + } + + void discard(vk::render_device &dev) + { + vkDestroyImageView(dev, view, nullptr); + } + + operator VkImage() + { + return image; + } + + operator VkImageView() + { + return view; + } + + operator vk::texture() + { + return vk::texture(*this); + } + }; + + class swap_chain + { + vk::render_device dev; + + uint32_t m_present_queue = 0xFFFF; + uint32_t m_graphics_queue = 0xFFFF; + + VkQueue vk_graphics_queue = nullptr; + VkQueue vk_present_queue = nullptr; + + /* WSI surface information */ + VkSurfaceKHR m_surface = nullptr; + VkFormat m_surface_format; + VkColorSpaceKHR m_color_space; + + VkSwapchainKHR m_vk_swapchain = nullptr; + std::vector m_swap_images; + + public: + + PFN_vkCreateSwapchainKHR createSwapchainKHR; + PFN_vkDestroySwapchainKHR destroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR getSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR acquireNextImageKHR; + PFN_vkQueuePresentKHR queuePresentKHR; + + swap_chain(vk::physical_device &gpu, uint32_t _present_queue, uint32_t _graphics_queue, VkFormat format, VkSurfaceKHR surface, VkColorSpaceKHR color_space) + { + dev = render_device(gpu, _graphics_queue); + + createSwapchainKHR = (PFN_vkCreateSwapchainKHR)vkGetDeviceProcAddr(dev, "vkCreateSwapchainKHR"); + destroySwapchainKHR = (PFN_vkDestroySwapchainKHR)vkGetDeviceProcAddr(dev, "vkDestroySwapchainKHR"); + getSwapchainImagesKHR = (PFN_vkGetSwapchainImagesKHR)vkGetDeviceProcAddr(dev, "vkGetSwapchainImagesKHR"); + acquireNextImageKHR = (PFN_vkAcquireNextImageKHR)vkGetDeviceProcAddr(dev, "vkAcquireNextImageKHR"); + queuePresentKHR = (PFN_vkQueuePresentKHR)vkGetDeviceProcAddr(dev, "vkQueuePresentKHR"); + + vkGetDeviceQueue(dev, _graphics_queue, 0, &vk_graphics_queue); + vkGetDeviceQueue(dev, _present_queue, 0, &vk_present_queue); + + m_present_queue = 
_present_queue; + m_graphics_queue = _graphics_queue; + m_surface = surface; + m_color_space = color_space; + m_surface_format = format; + } + + ~swap_chain() + { + } + + void destroy() + { + if (VkDevice pdev = (VkDevice)dev) + { + if (m_vk_swapchain) + { + if (m_swap_images.size()) + { + for (vk::swap_chain_image &img : m_swap_images) + img.discard(dev); + } + + destroySwapchainKHR(pdev, m_vk_swapchain, nullptr); + } + + dev.destroy(); + } + } + + void init_swapchain(u32 width, u32 height) + { + VkSwapchainKHR old_swapchain = m_vk_swapchain; + + uint32_t num_modes; + vk::physical_device& gpu = const_cast(dev.gpu()); + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &num_modes, NULL)); + + std::vector present_mode_descriptors(num_modes); + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &num_modes, present_mode_descriptors.data())); + + VkSurfaceCapabilitiesKHR surface_descriptors; + CHECK_RESULT(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, m_surface, &surface_descriptors)); + + VkExtent2D swapchainExtent; + + if (surface_descriptors.currentExtent.width == (uint32_t)-1) + { + swapchainExtent.width = width; + swapchainExtent.height = height; + } + else + { + swapchainExtent = surface_descriptors.currentExtent; + width = surface_descriptors.currentExtent.width; + height = surface_descriptors.currentExtent.height; + } + + VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + uint32_t nb_swap_images = surface_descriptors.minImageCount + 1; + + if ((surface_descriptors.maxImageCount > 0) && (nb_swap_images > surface_descriptors.maxImageCount)) + { + // Application must settle for fewer images than desired: + nb_swap_images = surface_descriptors.maxImageCount; + } + + VkSurfaceTransformFlagBitsKHR pre_transform = surface_descriptors.currentTransform; + if (surface_descriptors.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) + pre_transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + + 
VkSwapchainCreateInfoKHR swap_info; + swap_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swap_info.pNext = nullptr; + swap_info.surface = m_surface; + swap_info.minImageCount = nb_swap_images; + swap_info.imageFormat = m_surface_format; + swap_info.imageColorSpace = m_color_space; + + swap_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT; + swap_info.preTransform = pre_transform; + swap_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + swap_info.imageArrayLayers = 1; + swap_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swap_info.queueFamilyIndexCount = 0; + swap_info.pQueueFamilyIndices = nullptr; + swap_info.presentMode = swapchain_present_mode; + swap_info.oldSwapchain = old_swapchain; + swap_info.clipped = true; + + swap_info.imageExtent.width = width; + swap_info.imageExtent.height = height; + + createSwapchainKHR(dev, &swap_info, nullptr, &m_vk_swapchain); + + if (old_swapchain) + destroySwapchainKHR(dev, old_swapchain, nullptr); + + nb_swap_images = 0; + getSwapchainImagesKHR(dev, m_vk_swapchain, &nb_swap_images, nullptr); + + if (!nb_swap_images) throw EXCEPTION("Undefined trap"); + + std::vector swap_images; + swap_images.resize(nb_swap_images); + getSwapchainImagesKHR(dev, m_vk_swapchain, &nb_swap_images, swap_images.data()); + + m_swap_images.resize(nb_swap_images); + for (u32 i = 0; i < nb_swap_images; ++i) + { + m_swap_images[i].create(dev, swap_images[i], m_surface_format); + } + } + + u32 get_swap_image_count() + { + return m_swap_images.size(); + } + + vk::swap_chain_image& get_swap_chain_image(const int index) + { + return m_swap_images[index]; + } + + const vk::render_device& get_device() + { + return dev; + } + + const VkQueue& get_present_queue() + { + return vk_graphics_queue; + } + + const VkFormat get_surface_format() + { + return m_surface_format; + } + + operator const VkSwapchainKHR() + { + return m_vk_swapchain; + } + }; + + class command_pool + { + vk::render_device *owner 
= nullptr; + VkCommandPool pool = nullptr; + + public: + command_pool() {} + ~command_pool() {} + + void create(vk::render_device &dev) + { + owner = &dev; + VkCommandPoolCreateInfo infos; + infos.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + infos.pNext = nullptr; + infos.queueFamilyIndex = 0; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + + CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); + } + + void destroy() + { + if (!pool) + return; + + vkDestroyCommandPool((*owner), pool, nullptr); + pool = nullptr; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkCommandPool() + { + return pool; + } + }; + + class command_buffer + { + vk::command_pool *pool = nullptr; + VkCommandBuffer commands = nullptr; + + public: + command_buffer() {} + ~command_buffer() {} + + void create(vk::command_pool &cmd_pool) + { + VkCommandBufferAllocateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + infos.commandBufferCount = 1; + infos.commandPool = (VkCommandPool)cmd_pool; + infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + infos.pNext = nullptr; + + CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); + pool = &cmd_pool; + } + + void destroy() + { + vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); + } + + operator VkCommandBuffer() + { + return commands; + } + }; + + class context + { + private: + std::vector gpus; + + std::vector m_vk_instances; + VkInstance m_instance; + + PFN_vkDestroyDebugReportCallbackEXT destroyDebugReportCallback = nullptr; + PFN_vkCreateDebugReportCallbackEXT createDebugReportCallback = nullptr; + VkDebugReportCallbackEXT m_debugger = nullptr; + + public: + + context() + { + m_instance = nullptr; + } + + ~context() + { + if (m_instance || m_vk_instances.size()) + close(); + } + + void close() + { + if (!m_vk_instances.size()) return; + + if (m_debugger) + { + destroyDebugReportCallback(m_instance, m_debugger, nullptr); + 
m_debugger = nullptr; + } + + for (VkInstance &inst : m_vk_instances) + { + vkDestroyInstance(inst, nullptr); + } + + m_instance = nullptr; + m_vk_instances.resize(0); + } + + void enable_debugging() + { + //Registers vk::dbgFunc as the debug report callback on the current instance for errors and warnings + PFN_vkDebugReportCallbackEXT callback = vk::dbgFunc; + + createDebugReportCallback = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(m_instance, "vkCreateDebugReportCallbackEXT"); + destroyDebugReportCallback = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(m_instance, "vkDestroyDebugReportCallbackEXT"); + + VkDebugReportCallbackCreateInfoEXT dbgCreateInfo; + dbgCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; + dbgCreateInfo.pNext = NULL; + dbgCreateInfo.pfnCallback = callback; + dbgCreateInfo.pUserData = NULL; + dbgCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; + + CHECK_RESULT(createDebugReportCallback(m_instance, &dbgCreateInfo, NULL, &m_debugger)); + } + + uint32_t createInstance(const char *app_name) + { + //Creates a new VkInstance, stores it and returns its 1-based id. Throws if vkCreateInstance fails. + //Initialize a vulkan instance + VkApplicationInfo app; + + app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app.pNext = nullptr; + app.pApplicationName = app_name; + app.applicationVersion = 0; + app.pEngineName = app_name; + app.engineVersion = 0; + //The former (1, 0, 0) was a comma expression that evaluated to 0; encode the version properly + app.apiVersion = VK_MAKE_VERSION(1, 0, 0); + + //Set up instance information + const char *requested_extensions[] = + { + "VK_KHR_surface", + "VK_KHR_win32_surface", + "VK_EXT_debug_report", + }; + + const char *validation_layers[] = + { + "VK_LAYER_LUNARG_threading", "VK_LAYER_LUNARG_mem_tracker", + "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_draw_state", + "VK_LAYER_LUNARG_param_checker", "VK_LAYER_LUNARG_swapchain", + "VK_LAYER_LUNARG_device_limits", "VK_LAYER_LUNARG_image", + "VK_LAYER_GOOGLE_unique_objects", + }; + + VkInstanceCreateInfo instance_info; + instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_info.pNext = nullptr; + instance_info.flags = 0; //No creation flags; every member must hold a defined value before the call + instance_info.pApplicationInfo = &app; + instance_info.enabledLayerCount = 
VK_ENABLED_LAYER_COUNT; + instance_info.ppEnabledLayerNames = validation_layers; + instance_info.enabledExtensionCount = 3; + instance_info.ppEnabledExtensionNames = requested_extensions; + + VkInstance instance; + VkResult error = vkCreateInstance(&instance_info, nullptr, &instance); + + if (error != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + + m_vk_instances.push_back(instance); + return m_vk_instances.size(); + } + + void makeCurrentInstance(uint32_t instance_id) + { + if (!instance_id || instance_id > m_vk_instances.size()) + throw EXCEPTION("Undefined trap"); + + if (m_debugger) + { + destroyDebugReportCallback(m_instance, m_debugger, nullptr); + m_debugger = nullptr; + } + + instance_id--; + m_instance = m_vk_instances[instance_id]; + } + + VkInstance getCurrentInstance() + { + return m_instance; + } + + VkInstance getInstanceById(uint32_t instance_id) + { + if (!instance_id || instance_id > m_vk_instances.size()) + throw EXCEPTION("Undefined trap"); + + instance_id--; + return m_vk_instances[instance_id]; + } + + std::vector& enumerateDevices() + { + uint32_t num_gpus; + CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, nullptr)); + + if (gpus.size() != num_gpus) + { + std::vector pdevs(num_gpus); + gpus.resize(num_gpus); + + CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data())); + + for (int i = 0; i < num_gpus; ++i) + gpus[i].set_device(pdevs[i]); + } + + return gpus; + } + + vk::swap_chain* createSwapChain(HINSTANCE hInstance, HWND hWnd, vk::physical_device &dev) + { + VkWin32SurfaceCreateInfoKHR createInfo; + createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; + createInfo.pNext = NULL; + createInfo.flags = 0; + createInfo.hinstance = hInstance; + createInfo.hwnd = hWnd; + + VkSurfaceKHR surface; + VkResult err = vkCreateWin32SurfaceKHR(m_instance, &createInfo, NULL, &surface); + + uint32_t device_queues = dev.get_queue_count(); + std::vector supportsPresent(device_queues); + + for (int index = 
0; index < device_queues; index++) + { + vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, surface, &supportsPresent[index]); + } + + // Search for a graphics and a present queue in the array of queue + // families, try to find one that supports both + uint32_t graphicsQueueNodeIndex = UINT32_MAX; + uint32_t presentQueueNodeIndex = UINT32_MAX; + + for (int i = 0; i < device_queues; i++) + { + if ((dev.get_queue_properties(i).queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) + { + if (graphicsQueueNodeIndex == UINT32_MAX) + graphicsQueueNodeIndex = i; + + if (supportsPresent[i] == VK_TRUE) + { + graphicsQueueNodeIndex = i; + presentQueueNodeIndex = i; + + break; + } + } + } + + if (presentQueueNodeIndex == UINT32_MAX) + { + // If didn't find a queue that supports both graphics and present, then + // find a separate present queue. + for (uint32_t i = 0; i < device_queues; ++i) + { + if (supportsPresent[i] == VK_TRUE) + { + presentQueueNodeIndex = i; + break; + } + } + } + + // Generate error if could not find both a graphics and a present queue + if (graphicsQueueNodeIndex == UINT32_MAX || presentQueueNodeIndex == UINT32_MAX) + throw EXCEPTION("Undefined trap"); + + if (graphicsQueueNodeIndex != presentQueueNodeIndex) + throw EXCEPTION("Undefined trap"); + + // Get the list of VkFormat's that are supported: + uint32_t formatCount; + err = vkGetPhysicalDeviceSurfaceFormatsKHR(dev, surface, &formatCount, nullptr); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + + std::vector surfFormats(formatCount); + err = vkGetPhysicalDeviceSurfaceFormatsKHR(dev, surface, &formatCount, surfFormats.data()); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + + VkFormat format; + VkColorSpaceKHR color_space; + + if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) + { + format = VK_FORMAT_B8G8R8A8_UNORM; + } + else + { + if (!formatCount) throw EXCEPTION("Undefined trap"); + format = surfFormats[0].format; + } + + color_space = 
surfFormats[0].colorSpace; + + return new swap_chain(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, format, surface, color_space); + } + }; + + class descriptor_pool + { + VkDescriptorPool pool = nullptr; + vk::render_device *owner = nullptr; + + public: + descriptor_pool() {} + ~descriptor_pool() {} + + void create(vk::render_device &dev, VkDescriptorPoolSize *sizes, u32 size_descriptors_count) + { + VkDescriptorPoolCreateInfo infos; + infos.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + infos.maxSets = 2; + infos.pNext = nullptr; + infos.poolSizeCount = size_descriptors_count; + infos.pPoolSizes = sizes; + infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + + owner = &dev; + CHECK_RESULT(vkCreateDescriptorPool(dev, &infos, nullptr, &pool)); + } + + void destroy() + { + if (!pool) return; + + vkDestroyDescriptorPool((*owner), pool, nullptr); + owner = nullptr; + pool = nullptr; + } + + bool valid() + { + return (pool != nullptr); + } + + operator VkDescriptorPool() + { + return pool; + } + }; + + namespace glsl + { + enum program_domain + { + glsl_vertex_program = 0, + glsl_fragment_program = 1 + }; + + enum program_input_type + { + input_type_uniform_buffer = 0, + input_type_texel_buffer = 1, + input_type_texture = 2 + }; + + struct bound_sampler + { + VkImageView image_view = nullptr; + VkSampler sampler = nullptr; + }; + + struct bound_buffer + { + VkBufferView buffer_view = nullptr; + VkBuffer buffer = nullptr; + u32 offset = 0; + u32 size = 0; + }; + + struct program_input + { + program_domain domain; + program_input_type type; + + bound_buffer as_buffer; + bound_sampler as_sampler; + + int location; + std::string name; + }; + + class program + { + struct pipeline_state + { + VkGraphicsPipelineCreateInfo pipeline; + VkPipelineCacheCreateInfo pipeline_cache_desc; + VkPipelineCache pipeline_cache; + VkPipelineVertexInputStateCreateInfo vi; + VkPipelineInputAssemblyStateCreateInfo ia; + VkPipelineRasterizationStateCreateInfo rs; + 
VkPipelineColorBlendStateCreateInfo cb; + VkPipelineDepthStencilStateCreateInfo ds; + VkPipelineViewportStateCreateInfo vp; + VkPipelineMultisampleStateCreateInfo ms; + VkDynamicState dynamic_state_descriptors[VK_DYNAMIC_STATE_RANGE_SIZE]; + VkPipelineDynamicStateCreateInfo dynamic_state; + + VkPipelineColorBlendAttachmentState att_state[4]; + + VkPipelineShaderStageCreateInfo shader_stages[2]; + VkRenderPass render_pass = nullptr; + VkShaderModule vs, fs; + VkPipeline pipeline_handle = nullptr; + + VkDescriptorSetLayout descriptor_layouts[2];; + VkDescriptorSet descriptor_sets[2]; + VkPipelineLayout pipeline_layout; + + int num_targets = 1; + + bool dirty; + bool in_use; + } + pstate; + + bool uniforms_changed = true; + + vk::render_device *device = nullptr; + std::vector uniforms; + vk::descriptor_pool descriptor_pool; + + void init_pipeline(); + + public: + program(); + program(const program&) = delete; + program(program&& other); + program(vk::render_device &renderer); + + ~program(); + + program& attach_device(vk::render_device &dev); + program& attachFragmentProgram(VkShaderModule prog); + program& attachVertexProgram(VkShaderModule prog); + + void make(); + void destroy(); + + //Render state stuff... 
+ void set_depth_compare_op(VkCompareOp op); + void set_depth_write_mask(VkBool32 write_enable); + void set_depth_test_enable(VkBool32 state); + void set_primitive_topology(VkPrimitiveTopology topology); + void set_color_mask(int num_targets, u8* targets, VkColorComponentFlags *flags); + void set_blend_state(int num_targets, u8* targets, VkBool32 *enable); + void set_blend_state(int num_targets, u8* targets, VkBool32 enable); + void set_blend_func(int num_targets, u8* targets, VkBlendFactor *src_color, VkBlendFactor *dst_color, VkBlendFactor *src_alpha, VkBlendFactor *dst_alpha); + void set_blend_func(int num_targets, u8 * targets, VkBlendFactor src_color, VkBlendFactor dst_color, VkBlendFactor src_alpha, VkBlendFactor dst_alpha); + void set_blend_op(int num_targets, u8* targets, VkBlendOp* color_ops, VkBlendOp* alpha_ops); + void set_blend_op(int num_targets, u8 * targets, VkBlendOp color_op, VkBlendOp alpha_op); + void set_primitive_restart(VkBool32 state); + + void init_descriptor_layout(); + void update_descriptors(); + void destroy_descriptors(); + + void set_draw_buffer_count(u8 draw_buffers); + + program& load_uniforms(program_domain domain, std::vector& inputs); + + void use(vk::command_buffer& commands, VkRenderPass pass, u32 subpass); + + bool has_uniform(program_domain domain, std::string uniform_name); + bool bind_uniform(program_domain domain, std::string uniform_name); + bool bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture); + bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer); + bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store); + + program& operator = (const program&) = delete; + program& operator = (program&& other); + }; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h new file mode 100644 index 0000000000..ecb8cf90f1 --- /dev/null +++ 
b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -0,0 +1,47 @@ +#pragma once +#include "VKVertexProgram.h" +#include "VKFragmentProgram.h" +#include "../Common/ProgramStateCache.h" + +struct VKTraits +{ + using vertex_program_type = VKVertexProgram; + using fragment_program_type = VKFragmentProgram; + using pipeline_storage_type = vk::glsl::program; + using pipeline_properties = void*; + + static + void recompile_fragment_program(const RSXFragmentProgram &RSXFP, fragment_program_type& fragmentProgramData, size_t ID) + { + fragmentProgramData.Decompile(RSXFP); + fragmentProgramData.Compile(); + } + + static + void recompile_vertex_program(const RSXVertexProgram &RSXVP, vertex_program_type& vertexProgramData, size_t ID) + { + vertexProgramData.Decompile(RSXVP); + vertexProgramData.Compile(); + } + + static + pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties) + { + pipeline_storage_type result(*vk::get_current_renderer()); + + std::vector vertex_uniforms = vertexProgramData.uniforms; + std::vector fragment_uniforms = fragmentProgramData.uniforms; + + result.attachVertexProgram(vertexProgramData.handle) + .attachFragmentProgram(fragmentProgramData.handle) + .load_uniforms(vk::glsl::glsl_vertex_program, vertex_uniforms) + .load_uniforms(vk::glsl::glsl_fragment_program, fragment_uniforms) + .make(); + + return result; + } +}; + +class VKProgramBuffer : public program_state_cache +{ +}; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp new file mode 100644 index 0000000000..faa9e7a328 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -0,0 +1,805 @@ +#include "stdafx.h" +#include "VKHelpers.h" + +namespace vk +{ + namespace glsl + { + program::program() + { + memset(&pstate, 0, sizeof(pstate)); + } + + program::program(vk::render_device &renderer) + { + memset(&pstate, 0, sizeof(pstate)); + 
init_pipeline(); + device = &renderer; + } + + program::program(program&& other) + { + //This object does not yet exist in a valid state. Clear the original + memset(&pstate, 0, sizeof(pstate)); + + pipeline_state tmp; + memcpy(&tmp, &pstate, sizeof pstate); + memcpy(&pstate, &other.pstate, sizeof pstate); + memcpy(&other.pstate, &tmp, sizeof pstate); + + std::vector tmp_uniforms = uniforms; + uniforms = other.uniforms; + other.uniforms = tmp_uniforms; + + vk::descriptor_pool tmp_pool; + descriptor_pool = other.descriptor_pool; + other.descriptor_pool = tmp_pool; + + vk::render_device *tmp_dev = device; + device = other.device; + other.device = tmp_dev; + + bool _uniforms_changed = uniforms_changed; + uniforms_changed = other.uniforms_changed; + other.uniforms_changed = _uniforms_changed; + } + + program& program::operator = (program&& other) + { + pipeline_state tmp; + memcpy(&tmp, &pstate, sizeof pstate); + memcpy(&pstate, &other.pstate, sizeof pstate); + memcpy(&other.pstate, &tmp, sizeof pstate); + + std::vector tmp_uniforms = uniforms; + uniforms = other.uniforms; + other.uniforms = tmp_uniforms; + + vk::descriptor_pool tmp_pool; + descriptor_pool = other.descriptor_pool; + other.descriptor_pool = tmp_pool; + + vk::render_device *tmp_dev = device; + device = other.device; + other.device = tmp_dev; + + bool _uniforms_changed = uniforms_changed; + uniforms_changed = other.uniforms_changed; + other.uniforms_changed = _uniforms_changed; + + return *this; + } + + void program::init_pipeline() + { + pstate.dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors; + + pstate.pipeline.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pstate.pipeline.layout = nullptr; + + pstate.vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + pstate.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + pstate.ia.topology = 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + pstate.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + pstate.rs.polygonMode = VK_POLYGON_MODE_FILL; + pstate.rs.cullMode = VK_CULL_MODE_NONE; + pstate.rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + pstate.rs.depthClampEnable = VK_FALSE; + pstate.rs.rasterizerDiscardEnable = VK_FALSE; + pstate.rs.depthBiasEnable = VK_FALSE; + + pstate.cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + pstate.cb.attachmentCount = 1; + pstate.cb.pAttachments = pstate.att_state; + + for (int i = 0; i < 4; ++i) + { + pstate.att_state[i].colorWriteMask = 0xf; + pstate.att_state[i].blendEnable = VK_FALSE; + } + + pstate.vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + pstate.vp.viewportCount = 1; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + pstate.vp.scissorCount = 1; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH; + + pstate.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + pstate.ds.depthTestEnable = VK_FALSE; + pstate.ds.depthWriteEnable = VK_TRUE; + pstate.ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + pstate.ds.depthBoundsTestEnable = VK_FALSE; + pstate.ds.back.failOp = VK_STENCIL_OP_KEEP; + pstate.ds.back.passOp = VK_STENCIL_OP_KEEP; + pstate.ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + pstate.ds.stencilTestEnable = VK_FALSE; + pstate.ds.front = pstate.ds.back; + + pstate.ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + pstate.ms.pSampleMask = NULL; + pstate.ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + pstate.fs = nullptr; + pstate.vs = nullptr; + pstate.dirty = true; + + pstate.pipeline.stageCount = 2; + + pstate.shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + 
pstate.shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + pstate.shader_stages[0].module = nullptr; + pstate.shader_stages[0].pName = "main"; + + pstate.shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pstate.shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + pstate.shader_stages[1].module = nullptr; + pstate.shader_stages[1].pName = "main"; + + pstate.pipeline_cache_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + } + + program::~program() + { + LOG_ERROR(RSX, "Program destructor invoked!"); + destroy(); + } + + program& program::attach_device(vk::render_device &dev) + { + if (!device) + init_pipeline(); + + device = &dev; + return *this; + } + + program& program::attachFragmentProgram(VkShaderModule prog) + { + pstate.fs = prog; + return *this; + } + + program& program::attachVertexProgram(VkShaderModule prog) + { + pstate.vs = prog; + return *this; + } + + void program::make() + { + if (pstate.fs == nullptr || pstate.vs == nullptr) + throw EXCEPTION("Missing shader stage!"); + + pstate.shader_stages[0].module = pstate.vs; + pstate.shader_stages[1].module = pstate.fs; + + CHECK_RESULT(vkCreatePipelineCache((*device), &pstate.pipeline_cache_desc, nullptr, &pstate.pipeline_cache)); + } + + void program::set_depth_compare_op(VkCompareOp op) + { + if (pstate.ds.depthCompareOp != op) + { + pstate.ds.depthCompareOp = op; + pstate.dirty = true; + } + } + + void program::set_depth_write_mask(VkBool32 write_enable) + { + if (pstate.ds.depthWriteEnable != write_enable) + { + pstate.ds.depthWriteEnable = write_enable; + pstate.dirty = true; + } + } + + void program::set_depth_test_enable(VkBool32 state) + { + if (pstate.ds.depthTestEnable != state) + { + pstate.ds.depthTestEnable = state; + pstate.dirty = true; + } + } + + void program::set_primitive_topology(VkPrimitiveTopology topology) + { + if (pstate.ia.topology != topology) + { + pstate.ia.topology = topology; + pstate.dirty = true; + } + } + + void 
program::set_color_mask(int num_targets, u8* targets, VkColorComponentFlags* flags) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorWriteMask != flags[idx]) + { + pstate.att_state[id].colorWriteMask = flags[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_state(int num_targets, u8* targets, VkBool32* enable) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].blendEnable != enable[idx]) + { + pstate.att_state[id].blendEnable = enable[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_state(int num_targets, u8 *targets, VkBool32 enable) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].blendEnable != enable) + { + pstate.att_state[id].blendEnable = enable; + pstate.dirty = true; + } + } + } + + void program::set_blend_func(int num_targets, u8* targets, VkBlendFactor* src_color, VkBlendFactor* dst_color, VkBlendFactor* src_alpha, VkBlendFactor* dst_alpha) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].srcColorBlendFactor != src_color[idx]) + { + pstate.att_state[id].srcColorBlendFactor = src_color[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstColorBlendFactor != dst_color[idx]) + { + pstate.att_state[id].dstColorBlendFactor = dst_color[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].srcAlphaBlendFactor != src_alpha[idx]) + { + pstate.att_state[id].srcAlphaBlendFactor = src_alpha[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstAlphaBlendFactor != dst_alpha[idx]) + { + pstate.att_state[id].dstAlphaBlendFactor = dst_alpha[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_func(int num_targets, u8* targets, VkBlendFactor src_color, VkBlendFactor dst_color, 
VkBlendFactor src_alpha, VkBlendFactor dst_alpha) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].srcColorBlendFactor != src_color) + { + pstate.att_state[id].srcColorBlendFactor = src_color; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstColorBlendFactor != dst_color) + { + pstate.att_state[id].dstColorBlendFactor = dst_color; + pstate.dirty = true; + } + + if (pstate.att_state[id].srcAlphaBlendFactor != src_alpha) + { + pstate.att_state[id].srcAlphaBlendFactor = src_alpha; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstAlphaBlendFactor != dst_alpha) + { + pstate.att_state[id].dstAlphaBlendFactor = dst_alpha; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_op(int num_targets, u8* targets, VkBlendOp* color_ops, VkBlendOp* alpha_ops) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorBlendOp != color_ops[idx]) + { + pstate.att_state[id].colorBlendOp = color_ops[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].alphaBlendOp != alpha_ops[idx]) + { + pstate.att_state[id].alphaBlendOp = alpha_ops[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_op(int num_targets, u8* targets, VkBlendOp color_op, VkBlendOp alpha_op) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorBlendOp != color_op) + { + pstate.att_state[id].colorBlendOp = color_op; + pstate.dirty = true; + } + + if (pstate.att_state[id].alphaBlendOp != alpha_op) + { + pstate.att_state[id].alphaBlendOp = alpha_op; + pstate.dirty = true; + } + } + } + } + + void program::set_primitive_restart(VkBool32 state) + { + if (pstate.ia.primitiveRestartEnable != state) + { + pstate.ia.primitiveRestartEnable = state; + pstate.dirty = true; + } + } + + void program::init_descriptor_layout() + { + if 
(pstate.descriptor_layouts[0] != nullptr) + throw EXCEPTION("Existing descriptors found!"); + + if (descriptor_pool.valid()) + descriptor_pool.destroy(); + + std::vector layout_bindings[2]; + std::vector sizes; + + program_input_type types[] = { input_type_uniform_buffer, input_type_texel_buffer, input_type_texture }; + program_domain stages[] = { glsl_vertex_program, glsl_fragment_program }; + + VkDescriptorType vk_ids[] = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER }; + VkShaderStageFlags vk_stages[] = { VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT }; + + for (auto &input : uniforms) + { + VkDescriptorSetLayoutBinding binding; + binding.binding = input.location; + binding.descriptorCount = 1; + binding.descriptorType = vk_ids[(u32)input.type]; + binding.pImmutableSamplers = nullptr; + binding.stageFlags = vk_stages[(u32)input.domain]; + + layout_bindings[(u32)input.domain].push_back(binding); + } + + for (int i = 0; i < 3; ++i) + { + u32 count = 0; + for (auto &input : uniforms) + { + if (input.type == types[i]) + count++; + } + + if (!count) continue; + + VkDescriptorPoolSize size; + size.descriptorCount = count; + size.type = vk_ids[i]; + + sizes.push_back(size); + } + + descriptor_pool.create((*device), sizes.data(), sizes.size()); + + VkDescriptorSetLayoutCreateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + infos.pNext = nullptr; + infos.flags = 0; + infos.pBindings = layout_bindings[0].data(); + infos.bindingCount = layout_bindings[0].size(); + + CHECK_RESULT(vkCreateDescriptorSetLayout((*device), &infos, nullptr, &pstate.descriptor_layouts[0])); + + infos.pBindings = layout_bindings[1].data(); + infos.bindingCount = layout_bindings[1].size(); + + CHECK_RESULT(vkCreateDescriptorSetLayout((*device), &infos, nullptr, &pstate.descriptor_layouts[1])); + + VkPipelineLayoutCreateInfo layout_info; + layout_info.sType = 
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+			layout_info.pNext = nullptr;
+			layout_info.setLayoutCount = 2;
+			layout_info.pSetLayouts = pstate.descriptor_layouts;
+			layout_info.flags = 0;
+			layout_info.pPushConstantRanges = nullptr;
+			layout_info.pushConstantRangeCount = 0;
+
+			CHECK_RESULT(vkCreatePipelineLayout((*device), &layout_info, nullptr, &pstate.pipeline_layout));
+
+			VkDescriptorSetAllocateInfo alloc_info;
+			alloc_info.descriptorPool = descriptor_pool;
+			alloc_info.descriptorSetCount = 2;
+			alloc_info.pNext = nullptr;
+			alloc_info.pSetLayouts = pstate.descriptor_layouts;
+			alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+
+			CHECK_RESULT(vkAllocateDescriptorSets((*device), &alloc_info, pstate.descriptor_sets));
+		}
+
+		//Writes the current binding of every entry in 'uniforms' into the descriptor sets.
+		//Creates the descriptor layout first if it does not exist yet. One VkWriteDescriptorSet
+		//is built per input; an input with nothing bound gets the null_*() placeholder and an
+		//error is logged. Throws on an input type that is not texture/uniform buffer/texel buffer.
+		void program::update_descriptors()
+		{
+			if (!pstate.descriptor_layouts[0])
+				init_descriptor_layout();
+
+			//Every write keeps a pointer into one of the scratch vectors below, so they are
+			//sized once up front (one slot per input is always enough) and never resized.
+			const size_t input_count = uniforms.size();
+
+			std::vector<VkWriteDescriptorSet> descriptor_writers;
+			std::vector<VkDescriptorImageInfo> images(input_count);
+			std::vector<VkDescriptorBufferInfo> buffers(input_count);
+			std::vector<VkDescriptorBufferInfo> texel_buffers(input_count);
+			std::vector<VkBufferView> texel_buffer_views(input_count);
+			VkWriteDescriptorSet write;
+
+			descriptor_writers.reserve(input_count);
+
+			int image_index = 0;
+			int buffer_index = 0;
+			int texel_buffer_index = 0;
+
+			for (auto &input : uniforms)
+			{
+				switch (input.type)
+				{
+				case input_type_texture:
+				{
+					auto &image = images[image_index++];
+					image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+					image.sampler = null_sampler();
+					image.imageView = null_image_view();
+
+					if (input.as_sampler.sampler && input.as_sampler.image_view)
+					{
+						image.imageView = input.as_sampler.image_view;
+						image.sampler = input.as_sampler.sampler;
+						image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+					}
+					else
+						LOG_ERROR(RSX, "Texture object was not bound: %s", input.name);
+
+					memset(&write, 0, sizeof(write));
+					write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+					write.pImageInfo = &image;
+					write.descriptorCount = 1;
+
+					break;
+				}
+				case input_type_uniform_buffer:
+				{
+					auto &buffer = buffers[buffer_index++];
+					buffer.buffer = null_buffer();
+					buffer.offset = 0;
+					buffer.range = 0;
+
+					if (input.as_buffer.buffer)
+					{
+						buffer.buffer = input.as_buffer.buffer;
+						buffer.range = input.as_buffer.size;
+					}
+					else
+						LOG_ERROR(RSX, "UBO was not bound: %s", input.name);
+
+					memset(&write, 0, sizeof(write));
+					write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+					write.pBufferInfo = &buffer;
+					write.descriptorCount = 1;
+					break;
+				}
+				case input_type_texel_buffer:
+				{
+					auto &buffer_view = texel_buffer_views[texel_buffer_index];
+					buffer_view = null_buffer_view();
+
+					auto &buffer = texel_buffers[texel_buffer_index++];
+					buffer.buffer = null_buffer();
+					buffer.offset = 0;
+					buffer.range = 0;
+
+					if (input.as_buffer.buffer && input.as_buffer.buffer_view)
+					{
+						buffer_view = input.as_buffer.buffer_view;
+						buffer.buffer = input.as_buffer.buffer;
+						buffer.range = input.as_buffer.size;
+					}
+					else
+						LOG_ERROR(RSX, "Texel buffer was not bound: %s", input.name);
+
+					memset(&write, 0, sizeof(write));
+					write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+					write.pTexelBufferView = &buffer_view;
+					write.pBufferInfo = &buffer;
+					write.descriptorCount = 1;
+					break;
+				}
+				default:
+					throw EXCEPTION("Unhandled input type!");
+				}
+
+				//Fields shared by all three kinds; the set is selected by the input's domain
+				write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+				write.dstSet = pstate.descriptor_sets[input.domain];
+				write.pNext = nullptr;
+
+				write.dstBinding = input.location;
+				descriptor_writers.push_back(write);
+			}
+
+			if (!descriptor_writers.size()) return;
+			if (descriptor_writers.size() != uniforms.size())
+				throw EXCEPTION("Undefined uniform detected");
+
+			vkUpdateDescriptorSets((*device), (u32)descriptor_writers.size(), descriptor_writers.data(), 0, nullptr);
+		}
+
+		void program::destroy_descriptors()
+		{
+			if (pstate.descriptor_sets[0])
+				vkFreeDescriptorSets((*device), descriptor_pool, 2, pstate.descriptor_sets);
+
+			if (pstate.pipeline_layout)
+				vkDestroyPipelineLayout((*device), pstate.pipeline_layout, nullptr);
+
+			if (pstate.descriptor_layouts[0])
+
vkDestroyDescriptorSetLayout((*device), pstate.descriptor_layouts[0], nullptr); + + if (pstate.descriptor_layouts[1]) + vkDestroyDescriptorSetLayout((*device), pstate.descriptor_layouts[1], nullptr); + + descriptor_pool.destroy(); + } + + void program::set_draw_buffer_count(u8 draw_buffers) + { + if (pstate.num_targets != draw_buffers) + { + pstate.num_targets = draw_buffers; + pstate.dirty = true; + } + } + + program& program::load_uniforms(program_domain domain, std::vector& inputs) + { + std::vector store = uniforms; + uniforms.resize(0); + + for (auto &item : store) + { + if (item.domain != domain) + uniforms.push_back(item); + } + + for (auto &item : inputs) + uniforms.push_back(item); + + return *this; + } + + void program::use(vk::command_buffer& commands, VkRenderPass pass, u32 subpass) + { + if (/*uniforms_changed*/true) + { + update_descriptors(); + uniforms_changed = false; + } + + if (pstate.dirty) + { + if (pstate.pipeline_handle) + vkDestroyPipeline((*device), pstate.pipeline_handle, nullptr); + + pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors; + pstate.cb.pAttachments = pstate.att_state; + pstate.cb.attachmentCount = pstate.num_targets; + + //Reconfigure this.. 
+ pstate.pipeline.pVertexInputState = &pstate.vi; + pstate.pipeline.pInputAssemblyState = &pstate.ia; + pstate.pipeline.pRasterizationState = &pstate.rs; + pstate.pipeline.pColorBlendState = &pstate.cb; + pstate.pipeline.pMultisampleState = &pstate.ms; + pstate.pipeline.pViewportState = &pstate.vp; + pstate.pipeline.pDepthStencilState = &pstate.ds; + pstate.pipeline.pStages = pstate.shader_stages; + pstate.pipeline.pDynamicState = &pstate.dynamic_state; + pstate.pipeline.layout = pstate.pipeline_layout; + pstate.pipeline.basePipelineIndex = -1; + pstate.pipeline.basePipelineHandle = VK_NULL_HANDLE; + + pstate.pipeline.renderPass = pass; + + CHECK_RESULT(vkCreateGraphicsPipelines((*device), nullptr, 1, &pstate.pipeline, NULL, &pstate.pipeline_handle)); + pstate.dirty = false; + } + + vkCmdBindPipeline(commands, VK_PIPELINE_BIND_POINT_GRAPHICS, pstate.pipeline_handle); + vkCmdBindDescriptorSets(commands, VK_PIPELINE_BIND_POINT_GRAPHICS, pstate.pipeline_layout, 0, 2, pstate.descriptor_sets, 0, nullptr); + } + + bool program::has_uniform(program_domain domain, std::string uniform_name) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + return true; + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + uniform.as_buffer.buffer = nullptr; + uniform.as_buffer.buffer_view = nullptr; + uniform.as_sampler.image_view = nullptr; + uniform.as_sampler.sampler = nullptr; + + uniforms_changed = true; + return true; + } + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkImageView view = _texture; + VkSampler sampler = _texture; + + if (uniform.as_sampler.image_view != view 
|| + uniform.as_sampler.sampler != sampler) + { + uniform.as_sampler.image_view = view; + uniform.as_sampler.sampler = sampler; + uniforms_changed = true; + } + + uniform.type = input_type_texture; + return true; + } + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkBuffer buf = _buffer; + u32 size = _buffer.size(); + + if (uniform.as_buffer.buffer != buf || + uniform.as_buffer.size != size) + { + uniform.as_buffer.size = size; + uniform.as_buffer.buffer = buf; + uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed! + + uniforms_changed = true; + } + + uniform.type = input_type_uniform_buffer; + return true; + } + } + + throw EXCEPTION("Failed to bind program uniform %s", uniform_name); + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store) + { + if (!is_texel_store) + { + return bind_uniform(domain, uniform_name, _buffer); + } + + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkBuffer buf = _buffer; + VkBufferView view = _buffer; + u32 size = _buffer.size(); + + if (uniform.as_buffer.buffer != buf || + uniform.as_buffer.buffer_view != view || + uniform.as_buffer.size != size) + { + uniform.as_buffer.size = size; + uniform.as_buffer.buffer = buf; + uniform.as_buffer.buffer_view = view; + + if (!view) + throw EXCEPTION("Invalid buffer passed as texel storage"); + + uniforms_changed = true; + } + + uniform.type = input_type_texel_buffer; + return true; + } + } + + return false; + } + + void program::destroy() + { + if (device) + { + destroy_descriptors(); + uniforms.resize(0); + + if (pstate.pipeline_handle) + vkDestroyPipeline((*device), pstate.pipeline_handle, nullptr); + + if (pstate.pipeline_cache) + 
vkDestroyPipelineCache((*device), pstate.pipeline_cache, nullptr); + } + + memset(&pstate, 0, sizeof pstate); + device = nullptr; + } + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h new file mode 100644 index 0000000000..666a2b6e52 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -0,0 +1,118 @@ +#pragma once + +#include "stdafx.h" +#include "VKHelpers.h" +#include "../GCM.h" +#include "../Common/surface_store.h" + +namespace rsx +{ + struct vk_render_target_traits + { + using surface_storage_type = vk::texture ; + using surface_type = vk::texture*; + using command_list_type = vk::command_buffer*; + using download_buffer_object = void*; + + static vk::texture create_new_surface(u32 address, surface_color_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd) + { + VkFormat requested_format = vk::get_compatible_surface_format(format); + + vk::texture rtt; + rtt.create(device, requested_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + return rtt; + } + + static vk::texture create_new_surface(u32 address, surface_depth_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd) + { + VkFormat requested_format = vk::get_compatible_depth_surface_format(format); + + vk::texture rtt; + rtt.create(device, requested_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + return rtt; + } + + static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } + + static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::texture *surface) + { + 
surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + static bool rtt_has_format_width_height(const vk::texture &rtt, surface_color_format format, size_t width, size_t height) + { + VkFormat fmt = vk::get_compatible_surface_format(format); + vk::texture &tex = const_cast(rtt); + + if (tex.get_format() == fmt && + tex.width() == width && + tex.height() == height) + return true; + + return false; + } + + static bool ds_has_format_width_height(const vk::texture &ds, surface_depth_format format, size_t width, size_t height) + { + VkFormat fmt = vk::get_compatible_depth_surface_format(format); + vk::texture &tex = const_cast(ds); + + if (tex.get_format() == fmt && + tex.width() == width && + tex.height() == height) + return true; + + return false; + } + + static download_buffer_object issue_download_command(surface_type, surface_color_format color_format, size_t width, size_t height, ...) + { + return nullptr; + } + + static download_buffer_object issue_depth_download_command(surface_type, surface_depth_format depth_format, size_t width, size_t height, ...) + { + return nullptr; + } + + static download_buffer_object issue_stencil_download_command(surface_type, surface_depth_format depth_format, size_t width, size_t height, ...) + { + return nullptr; + } + + gsl::span map_downloaded_buffer(download_buffer_object, ...) + { + return{ (gsl::byte*)nullptr, 0 }; + } + + static void unmap_downloaded_buffer(download_buffer_object, ...) 
+ { + } + + static vk::texture *get(const vk::texture &tex) + { + return const_cast(&tex); + } + }; + + struct vk_render_targets : public rsx::surface_store + { + }; +} diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp new file mode 100644 index 0000000000..78e1d46498 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -0,0 +1,560 @@ +#include "stdafx.h" +#include "VKHelpers.h" +#include "../GCM.h" +#include "../RSXThread.h" +#include "../RSXTexture.h" +#include "../rsx_utils.h" +#include "../Common/TextureUtils.h" + +namespace vk +{ + VkComponentMapping default_component_map() + { + VkComponentMapping result; + result.a = VK_COMPONENT_SWIZZLE_A; + result.r = VK_COMPONENT_SWIZZLE_R; + result.g = VK_COMPONENT_SWIZZLE_G; + result.b = VK_COMPONENT_SWIZZLE_B; + + return result; + } + + VkImageSubresource default_image_subresource() + { + VkImageSubresource subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.mipLevel = 0; + subres.arrayLayer = 0; + + return subres; + } + + VkImageSubresourceRange default_image_subresource_range() + { + VkImageSubresourceRange subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + return subres; + } + + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) + { + VkImageSubresourceLayers a_src, a_dst; + a_src.aspectMask = aspect; + a_src.baseArrayLayer = 0; + a_src.layerCount = 1; + a_src.mipLevel = 0; + + a_dst = a_src; + + VkImageCopy rgn; + rgn.extent.depth = 1; + rgn.extent.width = width; + rgn.extent.height = height; + rgn.dstOffset = { 0, 0, 0 }; + rgn.srcOffset = { 0, 0, 0 }; + rgn.srcSubresource = a_src; + rgn.dstSubresource = a_dst; + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, srcLayout, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, aspect);
+
+		if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
+			change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect);
+
+		//One copy per mip level. Each level is half the size of the previous one (never below
+		//one texel), so the copied extent has to shrink together with the level index;
+		//reusing the level 0 extent would reach outside the smaller subresources.
+		for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
+		{
+			const u32 mip_width = width >> mip_level;
+			const u32 mip_height = height >> mip_level;
+
+			rgn.extent.width = mip_width ? mip_width : 1;
+			rgn.extent.height = mip_height ? mip_height : 1;
+
+			vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn);
+
+			rgn.srcSubresource.mipLevel++;
+			rgn.dstSubresource.mipLevel++;
+		}
+
+		//Hand both images back in the layouts the caller gave us
+		if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
+			change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, aspect);
+
+		if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
+			change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, aspect);
+	}
+
+	//Records a linear-filtered blit of 'mipmaps' levels from src to dst, scaling from
+	//src_width x src_height to dst_width x dst_height at level 0.
+	//Images not already in the TRANSFER_SRC/TRANSFER_DST layouts are transitioned for the
+	//blit and transitioned back to srcLayout/dstLayout afterwards.
+	void copy_scaled_image(VkCommandBuffer cmd, VkImage & src, VkImage & dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect)
+	{
+		VkImageSubresourceLayers a_src, a_dst;
+		a_src.aspectMask = aspect;
+		a_src.baseArrayLayer = 0;
+		a_src.layerCount = 1;
+		a_src.mipLevel = 0;
+
+		a_dst = a_src;
+
+		VkImageBlit rgn;
+		rgn.srcOffsets[0] = { 0, 0, 0 };
+		rgn.srcOffsets[1] = { (int32_t)src_width, (int32_t)src_height, 1 };
+		rgn.dstOffsets[0] = { 0, 0, 0 };
+		rgn.dstOffsets[1] = { (int32_t)dst_width, (int32_t)dst_height, 1 };
+		rgn.dstSubresource = a_dst;
+		rgn.srcSubresource = a_src;
+
+		if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
+			change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, aspect);
+
+		if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
+			change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect);
+
+		for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
+		{
+			//Keep the blit rectangle inside the current level of each image (minimum one texel)
+			const u32 src_w = src_width >> mip_level;
+			const u32 src_h = src_height >> mip_level;
+			const u32 dst_w = dst_width >> mip_level;
+			const u32 dst_h = dst_height >> mip_level;
+
+			rgn.srcOffsets[1] = { (int32_t)(src_w ? src_w : 1), (int32_t)(src_h ? src_h : 1), 1 };
+			rgn.dstOffsets[1] = { (int32_t)(dst_w ? dst_w : 1), (int32_t)(dst_h ? dst_h : 1), 1 };
+
+			vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR);
+
+			rgn.srcSubresource.mipLevel++;
+
rgn.dstSubresource.mipLevel++; + } + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, aspect); + + if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, aspect); + } + + void copy_texture(VkCommandBuffer cmd, texture &src, texture &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) + { + VkImage isrc = (VkImage)src; + VkImage idst = (VkImage)dst; + + copy_image(cmd, isrc, idst, srcLayout, dstLayout, width, height, mipmaps, aspect); + } + + texture::texture(vk::swap_chain_image &img) + { + m_image_contents = img; + m_view = img; + m_sampler = nullptr; + + //We did not create this object, do not allow internal modification! + owner = nullptr; + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + { + owner = &device; + + //First create the image + VkImageCreateInfo image_info; + memset(&image_info, 0, sizeof(image_info)); + + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + image_info.imageType = image_type; + image_info.format = format; + image_info.extent = { width, height, 1 }; + image_info.mipLevels = mipmaps; + image_info.arrayLayers = (image_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)? 
6: 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = tiling; + image_info.usage = usage; + image_info.flags = image_flags; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + CHECK_RESULT(vkCreateImage(device, &image_info, nullptr, &m_image_contents)); + + vkGetImageMemoryRequirements(device, m_image_contents, &m_memory_layout); + vram_allocation.allocate_from_pool(device, m_memory_layout.size, m_memory_layout.memoryTypeBits); + + CHECK_RESULT(vkBindImageMemory(device, m_image_contents, vram_allocation, 0)); + + VkImageViewCreateInfo view_info; + view_info.format = format; + view_info.image = m_image_contents; + view_info.pNext = nullptr; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.viewType = view_type; + view_info.components = swizzle; + view_info.subresourceRange = default_image_subresource_range(); + view_info.flags = 0; + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT/* | VK_IMAGE_ASPECT_STENCIL_BIT*/; + m_image_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + } + + CHECK_RESULT(vkCreateImageView(device, &view_info, nullptr, &m_view)); + + m_width = width; + m_height = height; + m_mipmaps = mipmaps; + m_internal_format = format; + m_flags = usage; + m_view_type = view_type; + m_usage = usage; + m_tiling = tiling; + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT || + usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + VkSamplerAddressMode clamp_s = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VkSamplerAddressMode clamp_t = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VkSamplerAddressMode clamp_r = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + + VkSamplerCreateInfo sampler_info; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = clamp_s; + sampler_info.addressModeV = clamp_t; + sampler_info.addressModeW = clamp_r; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.compareEnable = VK_FALSE; 
+			sampler_info.pNext = nullptr;
+			sampler_info.unnormalizedCoordinates = VK_FALSE;
+			sampler_info.mipLodBias = 0;
+			sampler_info.maxAnisotropy = 0;
+			sampler_info.flags = 0;
+			//sampler_info is not zero-initialised, so the LOD clamp has to be spelled out like
+			//every other field; cover the whole mip chain of the image created above
+			sampler_info.minLod = 0.f;
+			sampler_info.maxLod = (float)mipmaps;
+			sampler_info.magFilter = VK_FILTER_LINEAR;
+			sampler_info.minFilter = VK_FILTER_LINEAR;
+			sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+			sampler_info.compareOp = VK_COMPARE_OP_NEVER;
+			sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+
+			CHECK_RESULT(vkCreateSampler((*owner), &sampler_info, nullptr, &m_sampler));
+		}
+
+		ready = true;
+	}
+
+	//Convenience overload: a plain 2D image with a 2D view and no image creation flags.
+	void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle)
+	{
+		create(device, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, 0, usage, tiling, width, height, mipmaps, gpu_only, swizzle);
+	}
+
+	//Convenience overload that selects the tiling itself; optimal tiling is the starting point
+	//and sampled images are checked against the format properties reported by the GPU.
+	void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle)
+	{
+		VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL;
+
+		if (usage & VK_IMAGE_USAGE_SAMPLED_BIT)
+		{
+			VkFormatProperties props;
+			vkGetPhysicalDeviceFormatProperties(device.gpu(), format, &props);
+
+			//Enable linear tiling if supported and we request a sampled image..
+ if (props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + tiling = VK_IMAGE_TILING_LINEAR; + else + usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + + create(device, format, usage, tiling, width, height, mipmaps, gpu_only, swizzle); + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only) + { + create(device, format, usage, width, height, mipmaps, gpu_only, vk::default_component_map()); + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height) + { + create(device, format, usage, width, height, 1, false); + } + + VkSamplerAddressMode texture::vk_wrap_mode(u32 gcm_wrap) + { + switch (gcm_wrap) + { + case CELL_GCM_TEXTURE_WRAP: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case CELL_GCM_TEXTURE_MIRROR: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_BORDER: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case CELL_GCM_TEXTURE_CLAMP: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + default: + throw EXCEPTION("unhandled texture clamp mode 0x%X", gcm_wrap); + } + } + + float texture::max_aniso(u32 gcm_aniso) + { + switch (gcm_aniso) + { + case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12.0f; + case 
CELL_GCM_TEXTURE_MAX_ANISO_16: return 16.0f; + } + + LOG_ERROR(RSX, "Texture anisotropy error: bad max aniso (%d).", gcm_aniso); + return 1.0f; + } + + void texture::sampler_setup(rsx::texture &tex, VkImageViewType type, VkComponentMapping swizzle) + { + VkSamplerAddressMode clamp_s = vk_wrap_mode(tex.wrap_s()); + VkSamplerAddressMode clamp_t = vk_wrap_mode(tex.wrap_t()); + VkSamplerAddressMode clamp_r = vk_wrap_mode(tex.wrap_r()); + + VkSamplerCreateInfo sampler_info; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = clamp_s; + sampler_info.addressModeV = clamp_t; + sampler_info.addressModeW = clamp_r; + sampler_info.anisotropyEnable = VK_TRUE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.pNext = nullptr; + sampler_info.unnormalizedCoordinates = !!(tex.format() & CELL_GCM_TEXTURE_UN); + sampler_info.mipLodBias = tex.bias(); + sampler_info.maxAnisotropy = max_aniso(tex.max_aniso()); + sampler_info.flags = 0; + sampler_info.maxLod = tex.max_lod(); + sampler_info.minLod = tex.min_lod(); + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + CHECK_RESULT(vkCreateSampler((*owner), &sampler_info, nullptr, &m_sampler)); + } + + void texture::init(rsx::texture& tex, vk::command_buffer &cmd, bool ignore_checks) + { + VkImageViewType best_type = VK_IMAGE_VIEW_TYPE_2D; + + if (tex.cubemap() && m_view_type != VK_IMAGE_VIEW_TYPE_CUBE) + { + vk::render_device &dev = (*owner); + VkFormat format = m_internal_format; + VkImageUsageFlags usage = m_usage; + VkImageTiling tiling = m_tiling; + + destroy(); + create(dev, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_CUBE, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, usage, tiling, tex.width(), tex.height(), tex.mipmap(), false, default_component_map()); + } + + if (!tex.cubemap() && 
tex.depth() > 1 && m_view_type != VK_IMAGE_VIEW_TYPE_3D) + { + best_type = VK_IMAGE_VIEW_TYPE_3D; + + vk::render_device &dev = (*owner); + VkFormat format = m_internal_format; + VkImageUsageFlags usage = m_usage; + VkImageTiling tiling = m_tiling; + + destroy(); + create(dev, format, VK_IMAGE_TYPE_3D, VK_IMAGE_VIEW_TYPE_3D, 0, usage, tiling, tex.width(), tex.height(), tex.mipmap(), false, default_component_map()); + } + + if (!m_sampler) + sampler_setup(tex, best_type, default_component_map()); + + VkImageSubresource subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.mipLevel = 0; + subres.arrayLayer = 0; + + u8 *data; + + VkFormatProperties props; + vk::physical_device dev = owner->gpu(); + vkGetPhysicalDeviceFormatProperties(dev, m_internal_format, &props); + + if (ignore_checks || props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + { + std::vector> layout_alignment(tex.mipmap()); + + for (u32 i = 0; i < tex.mipmap(); ++i) + { + layout_alignment[i].first = 4096; + vkGetImageSubresourceLayout((*owner), m_image_contents, &subres, &layout_alignment[i].second); + + if (m_view_type == VK_IMAGE_VIEW_TYPE_CUBE) + layout_alignment[i].second.size *= 6; + + while (layout_alignment[i].first > 1) + { + //Test if is wholly divisible by alignment.. 
+ if (!(layout_alignment[i].second.rowPitch & (layout_alignment[i].first - 1))) + break; + + layout_alignment[i].first >>= 1; + } + + subres.mipLevel++; + } + + if (tex.mipmap() == 1) + { + u32 buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); + if (buffer_size != layout_alignment[0].second.size) + { + if (buffer_size > layout_alignment[0].second.size) + { + LOG_ERROR(RSX, "Layout->pitch = %d, size=%d, height=%d", layout_alignment[0].second.rowPitch, layout_alignment[0].second.size, tex.height()); + LOG_ERROR(RSX, "Computed alignment would have been %d, which yielded a size of %d", layout_alignment[0].first, buffer_size); + LOG_ERROR(RSX, "Retrying..."); + + //layout_alignment[0].first >>= 1; + buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); + + if (buffer_size != layout_alignment[0].second.size) + throw EXCEPTION("Bad texture alignment computation!"); + } + else + { + LOG_ERROR(RSX, "Bad texture alignment computation: expected size=%d bytes, computed=%d bytes, alignment=%d, hw pitch=%d", + layout_alignment[0].second.size, buffer_size, layout_alignment[0].first, layout_alignment[0].second.rowPitch); + } + } + + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + gsl::span mapped{ (gsl::byte*)(data + layout_alignment[0].second.offset), gsl::narrow(layout_alignment[0].second.size) }; + + upload_placed_texture(mapped, tex, layout_alignment[0].first); + vkUnmapMemory((*owner), vram_allocation); + } + else + { + auto &layer_props = layout_alignment[layout_alignment.size() - 1].second; + u32 max_size = layer_props.offset + layer_props.size; + + if (m_memory_layout.size < max_size) + { + throw EXCEPTION("Failed to upload texture. 
Invalid memory block size."); + } + + int index= 0; + std::vector> layout_offset_info(tex.mipmap()); + + for (auto &mip_info : layout_offset_info) + { + auto &alignment = layout_alignment[index].first; + auto &layout = layout_alignment[index++].second; + + mip_info = std::make_pair(layout.offset, layout.rowPitch); + } + + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + gsl::span mapped{ (gsl::byte*)(data), gsl::narrow(m_memory_layout.size) }; + + upload_texture_mipmaps(mapped, tex, layout_offset_info); + vkUnmapMemory((*owner), vram_allocation); + } + } + else if (!ignore_checks) + { + if (!staging_texture) + { + staging_texture = new texture(); + staging_texture->create((*owner), m_internal_format, VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_TILING_LINEAR, m_width, m_height, tex.mipmap(), false, default_component_map()); + } + + staging_texture->init(tex, cmd, true); + staging_texture->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + ready = false; + } + } + + void texture::init(rsx::texture &tex, vk::command_buffer &cmd) + { + init(tex, cmd, false); + } + + void texture::flush(vk::command_buffer &cmd) + { + if (!ready) + { + vk::copy_texture(cmd, *staging_texture, *this, staging_texture->get_layout(), m_layout, m_width, m_height, m_mipmaps, m_image_aspect); + ready = true; + } + } + + void texture::init_debug() + { + void *data; + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + + memset(data, 0xFF, m_memory_layout.size); + vkUnmapMemory((*owner), vram_allocation); + } + + void texture::change_layout(vk::command_buffer &cmd, VkImageLayout new_layout) + { + if (m_layout == new_layout) return; + + vk::change_image_layout(cmd, m_image_contents, m_layout, new_layout, m_image_aspect); + m_layout = new_layout; + } + + VkImageLayout texture::get_layout() + { + return m_layout; + } + + const u32 texture::width() + { + return m_width; 
+ } + + const u32 texture::height() + { + return m_height; + } + + const u16 texture::mipmaps() + { + return m_mipmaps; + } + + void texture::destroy() + { + if (!owner) return; + + if (m_sampler) + vkDestroySampler((*owner), m_sampler, nullptr); + + //Destroy all objects managed by this object + vkDestroyImageView((*owner), m_view, nullptr); + vkDestroyImage((*owner), m_image_contents, nullptr); + + vram_allocation.destroy(); + + owner = nullptr; + m_sampler = nullptr; + m_view = nullptr; + m_image_contents = nullptr; + + if (staging_texture) + { + staging_texture->destroy(); + delete staging_texture; + staging_texture = nullptr; + } + } + + const VkFormat texture::get_format() + { + return m_internal_format; + } + + texture::operator VkImage() + { + return m_image_contents; + } + + texture::operator VkImageView() + { + return m_view; + } + + texture::operator VkSampler() + { + return m_sampler; + } +} diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h new file mode 100644 index 0000000000..a474f3d1cb --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -0,0 +1,240 @@ +#pragma once +#include "stdafx.h" +#include "VKRenderTargets.h" +#include "VKGSRender.h" +#include "../Common/TextureUtils.h" + +namespace vk +{ + struct cached_texture_object + { + u32 native_rsx_address; + u32 native_rsx_size; + + u16 width; + u16 height; + u16 depth; + u16 mipmaps; + + vk::texture uploaded_texture; + + u64 protected_rgn_start; + u64 protected_rgn_end; + + bool exists = false; + bool locked = false; + bool dirty = true; + }; + + class texture_cache + { + private: + std::vector m_cache; + + bool lock_memory_region(u32 start, u32 size) + { + static const u32 memory_page_size = 4096; + start = start & ~(memory_page_size - 1); + size = (u32)align(size, memory_page_size); + + return vm::page_protect(start, size, 0, 0, vm::page_writable); + } + + bool unlock_memory_region(u32 start, u32 size) + { + static const u32 memory_page_size = 4096; + start = 
start & ~(memory_page_size - 1); + size = (u32)align(size, memory_page_size); + + return vm::page_protect(start, size, 0, vm::page_writable, 0); + } + + bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) + { + //Check for memory area overlap. unlock page(s) if needed and add this index to array. + //Axis separation test + const u32 &block_start = base1; + const u32 block_end = limit1; + + if (limit2 < block_start) return false; + if (base2 > block_end) return false; + + u32 min_separation = (limit2 - base2) + (limit1 - base1); + u32 range_limit = (block_end > limit2) ? block_end : limit2; + u32 range_base = (block_start < base2) ? block_start : base2; + + u32 actual_separation = (range_limit - range_base); + + if (actual_separation < min_separation) + return true; + + return false; + } + + cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) + { + for (cached_texture_object &tex : m_cache) + { + if (!tex.dirty && tex.exists && + tex.native_rsx_address == rsx_address && + tex.native_rsx_size == rsx_size) + { + if (!confirm_dimensions) return tex; + + if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps) + return tex; + else + { + LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters."); + LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height); + } + } + } + + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty) + { + if (tex.exists) + { + tex.uploaded_texture.destroy(); + tex.exists = false; + } + + return tex; + } + } + + cached_texture_object object; + m_cache.push_back(object); + + return m_cache[m_cache.size() - 1]; + } + + void lock_object(cached_texture_object &obj) + { + static const u32 memory_page_size = 4096; + obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1); + obj.protected_rgn_end = (u32)align(obj.native_rsx_size, 
memory_page_size); + obj.protected_rgn_end += obj.protected_rgn_start; + + lock_memory_region(obj.protected_rgn_start, obj.native_rsx_size); + } + + void unlock_object(cached_texture_object &obj) + { + unlock_memory_region(obj.protected_rgn_start, obj.native_rsx_size); + } + + public: + + texture_cache() {} + ~texture_cache() {} + + void destroy() + { + for (cached_texture_object &tex : m_cache) + { + if (tex.exists) + { + tex.uploaded_texture.destroy(); + tex.exists = false; + } + } + + m_cache.resize(0); + } + + vk::texture& upload_texture(command_buffer cmd, rsx::texture &tex, rsx::vk_render_targets &m_rtts) + { + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + const u32 range = (u32)get_texture_size(tex); + + //First check if it exists as an rtt... + vk::texture *rtt_texture = nullptr; + if (rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) + { + return *rtt_texture; + } + + if (rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) + { + return *rtt_texture; + } + + cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.mipmap()); + if (cto.exists && !cto.dirty) + { + return cto.uploaded_texture; + } + + u32 raw_format = tex.format(); + u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + VkComponentMapping mapping; + VkFormat vk_format = get_compatible_sampler_format(format, mapping, tex.remap()); + + cto.uploaded_texture.create(*vk::get_current_renderer(), vk_format, VK_IMAGE_USAGE_SAMPLED_BIT, tex.width(), tex.height(), tex.mipmap(), false, mapping); + cto.uploaded_texture.init(tex, cmd); + cto.uploaded_texture.change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + cto.exists = true; + cto.dirty = false; + cto.native_rsx_address = texaddr; + cto.native_rsx_size = range; + cto.width = cto.uploaded_texture.width(); + cto.height = cto.uploaded_texture.height(); + cto.mipmaps = cto.uploaded_texture.mipmaps(); + + 
lock_object(cto); + + return cto.uploaded_texture; + } + + bool invalidate_address(u32 rsx_address) + { + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty) continue; + + if (rsx_address >= tex.protected_rgn_start && + rsx_address < tex.protected_rgn_end) + { + unlock_object(tex); + + tex.native_rsx_address = 0; + tex.dirty = true; + + return true; + } + } + + return false; + } + + void flush(vk::command_buffer &cmd) + { + //Finish all pending transactions for any cache managed textures.. + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty || !tex.exists) continue; + tex.uploaded_texture.flush(cmd); + } + } + + void merge_dirty_textures(std::list dirty_textures) + { + for (vk::texture &tex : dirty_textures) + { + cached_texture_object cto; + cto.uploaded_texture = tex; + cto.locked = false; + cto.exists = true; + cto.dirty = true; + cto.native_rsx_address = 0; + + m_cache.push_back(cto); + } + } + }; +} diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp new file mode 100644 index 0000000000..7709150aaf --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -0,0 +1,566 @@ +#include "stdafx.h" +#include "Utilities/rPlatform.h" // only for rImage +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/state.h" +#include "VKGSRender.h" +#include "../rsx_methods.h" +#include "../Common/BufferUtils.h" + +namespace vk +{ + bool requires_component_expansion(rsx::vertex_base_type type, u32 size) + { + if (size == 3) + { + switch (type) + { + case rsx::vertex_base_type::f: + return true; + } + } + + return false; + } + + u32 get_suitable_vk_size(rsx::vertex_base_type type, u32 size) + { + if (size == 3) + { + switch (type) + { + case rsx::vertex_base_type::f: + return 16; + } + } + + return rsx::get_vertex_type_size_on_host(type, size); + } + + VkFormat get_suitable_vk_format(rsx::vertex_base_type type, u8 size) + { + /** + * Set up buffer fetches to only work on 4-component access. 
This is hardware dependant so we use 4-component access to avoid branching based on IHV implementation + * AMD GCN 1.0 for example does not support RGB32 formats for texel buffers + */ + const VkFormat vec1_types[] = { VK_FORMAT_R16_UNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM }; + const VkFormat vec2_types[] = { VK_FORMAT_R16G16_UNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM }; + const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM }; //VEC3 COMPONENTS NOT SUPPORTED! + const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM }; + + const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; + + if (type > rsx::vertex_base_type::ub256) + throw EXCEPTION("VKGS error: unknown vertex base type 0x%X.", (u32)type); + + return vec_selectors[size][(int)type]; + } + + VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification) + { + requires_modification = false; + + switch (mode) + { + case rsx::primitive_type::lines: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + case rsx::primitive_type::line_loop: + requires_modification = true; + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case rsx::primitive_type::line_strip: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case rsx::primitive_type::points: + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + case rsx::primitive_type::triangles: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + case rsx::primitive_type::triangle_strip: + return 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + case rsx::primitive_type::triangle_fan: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + case rsx::primitive_type::quads: + case rsx::primitive_type::quad_strip: + case rsx::primitive_type::polygon: + requires_modification = true; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + default: + throw ("Unsupported primitive topology 0x%X", (u8)mode); + } + } + + /** + * Expand line loop array to line strip array; simply loop back the last vertex to the first.. + */ + u32 expand_line_loop_array_to_strip(u32 vertex_draw_count, std::vector& indices) + { + int i = 0; + indices.resize(vertex_draw_count + 1); + + for (; i < vertex_draw_count; ++i) + indices[i] = i; + + indices[i] = 0; + return indices.size(); + } + + template + u32 expand_indexed_line_loop_to_strip(u32 original_count, const T* original_indices, std::vector& indices) + { + indices.resize(original_count + 1); + + int i = 0; + for (; i < original_count; ++i) + indices[i] = original_indices[i]; + + indices[i] = original_indices[0]; + return indices.size(); + } + + /** + * Template: Expand any N-compoent vector to a larger X-component vector and pad unused slots with 1 + */ + template + void expand_array_components(const T* src_data, std::vector& dst_data, u32 vertex_count) + { + u32 dst_size = (vertex_count * dst_components * sizeof(T)); + dst_data.resize(dst_size); + + T* src = const_cast(src_data); + T* dst = reinterpret_cast(dst_data.data()); + + for (u32 index = 0; index < vertex_count; ++index) + { + for (u8 channel = 0; channel < dst_components; channel++) + { + if (channel < src_components) + { + *dst = *src; + + dst++; + src++; + } + else + { + *dst = (T)(padding); + dst++; + } + } + } + } + + template + void copy_inlined_data_to_buffer(void *src_data, void *dst_data, u32 vertex_count, rsx::vertex_base_type type, u8 src_channels, u8 dst_channels, u16 element_size, u16 stride) + { + u8 *src = static_cast(src_data); + u8 *dst = static_cast(dst_data); + + for (u32 i = 0; 
i < vertex_count; ++i) + { + T* src_ptr = reinterpret_cast(src); + T* dst_ptr = reinterpret_cast(dst); + + switch (type) + { + case rsx::vertex_base_type::ub: + { + if (src_channels == 4) + { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + + break; + } + } + default: + { + for (u8 ch = 0; ch < dst_channels; ++ch) + { + if (ch < src_channels) + { + *dst_ptr = *src_ptr; + src_ptr++; + } + else + *dst_ptr = (T)(padding); + + dst_ptr++; + } + } + } + + src += stride; + dst += element_size; + } + } + + void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count) + { + switch (type) + { + case rsx::vertex_base_type::sf: + { + if (vertex_size == 3) + { + /** + * Pad the 4th component for half-float arrays to 1, since texelfetch does not mask components + */ + u16 *dst = reinterpret_cast(data); + for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4) + dst[idx] = 0x3c00; + } + + break; + } + } + } +} + +std::tuple +VKGSRender::upload_vertex_data() +{ + //initialize vertex attributes + std::vector vertex_arrays_data; + + const std::string reg_table[] = + { + "in_pos_buffer", "in_weight_buffer", "in_normal_buffer", + "in_diff_color_buffer", "in_spec_color_buffer", + "in_fog_buffer", + "in_point_size_buffer", "in_7_buffer", + "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", + "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" + }; + + u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; + + std::vector vertex_index_array; + vertex_draw_count = 0; + u32 min_index, max_index; + + if (draw_command == rsx::draw_command::indexed) + { + rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + u32 type_size = gsl::narrow(get_index_type_size(type)); + for (const auto& first_count : first_count_commands) + { + vertex_draw_count += first_count.second; + } + + 
vertex_index_array.resize(vertex_draw_count * type_size); + + switch (type) + { + case rsx::index_array_type::u32: + std::tie(min_index, max_index) = write_index_array_data_to_buffer_untouched(gsl::span((u32*)vertex_index_array.data(), vertex_draw_count), first_count_commands); + break; + case rsx::index_array_type::u16: + std::tie(min_index, max_index) = write_index_array_data_to_buffer_untouched(gsl::span((u16*)vertex_index_array.data(), vertex_draw_count), first_count_commands); + break; + } + } + + if (draw_command == rsx::draw_command::inlined_array) + { + u32 stride = 0; + u32 offsets[rsx::limits::vertex_count] = { 0 }; + + for (u32 i = 0; i < rsx::limits::vertex_count; ++i) + { + const auto &info = vertex_arrays_info[i]; + if (!info.size) continue; + + offsets[i] = stride; + stride += rsx::get_vertex_type_size_on_host(info.type, info.size); + } + + vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + auto &vertex_info = vertex_arrays_info[index]; + + if (!m_program->has_uniform(vk::glsl::glsl_vertex_program, reg_table[index])) + continue; + + if (!vertex_info.size) // disabled + { + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index]); + continue; + } + + const u32 host_element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + const u32 element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size); + const u32 data_size = element_size * vertex_draw_count; + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); + + vertex_arrays_data.resize(data_size); + u8 *src = reinterpret_cast(inline_vertex_array.data()); + u8 *dst = vertex_arrays_data.data(); + + src += offsets[index]; + u8 opt_size = vertex_info.size; + + if (vertex_info.size == 3) + opt_size = 4; + + //TODO: properly handle cmp type + if (vertex_info.type == rsx::vertex_base_type::cmp) + LOG_ERROR(RSX, 
"Compressed vertex attributes not supported for inlined arrays yet"); + + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::sf: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::s1: + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::ub256: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::s32k: + case rsx::vertex_base_type::cmp: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + default: + throw EXCEPTION("Unknown base type %d", vertex_info.type); + } + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, vertex_arrays_data.data()); + buffer.set_format(format); + + //Link texture to uniform location + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + } + } + + if (draw_command == rsx::draw_command::array) + { + for (const auto &first_count : first_count_commands) + { + vertex_draw_count += first_count.second; + } + } + + if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) + { + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + if (!m_program->has_uniform(vk::glsl::glsl_vertex_program, reg_table[index])) + continue; + + bool enabled = !!(input_mask & (1 << index)); + + if (!enabled) + { + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index]); + continue; + } + + if (vertex_arrays_info[index].size > 0) + { + auto &vertex_info = vertex_arrays_info[index]; + // Active vertex array + 
std::vector vertex_array; + + // Fill vertex_array + u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + vertex_array.resize(vertex_draw_count * element_size); + + // Get source pointer + u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; + u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index]; + u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31); + const gsl::byte *src_ptr = gsl::narrow_cast(vm::base(address)); + + u32 num_stored_verts = vertex_draw_count; + + if (draw_command == rsx::draw_command::array) + { + size_t offset = 0; + gsl::span dest_span(vertex_array); + vk::prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + + for (const auto &first_count : first_count_commands) + { + write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride); + offset += first_count.second * element_size; + } + } + if (draw_command == rsx::draw_command::indexed) + { + num_stored_verts = (max_index + 1); + vertex_array.resize((max_index + 1) * element_size); + gsl::span dest_span(vertex_array); + vk::prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + + write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride); + } + + std::vector converted_buffer; + void *data_ptr = vertex_array.data(); + + if (vk::requires_component_expansion(vertex_info.type, vertex_info.size)) + { + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + vk::expand_array_components(reinterpret_cast(vertex_array.data()), converted_buffer, num_stored_verts); + break; + } + + data_ptr = static_cast(converted_buffer.data()); + } + + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, 
vertex_info.size); + const u32 data_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size) * num_stored_verts; + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, data_ptr); + buffer.set_format(format); + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + } + else if (register_vertex_info[index].size > 0) + { + //Untested! + auto &vertex_data = register_vertex_data[index]; + auto &vertex_info = register_vertex_info[index]; + + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + { + size_t data_size = vertex_data.size(); + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); + + std::vector converted_buffer; + void *data_ptr = vertex_data.data(); + + if (vk::requires_component_expansion(vertex_info.type, vertex_info.size)) + { + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + { + const u32 num_stored_verts = data_size / (sizeof(float) * vertex_info.size); + vk::expand_array_components(reinterpret_cast(vertex_data.data()), converted_buffer, num_stored_verts); + break; + } + } + + data_ptr = static_cast(converted_buffer.data()); + data_size = converted_buffer.size(); + } + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, data_ptr); + buffer.set_format(format); + + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + break; + } + default: + LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size); + break; + } + } + } + } + + bool is_indexed_draw = (draw_command == rsx::draw_command::indexed); + bool index_buffer_filled = false; + bool primitives_emulated = false; + u32 index_count = vertex_draw_count; + + VkIndexType index_format = VK_INDEX_TYPE_UINT16; + VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_mode, primitives_emulated); + + if (primitives_emulated) + { + //Line loops are line-strips with 
loop-back; using line-strips-with-adj doesnt work for vulkan + if (draw_mode == rsx::primitive_type::line_loop) + { + std::vector indices; + + if (!is_indexed_draw) + { + index_count = vk::expand_line_loop_array_to_strip(vertex_draw_count, indices); + m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + } + else + { + rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + if (indexed_type == rsx::index_array_type::u32) + { + index_format = VK_INDEX_TYPE_UINT32; + std::vector indices32; + + index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u32*)vertex_index_array.data(), indices32); + m_index_buffer.sub_data(0, index_count*sizeof(u32), indices32.data()); + } + else + { + index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u16*)vertex_index_array.data(), indices); + m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + } + } + } + else + { + index_count = get_index_count(draw_mode, vertex_draw_count); + std::vector indices(index_count); + + if (is_indexed_draw) + { + rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + size_t index_size = get_index_type_size(indexed_type); + + std::vector> ranges; + ranges.push_back(std::pair(0, vertex_draw_count)); + + gsl::span dst = { (u16*)indices.data(), gsl::narrow(index_count) }; + write_index_array_data_to_buffer(dst, draw_mode, ranges); + } + else + { + write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast(indices.data()), draw_mode, 0, vertex_draw_count); + } + + m_index_buffer.sub_data(0, index_count * sizeof(u16), indices.data()); + } + + is_indexed_draw = true; + index_buffer_filled = true; + } + + if (!index_buffer_filled && is_indexed_draw) + { + rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + index_format = 
VK_INDEX_TYPE_UINT16; + VkFormat fmt = VK_FORMAT_R16_UINT; + + u32 elem_size = get_index_type_size(indexed_type); + + if (indexed_type == rsx::index_array_type::u32) + { + index_format = VK_INDEX_TYPE_UINT32; + fmt = VK_FORMAT_R32_UINT; + } + + u32 index_sz = vertex_index_array.size() / elem_size; + if (index_sz != vertex_draw_count) + LOG_ERROR(RSX, "Vertex draw count mismatch!"); + + m_index_buffer.sub_data(0, vertex_index_array.size(), vertex_index_array.data()); + m_index_buffer.set_format(fmt); //Unnecessary unless viewing contents in sampler... + } + + return std::make_tuple(prims, is_indexed_draw, index_count, index_format); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp new file mode 100644 index 0000000000..f113d29c4e --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -0,0 +1,301 @@ +#include "stdafx.h" +#include "Emu/System.h" + +#include "VKVertexProgram.h" +#include "VKCommonDecompiler.h" +#include "VKHelpers.h" + +std::string VKVertexDecompilerThread::getFloatTypeName(size_t elementCount) +{ + return vk::getFloatTypeNameImpl(elementCount); +} + +std::string VKVertexDecompilerThread::getIntTypeName(size_t elementCount) +{ + return "ivec4"; +} + + +std::string VKVertexDecompilerThread::getFunction(FUNCTION f) +{ + return vk::getFunctionImpl(f); +} + +std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + return vk::compareFunctionImpl(f, Op0, Op1); +} + +void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) +{ + OS << "#version 450" << std::endl << std::endl; + OS << "#extension GL_ARB_separate_shader_objects : enable" << std::endl; + OS << "layout(std140, set=0, binding = 0) uniform ScaleOffsetBuffer" << std::endl; + OS << "{" << std::endl; + OS << " mat4 scaleOffsetMat;" << std::endl; + OS << " float fog_param0;\n"; + OS << " float fog_param1;\n"; + OS << "};" << std::endl; + + 
vk::glsl::program_input in; + in.location = 0; + in.domain = vk::glsl::glsl_vertex_program; + in.name = "ScaleOffsetBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) +{ + std::vector> input_data; + for (const ParamType &PT : inputs) + { + for (const ParamItem &PI : PT.items) + { + input_data.push_back(std::make_tuple(PI.location, PI.name)); + } + } + + /** + * Its is important that the locations are in the order that vertex attributes are expected. + * If order is not adhered to, channels may be swapped leading to corruption + */ + + std::sort(input_data.begin(), input_data.end()); + + int location = 2; + for (const std::tuple item : input_data) + { + for (const ParamType &PT : inputs) + { + for (const ParamItem &PI : PT.items) + { + if (PI.name == std::get<1>(item)) + { + vk::glsl::program_input in; + in.location = location; + in.domain = vk::glsl::glsl_vertex_program; + in.name = PI.name + "_buffer"; + in.type = vk::glsl::input_type_texel_buffer; + + this->inputs.push_back(in); + + OS << "layout(set=0, binding=" << location++ << ")" << " uniform samplerBuffer" << " " << PI.name << "_buffer;" << std::endl; + } + } + } + } +} + +void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) +{ + OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer" << std::endl; + OS << "{" << std::endl; + OS << " vec4 vc[468];" << std::endl; + OS << "};" << std::endl; + + vk::glsl::program_input in; + in.location = 1; + in.domain = vk::glsl::glsl_vertex_program; + in.name = "VertexConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +struct reg_info +{ + std::string name; + bool need_declare; + std::string src_reg; + std::string src_reg_mask; + bool need_cast; +}; + +static const reg_info reg_table[] = +{ + { "gl_Position", false, "dst_reg0", "", 
false }, + { "diff_color", true, "dst_reg1", "", false }, + { "spec_color", true, "dst_reg2", "", false }, + { "front_diff_color", true, "dst_reg3", "", false }, + { "front_spec_color", true, "dst_reg4", "", false }, + { "fog_c", true, "dst_reg5", ".xxxx", true }, + { "gl_ClipDistance[0]", false, "dst_reg5", ".y", false }, + { "gl_ClipDistance[1]", false, "dst_reg5", ".z", false }, + { "gl_ClipDistance[2]", false, "dst_reg5", ".w", false }, + { "gl_PointSize", false, "dst_reg6", ".x", false }, + { "gl_ClipDistance[3]", false, "dst_reg6", ".y", false }, + { "gl_ClipDistance[4]", false, "dst_reg6", ".z", false }, + { "gl_ClipDistance[5]", false, "dst_reg6", ".w", false }, + { "tc0", true, "dst_reg7", "", false }, + { "tc1", true, "dst_reg8", "", false }, + { "tc2", true, "dst_reg9", "", false }, + { "tc3", true, "dst_reg10", "", false }, + { "tc4", true, "dst_reg11", "", false }, + { "tc5", true, "dst_reg12", "", false }, + { "tc6", true, "dst_reg13", "", false }, + { "tc7", true, "dst_reg14", "", false }, + { "tc8", true, "dst_reg15", "", false }, + { "tc9", true, "dst_reg6", "", false } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. 
+}; + +void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::vector & outputs) +{ + for (auto &i : reg_table) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", i.src_reg) && i.need_declare) + { + const vk::varying_register_t ® = vk::get_varying_register(i.name); + + // if (i.name == "fogc") + // OS << "layout(location=" << reg.reg_location << ") out vec4 fog_c;" << std::endl; + // else + OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";" << std::endl; + } + } +} + +namespace vk +{ + void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) + { + for (const auto &real_input : inputs) + { + if (real_input.location != PI.location) + continue; + + if (!real_input.is_array) + { + OS << " vec4 " << PI.name << " = texelFetch(" << PI.name << "_buffer, 0);" << std::endl; + return; + } + + if (real_input.frequency > 1) + { + if (real_input.is_modulo) + { + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex %" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex /" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba;" << std::endl; + return; + } + + OS << " vec4 " << PI.name << " = vec4(0., 0., 0., 1.);" << std::endl; + } +} + +void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) +{ + vk::insert_glsl_legacy_function(OS); + + OS << "void main()" << std::endl; + OS << "{" << std::endl; + + // Declare inside main function + for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) + { + for (const ParamItem &PI : PT.items) + { + OS << " " << PT.type << " " << PI.name; + if (!PI.value.empty()) + OS << " = " << PI.value; + OS << ";" << std::endl; + } + } + + for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem &PI : PT.items) + 
vk::add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + } +} + +void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) +{ + for (auto &i : reg_table) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", i.src_reg)) + OS << " " << i.name << " = " << i.src_reg << i.src_reg_mask << ";" << std::endl; + } + + OS << " gl_Position = gl_Position * scaleOffsetMat;" << std::endl; + OS << "}" << std::endl; +} + + +void VKVertexDecompilerThread::Task() +{ + m_shader = Decompile(); + vk_prog->SetInputs(inputs); +} + +VKVertexProgram::VKVertexProgram() +{ +} + +VKVertexProgram::~VKVertexProgram() +{ + Delete(); +} + +void VKVertexProgram::Decompile(const RSXVertexProgram& prog) +{ + VKVertexDecompilerThread decompiler(prog, shader, parr, *this); + decompiler.Task(); +} + +void VKVertexProgram::Compile() +{ + fs::file(fs::get_config_dir() + "VertexProgram.vert", fom::rewrite).write(shader); + + std::vector spir_v; + if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_vertex_program, spir_v)) + throw EXCEPTION("Failed to compile vertex shader"); + + VkShaderModuleCreateInfo vs_info; + vs_info.codeSize = spir_v.size() * sizeof(u32); + vs_info.pNext = nullptr; + vs_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vs_info.pCode = (uint32_t*)spir_v.data(); + vs_info.flags = 0; + + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkCreateShaderModule(dev, &vs_info, nullptr, &handle); + + id = (u32)(handle); +} + +void VKVertexProgram::Delete() +{ + shader.clear(); + + if (handle) + { + if (Emu.IsStopped()) + { + LOG_WARNING(RSX, "VKVertexProgram::Delete(): vkDestroyShaderModule(0x%X) avoided", handle); + } + else + { + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkDestroyShaderModule(dev, handle, nullptr); + } + + handle = nullptr; + } +} + +void VKVertexProgram::SetInputs(std::vector& inputs) +{ + for (auto &it : inputs) + { + uniforms.push_back(it); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h 
b/rpcs3/Emu/RSX/VK/VKVertexProgram.h new file mode 100644 index 0000000000..f914460bb2 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -0,0 +1,58 @@ +#pragma once +#include "../Common/VertexProgramDecompiler.h" +#include "Emu/RSX/RSXVertexProgram.h" +#include "Utilities/Thread.h" +#include "VulkanAPI.h" +#include "../VK/VKHelpers.h" + +struct VKVertexDecompilerThread : public VertexProgramDecompiler +{ + std::string &m_shader; + std::vector inputs; + class VKVertexProgram *vk_prog; +protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + std::string getIntTypeName(size_t elementCount) override; + virtual std::string getFunction(FUNCTION) override; + virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; + + virtual void insertHeader(std::stringstream &OS) override; + virtual void insertInputs(std::stringstream &OS, const std::vector &inputs) override; + virtual void insertConstants(std::stringstream &OS, const std::vector &constants) override; + virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs) override; + virtual void insertMainStart(std::stringstream &OS) override; + virtual void insertMainEnd(std::stringstream &OS) override; + + const RSXVertexProgram &rsx_vertex_program; +public: + VKVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray& parr, class VKVertexProgram &dst) + : VertexProgramDecompiler(prog) + , m_shader(shader) + , rsx_vertex_program(prog) + , vk_prog(&dst) + { + } + + void Task(); + const std::vector& get_inputs() { return inputs; } +}; + +class VKVertexProgram +{ +public: + VKVertexProgram(); + ~VKVertexProgram(); + + ParamArray parr; + VkShaderModule handle = nullptr; + int id; + std::string shader; + std::vector uniforms; + + void Decompile(const RSXVertexProgram& prog); + void Compile(); + void SetInputs(std::vector& inputs); + +private: + void Delete(); +}; diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.cpp 
b/rpcs3/Emu/RSX/VK/VulkanAPI.cpp new file mode 100644 index 0000000000..1577c4e3bc --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.cpp @@ -0,0 +1 @@ +#include "stdafx.h" \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.h b/rpcs3/Emu/RSX/VK/VulkanAPI.h new file mode 100644 index 0000000000..4358259ed8 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.h @@ -0,0 +1,16 @@ +#pragma once + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#define VK_USE_PLATFORM_XLIB_KHR +#endif + +#include +#include +#include "Utilities/types.h" + +namespace vk +{ + void init(); +} diff --git a/rpcs3/Gui/SettingsDialog.cpp b/rpcs3/Gui/SettingsDialog.cpp index be5d1535bd..a5a6e61d12 100644 --- a/rpcs3/Gui/SettingsDialog.cpp +++ b/rpcs3/Gui/SettingsDialog.cpp @@ -252,6 +252,8 @@ SettingsDialog::SettingsDialog(wxWindow *parent, rpcs3::config_t* cfg) } #endif + cbox_gs_render->Append("Vulkan"); + for (int i = 1; i < WXSIZEOF(ResolutionTable); ++i) { cbox_gs_resolution->Append(wxString::Format("%dx%d", ResolutionTable[i].width.value(), ResolutionTable[i].height.value())); diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 1125a2a385..5713b9329f 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -1,125 +1,141 @@ - - - - - Debug - LLVM - x64 - - - Debug - MemLeak - x64 - - - Debug - x64 - - - Release - LLVM - x64 - - - Release - x64 - - - - - - - - - - - {c4a10229-4712-4bd2-b63e-50d93c67a038} - - - - {3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} - Win32Proj - VKGSRender - 8.1 - - - - StaticLibrary - v140 - Unicode - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - _DEBUG;_LIB;%(PreprocessorDefinitions) - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - Windows - - - - - Level3 - - - MaxSpeed - true - true - NDEBUG;_LIB;%(PreprocessorDefinitions) - 
..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - Windows - true - true - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - + + + + + Debug - LLVM + x64 + + + Debug - MemLeak + x64 + + + Debug + x64 + + + Release - LLVM + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + {c4a10229-4712-4bd2-b63e-50d93c67a038} + + + + {3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} + Win32Proj + VKGSRender + 8.1 + + + + StaticLibrary + v140 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + _DEBUG;_LIB;%(PreprocessorDefinitions) + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + Windows + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + Windows + true + true + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + \ No newline at end of file diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 1868ae93ea..22eb36cdee 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -1,19 +1,67 @@ - - - - - 
{4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - - - Source Files - - - - - Source Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + \ No newline at end of file diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 899bea4107..92ae1d3b03 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -144,7 +144,7 @@ bool Rpcs3App::OnInit() case rsx_renderer_type::OpenGL: return std::make_shared(); #ifdef _MSC_VER case rsx_renderer_type::DX12: return std::make_shared(); - case rsx_renderer_type::Vulkan: return std::make_shared<>(VKGSRender); + case rsx_renderer_type::Vulkan: return std::make_shared(); #endif default: throw EXCEPTION("Invalid GS Renderer %d", (int)mode); } diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 02ef058685..63b3a58304 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -94,12 +94,12 @@ ..\minidx9\Include;..\OpenAL\include;..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) - ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) - ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) - ..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) - 
..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) - VKstatic.1.lib;glslang.lib;%(AdditionalDependencies) + ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) + ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) + ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) + ..\Vulkan\glslang-build\SPIRV\Release;..\Vulkan\glslang-build\OGLCompilersDLL\Release;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Release;..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) + ..\Vulkan\glslang-build\SPIRV\Release;..\Vulkan\glslang-build\OGLCompilersDLL\Release;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Release;..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories) + VKstatic.1.lib;glslang.lib;OSDependent.lib;OGLCompiler.lib;SPIRV.lib;%(AdditionalDependencies) From 22d49ae96ab96417a7fa8067e92104272f8af64b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 7 Mar 2016 11:38:00 +0300 Subject: [PATCH 03/13] Fix build for non-WIN32 platforms Link explicitly against vkstatic.1.lib fix linux path for vulkan glsl compiler Restore wxWidgets version --- rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp | 2 +- 
rpcs3/Emu/RSX/VK/VKGSRender.cpp | 8 +++++--- rpcs3/Emu/RSX/VK/VKGSRender.h | 2 ++ rpcs3/Emu/RSX/VK/VKHelpers.h | 3 +++ rpcs3/Gui/SettingsDialog.cpp | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 2d00f5d697..b3f943714c 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #include "VKCommonDecompiler.h" -#include "../VulKan/glslang/SPIRV/GlslangToSpv.h" +#include "../../../../Vulkan/glslang/SPIRV/GlslangToSpv.h" namespace vk { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ae9fbacb6d..29839bd111 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -160,15 +160,17 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) { shaders_cache.load(rsx::shader_language::glsl); - HINSTANCE hInstance = NULL; - HWND hWnd = (HWND)m_frame->handle(); - m_thread_context.createInstance("RPCS3"); m_thread_context.makeCurrentInstance(1); m_thread_context.enable_debugging(); +#ifdef _WIN32 + HINSTANCE hInstance = NULL; + HWND hWnd = (HWND)m_frame->handle(); + std::vector& gpus = m_thread_context.enumerateDevices(); m_swap_chain = m_thread_context.createSwapChain(hInstance, hWnd, gpus[0]); +#endif m_device = (vk::render_device *)(&m_swap_chain->get_device()); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index d4d930fc6c..808283fb7e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -9,6 +9,8 @@ #include "VKProgramBuffer.h" #include "../GCM.h" +#pragma comment(lib, "VKstatic.1.lib") + class VKGSRender : public GSRender { private: diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 6bb3cf692a..9c4a9a4491 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1044,6 +1044,7 @@ namespace vk return gpus; } +#ifdef 
_WIN32 vk::swap_chain* createSwapChain(HINSTANCE hInstance, HWND hWnd, vk::physical_device &dev) { VkWin32SurfaceCreateInfoKHR createInfo; @@ -1133,6 +1134,8 @@ namespace vk return new swap_chain(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, format, surface, color_space); } +#endif //if _WIN32 + }; class descriptor_pool diff --git a/rpcs3/Gui/SettingsDialog.cpp b/rpcs3/Gui/SettingsDialog.cpp index a5a6e61d12..b191917fb6 100644 --- a/rpcs3/Gui/SettingsDialog.cpp +++ b/rpcs3/Gui/SettingsDialog.cpp @@ -250,9 +250,9 @@ SettingsDialog::SettingsDialog(wxWindow *parent, rpcs3::config_t* cfg) cbox_gs_d3d_adaptater->Enable(false); chbox_gs_overlay->Enable(false); } -#endif cbox_gs_render->Append("Vulkan"); +#endif for (int i = 1; i < WXSIZEOF(ResolutionTable); ++i) { From d58bd1c916660146b0360367adf6727fca9c3436 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 7 Mar 2016 17:24:32 +0300 Subject: [PATCH 04/13] Exclude apple from vulkan windowing stuff as it is not supported --- rpcs3/Emu/RSX/VK/VulkanAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.h b/rpcs3/Emu/RSX/VK/VulkanAPI.h index 4358259ed8..622d3b3803 100644 --- a/rpcs3/Emu/RSX/VK/VulkanAPI.h +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.h @@ -2,7 +2,7 @@ #ifdef _WIN32 #define VK_USE_PLATFORM_WIN32_KHR -#else +#elif !defined __APPLE__ #define VK_USE_PLATFORM_XLIB_KHR #endif From bd52bcf8d4bc3fa8af34e719684a918c32523f9e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 7 Mar 2016 17:55:02 +0300 Subject: [PATCH 05/13] Fix nvidia crash (API version). 
Fix linux builds Properly set up vulkan API version when creating instance Fix gcc error about passing function result by reference Fix alot of warnings in VKGSRender project More fixes for gcc Fix texture create function --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 2 +- rpcs3/Emu/RSX/Common/TextureUtils.h | 2 +- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 2 +- rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 +++----- rpcs3/Emu/RSX/VK/VKHelpers.h | 34 +++++++++++++------------- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 4 +-- rpcs3/Emu/RSX/VK/VKTexture.cpp | 20 +++++++-------- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 4 +-- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 2 +- rpcs3/Emu/RSX/VK/VKVertexProgram.h | 2 +- 11 files changed, 40 insertions(+), 44 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index d504d2ee03..1f10a6410a 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -340,7 +340,7 @@ std::vector upload_placed_texture(gsl::span mapped_b /** * Upload texture mipmaps where alignment and offset information is provided manually */ -void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info) +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info) { u16 w = texture.width(), h = texture.height(); u16 depth; diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index b327faa902..7614066f23 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -29,7 +29,7 @@ std::vector upload_placed_texture(gsl::span mapped_b * alignment_offset info is an array of N mipmaps providing the offset into the data block and row-pitch alignment of each * mipmap level individually. 
*/ -void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info); +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info); /** * Get number of bytes occupied by texture in RSX mem diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 750df896cd..958b8e9283 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -290,7 +290,7 @@ void VKFragmentProgram::Compile() VkDevice dev = (VkDevice)*vk::get_current_renderer(); vkCreateShaderModule(dev, &fs_info, nullptr, &handle); - id = (u32)(handle); + id = (u32)((u64)handle); } void VKFragmentProgram::Delete() diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index c7fa7b922d..2bb9c29efb 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -47,7 +47,7 @@ public: ParamArray parr; VkShaderModule handle = nullptr; - int id; + u32 id; std::string shader; std::vector FragmentConstantOffsetCache; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 29839bd111..de9d617a66 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -412,7 +412,7 @@ void VKGSRender::end() } } - auto &upload_info = upload_vertex_data(); + auto upload_info = upload_vertex_data(); m_program->set_primitive_topology(std::get<0>(upload_info)); m_program->use(m_command_buffer, m_render_pass, 0); @@ -435,9 +435,6 @@ void VKGSRender::end() end_command_buffer_recording(); execute_command_buffer(false); - //Finish() - vkDeviceWaitIdle((*m_device)); - rsx::thread::end(); } @@ -519,7 +516,7 @@ void VKGSRender::clear_surface(u32 mask) init_buffers(); float depth_clear = 1.f; - u32 stencil_clear = 0.f; + u32 stencil_clear = 0; VkClearValue depth_stencil_clear_values, color_clear_values; VkImageSubresourceRange depth_range = 
vk::default_image_subresource_range(); @@ -874,7 +871,6 @@ void VKGSRender::prepare_rtts() (*m_device), &m_command_buffer); //Bind created rtts as current fbo... - VkImageView attachments[5]; std::vector draw_buffers = vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])); m_framebuffer.destroy(); @@ -904,7 +900,7 @@ void VKGSRender::prepare_rtts() init_render_pass(vk::get_compatible_surface_format(m_surface.color_format), vk::get_compatible_depth_surface_format(m_surface.depth_format), - draw_buffers.size(), + (u8)draw_buffers.size(), draw_buffers.data()); } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 9c4a9a4491..43dc9f2e3b 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -99,7 +99,7 @@ namespace vk uint32_t get_queue_count() { if (queue_props.size()) - return queue_props.size(); + return (u32)queue_props.size(); uint32_t count = 0; vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr); @@ -239,13 +239,13 @@ namespace vk { VkDeviceMemory vram = nullptr; vk::render_device *owner = nullptr; - u32 vram_block_sz = 0; + u64 vram_block_sz = 0; public: memory_block() {} ~memory_block() {} - void allocate_from_pool(vk::render_device &device, u32 block_sz, u32 typeBits) + void allocate_from_pool(vk::render_device &device, u64 block_sz, u32 typeBits) { if (vram) destroy(); @@ -322,9 +322,9 @@ namespace vk texture() {} ~texture() {} - void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); - void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); - void create(vk::render_device &device, VkFormat format, VkImageUsageFlags 
usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle); void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only); void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height); void destroy(); @@ -360,7 +360,7 @@ namespace vk vk::render_device *owner; vk::memory_block vram; - u32 m_size = 0; + u64 m_size = 0; bool viewable = false; @@ -368,7 +368,7 @@ namespace vk buffer() {} ~buffer() {} - void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags) + void create(vk::render_device &dev, u64 size, VkFormat format, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags) { if (m_buffer) throw EXCEPTION("Buffer create called on an existing buffer!"); @@ -504,7 +504,7 @@ namespace vk m_internal_format = format; } - u32 size() + u64 size() { return m_size; } @@ -784,7 +784,7 @@ namespace vk u32 get_swap_image_count() { - return m_swap_images.size(); + return (u32)m_swap_images.size(); } vk::swap_chain_image& get_swap_chain_image(const int index) @@ -959,7 +959,7 @@ namespace vk app.applicationVersion = 0; app.pEngineName = app_name; app.engineVersion = 0; - app.apiVersion = (1, 0, 0); + app.apiVersion = VK_MAKE_VERSION(1, 0, 0); //Set up instance information 
const char *requested_extensions[] = @@ -993,7 +993,7 @@ namespace vk if (error != VK_SUCCESS) throw EXCEPTION("Undefined trap"); m_vk_instances.push_back(instance); - return m_vk_instances.size(); + return (u32)m_vk_instances.size(); } void makeCurrentInstance(uint32_t instance_id) @@ -1037,7 +1037,7 @@ namespace vk CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data())); - for (int i = 0; i < num_gpus; ++i) + for (u32 i = 0; i < num_gpus; ++i) gpus[i].set_device(pdevs[i]); } @@ -1060,7 +1060,7 @@ namespace vk uint32_t device_queues = dev.get_queue_count(); std::vector supportsPresent(device_queues); - for (int index = 0; index < device_queues; index++) + for (u32 index = 0; index < device_queues; index++) { vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, surface, &supportsPresent[index]); } @@ -1070,7 +1070,7 @@ namespace vk uint32_t graphicsQueueNodeIndex = UINT32_MAX; uint32_t presentQueueNodeIndex = UINT32_MAX; - for (int i = 0; i < device_queues; i++) + for (u32 i = 0; i < device_queues; i++) { if ((dev.get_queue_properties(i).queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { @@ -1206,8 +1206,8 @@ namespace vk { VkBufferView buffer_view = nullptr; VkBuffer buffer = nullptr; - u32 offset = 0; - u32 size = 0; + u64 offset = 0; + u64 size = 0; }; struct program_input diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index faa9e7a328..2a60abd910 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -725,7 +725,7 @@ namespace vk uniform.domain == domain) { VkBuffer buf = _buffer; - u32 size = _buffer.size(); + u64 size = _buffer.size(); if (uniform.as_buffer.buffer != buf || uniform.as_buffer.size != size) @@ -760,7 +760,7 @@ namespace vk { VkBuffer buf = _buffer; VkBufferView view = _buffer; - u32 size = _buffer.size(); + u64 size = _buffer.size(); if (uniform.as_buffer.buffer != buf || uniform.as_buffer.buffer_view != view || diff --git 
a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 78e1d46498..718897c366 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -66,7 +66,7 @@ namespace vk if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect); - for (int mip_level = 0; mip_level < mipmaps; ++mip_level) + for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn); @@ -105,7 +105,7 @@ namespace vk if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect); - for (int mip_level = 0; mip_level < mipmaps; ++mip_level) + for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR); @@ -138,7 +138,7 @@ namespace vk owner = nullptr; } - void texture::create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + void texture::create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) { owner = &device; @@ -224,12 +224,12 @@ namespace vk ready = true; } - void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling 
tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) { create(device, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, 0, usage, tiling, width, height, mipmaps, gpu_only, swizzle); } - void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle) { VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; @@ -250,7 +250,7 @@ namespace vk void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only) { - create(device, format, usage, width, height, mipmaps, gpu_only, vk::default_component_map()); + create(device, format, usage, width, height, mipmaps, gpu_only, default_component_map()); } void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height) @@ -390,7 +390,7 @@ namespace vk if (tex.mipmap() == 1) { - u32 buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); + u64 buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); if (buffer_size != layout_alignment[0].second.size) { if (buffer_size > layout_alignment[0].second.size) @@ -421,7 +421,7 @@ namespace vk else { auto &layer_props = layout_alignment[layout_alignment.size() - 1].second; - u32 max_size = layer_props.offset + layer_props.size; + u64 max_size = layer_props.offset + layer_props.size; if (m_memory_layout.size < max_size) { @@ -429,14 +429,14 @@ namespace vk } int index= 0; - std::vector> layout_offset_info(tex.mipmap()); + std::vector> layout_offset_info(tex.mipmap()); for (auto &mip_info : layout_offset_info) { auto &alignment = layout_alignment[index].first; auto &layout = 
layout_alignment[index++].second; - mip_info = std::make_pair(layout.offset, layout.rowPitch); + mip_info = std::make_pair(layout.offset, (u32)layout.rowPitch); } CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 7709150aaf..7fc83fa3a1 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -92,7 +92,7 @@ namespace vk */ u32 expand_line_loop_array_to_strip(u32 vertex_draw_count, std::vector& indices) { - int i = 0; + u32 i = 0; indices.resize(vertex_draw_count + 1); for (; i < vertex_draw_count; ++i) @@ -107,7 +107,7 @@ namespace vk { indices.resize(original_count + 1); - int i = 0; + u32 i = 0; for (; i < original_count; ++i) indices[i] = original_indices[i]; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index f113d29c4e..6daf852435 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -269,7 +269,7 @@ void VKVertexProgram::Compile() VkDevice dev = (VkDevice)*vk::get_current_renderer(); vkCreateShaderModule(dev, &vs_info, nullptr, &handle); - id = (u32)(handle); + id = (u32)((u64)handle); } void VKVertexProgram::Delete() diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index f914460bb2..00e1399d38 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -45,7 +45,7 @@ public: ParamArray parr; VkShaderModule handle = nullptr; - int id; + u32 id; std::string shader; std::vector uniforms; From f384d8704430e2e5248996825f91c05eb2f13f0d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 7 Mar 2016 20:51:32 +0300 Subject: [PATCH 06/13] Fix build dependencies and fix gcc build Factor out _aligned_malloc functions to WIN32 only Attempt to fix CmakeLists error More CMakeLists fixes (glslang) Add OSDependent libs Add OGLCompiler and SPIRV as well
--- rpcs3/CMakeLists.txt | 6 +++--- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index c74ceb44a1..8ba5a8f38c 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -191,14 +191,14 @@ if(WIN32) # I'm not sure we need all of these libs, but we link them in vs else() target_link_libraries(rpcs3 dxgi.lib d2d1.lib dwrite.lib) endif() - target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} ${vulkan} ${glslang}) + target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} vulkan glslang OSDependent OGLCompiler SPIRV) else() if(LLVM_FOUND) target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) - target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${LLVM_LIBS} ${ADDITIONAL_LIBS} ${vulkan} ${glslang}) + target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${LLVM_LIBS} ${ADDITIONAL_LIBS} vulkan glslang OSDependent OGLCompiler SPIRV) else() target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) - target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${ADDITIONAL_LIBS} ${vulkan} ${glslang}) + target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${ADDITIONAL_LIBS} vulkan glslang OSDependent OGLCompiler SPIRV) endif() endif() diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 
001265384f..a9c2c54a32 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -19,12 +19,20 @@ namespace vk VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) { +#ifdef _WIN32 return _aligned_malloc(size, alignment); +#else + return malloc(size); +#endif } VKAPI_ATTR void VKAPI_CALL mem_free(void *pUserData, void *pMemory) { +#ifdef _WIN32 _aligned_free(pMemory); +#else + free(pMemory); +#endif } VkFormat get_compatible_sampler_format(u32 format, VkComponentMapping& swizzle, u8 swizzle_mask) @@ -278,7 +286,9 @@ namespace vk uint64_t srcObject, size_t location, int32_t msgCode, const char *pLayerPrefix, const char *pMsg, void *pUserData) { +#ifdef _WIN32 DebugBreak(); +#endif return false; } From d910d2c5721fa13e546982ea0378c04cef63c601 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 8 Mar 2016 00:21:28 +0300 Subject: [PATCH 07/13] Fix vulkan swap modes for nvidia CMakeLists edits Check for linear tiling support for all usage attributes --- rpcs3/CMakeLists.txt | 2 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 24 ++++++++++++++++++- rpcs3/Emu/RSX/VK/VKTexture.cpp | 44 +++++++++++++++++++++++++++------- 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 8ba5a8f38c..47368c5071 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -191,7 +191,7 @@ if(WIN32) # I'm not sure we need all of these libs, but we link them in vs else() target_link_libraries(rpcs3 dxgi.lib d2d1.lib dwrite.lib) endif() - target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} vulkan glslang OSDependent OGLCompiler SPIRV) + target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} ${vulkan} ${glslang} 
${OSDependent} ${OGLCompiler} ${SPIRV}) else() if(LLVM_FOUND) target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 43dc9f2e3b..51427acb87 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -726,7 +726,29 @@ namespace vk height = surface_descriptors.currentExtent.height; } - VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + uint32_t nb_available_modes = 0; + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &nb_available_modes, nullptr)); + + std::vector present_modes(nb_available_modes); + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &nb_available_modes, present_modes.data())); + + VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_FIFO_KHR; + + for (VkPresentModeKHR mode : present_modes) + { + if (mode == VK_PRESENT_MODE_MAILBOX_KHR) + { + //If we can get a mailbox mode, use it + swapchain_present_mode = mode; + break; + } + + //If we can get out of using the FIFO mode, take it. 
Fifo is very high latency (generic vsync) + if (swapchain_present_mode == VK_PRESENT_MODE_FIFO_KHR && + (mode == VK_PRESENT_MODE_IMMEDIATE_KHR || mode == VK_PRESENT_MODE_FIFO_RELAXED_KHR)) + swapchain_present_mode = mode; + } + uint32_t nb_swap_images = surface_descriptors.minImageCount + 1; if ((surface_descriptors.maxImageCount > 0) && (nb_swap_images > surface_descriptors.maxImageCount)) diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 718897c366..1232669695 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -233,18 +233,44 @@ namespace vk { VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; - if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) - { - VkFormatProperties props; - vkGetPhysicalDeviceFormatProperties(device.gpu(), format, &props); + /* The spec mandates checking against all usage bits for support in either linear or optimal tiling modes. + * Ideally, no assumptions should be made, but for simplification, we'll assume optimal mode supports everything + */ - //Enable linear tiling if supported and we request a sampled image..
- if (props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - tiling = VK_IMAGE_TILING_LINEAR; - else - usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(device.gpu(), format, &props); + + bool linear_is_supported = true; + + if (!!(usage & VK_IMAGE_USAGE_SAMPLED_BIT)) + { + if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) + linear_is_supported = false; } + if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) + { + if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) + linear_is_supported = false; + } + + if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) + { + if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) + linear_is_supported = false; + } + + if (linear_is_supported && !!(usage & VK_IMAGE_USAGE_STORAGE_BIT)) + { + if (!(props.linearTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) + linear_is_supported = false; + } + + if (linear_is_supported) + tiling = VK_IMAGE_TILING_LINEAR; + else + usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + create(device, format, usage, tiling, width, height, mipmaps, gpu_only, swizzle); } From b018c9113598b3ff923c242878f957c390570a5f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 8 Mar 2016 00:56:34 +0300 Subject: [PATCH 08/13] Make render-targets GPU resident Fix minor regressions that occurred during merge --- rpcs3/Emu/RSX/VK/VKHelpers.h | 24 ++++++++++++++++++++++-- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 4 ++-- rpcs3/Emu/RSX/VK/VKTexture.cpp | 2 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 9 ++++----- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 51427acb87..2842096933 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -240,12 +240,13 @@ namespace vk VkDeviceMemory vram = nullptr; vk::render_device *owner = nullptr;
u64 vram_block_sz = 0; + bool mappable = false; public: memory_block() {} ~memory_block() {} - void allocate_from_pool(vk::render_device &device, u64 block_sz, u32 typeBits) + void allocate_from_pool(vk::render_device &device, u64 block_sz, bool host_visible, u32 typeBits) { if (vram) destroy(); @@ -254,8 +255,13 @@ namespace vk owner = (vk::render_device*)&device; VkDevice dev = (VkDevice)(*owner); + + u32 access_mask = 0; - if (!owner->get_compatible_memory_type(typeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &typeIndex)) + if (host_visible) + access_mask |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + + if (!owner->get_compatible_memory_type(typeBits, access_mask, &typeIndex)) throw EXCEPTION("Could not find suitable memory type!"); VkMemoryAllocateInfo infos; @@ -266,6 +272,12 @@ namespace vk CHECK_RESULT(vkAllocateMemory(dev, &infos, nullptr, &vram)); vram_block_sz = block_sz; + mappable = host_visible; + } + + void allocate_from_pool(vk::render_device &device, u64 block_sz, u32 typeBits) + { + allocate_from_pool(device, block_sz, true, typeBits); } void destroy() @@ -278,6 +290,11 @@ namespace vk vram_block_sz = 0; } + bool is_mappable() + { + return mappable; + } + vk::render_device& get_owner() { return (*owner); @@ -419,7 +436,10 @@ namespace vk void *map(u32 offset, u64 size) { + if (!vram.is_mappable()) return nullptr; + void *data = nullptr; + if (size == VK_WHOLE_SIZE) size = m_memory_layout.size; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 666a2b6e52..75fb00c6dc 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -19,7 +19,7 @@ namespace rsx VkFormat requested_format = vk::get_compatible_surface_format(format); vk::texture rtt; - rtt.create(device, requested_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.create(device, requested_format, 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height, 1, true); rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); return rtt; @@ -30,7 +30,7 @@ namespace rsx VkFormat requested_format = vk::get_compatible_depth_surface_format(format); vk::texture rtt; - rtt.create(device, requested_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.create(device, requested_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height, 1, true); rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); return rtt; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 1232669695..5a14627c5c 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -162,7 +162,7 @@ namespace vk CHECK_RESULT(vkCreateImage(device, &image_info, nullptr, &m_image_contents)); vkGetImageMemoryRequirements(device, m_image_contents, &m_memory_layout); - vram_allocation.allocate_from_pool(device, m_memory_layout.size, m_memory_layout.memoryTypeBits); + vram_allocation.allocate_from_pool(device, m_memory_layout.size, !gpu_only, m_memory_layout.memoryTypeBits); CHECK_RESULT(vkBindImageMemory(device, m_image_contents, vram_allocation, 0)); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 7fc83fa3a1..d7cb8194ce 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -289,7 +289,6 @@ VKGSRender::upload_vertex_data() continue; } - const u32 host_element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); const u32 element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size); const u32 data_size = element_size * vertex_draw_count; const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); @@ -311,19 +310,19 @@ 
VKGSRender::upload_vertex_data() switch (vertex_info.type) { case rsx::vertex_base_type::f: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, element_size, stride); break; case rsx::vertex_base_type::sf: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, element_size, stride); break; case rsx::vertex_base_type::s1: case rsx::vertex_base_type::ub: case rsx::vertex_base_type::ub256: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, element_size, stride); break; case rsx::vertex_base_type::s32k: case rsx::vertex_base_type::cmp: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, element_size, stride); break; default: throw EXCEPTION("Unknown base type %d", vertex_info.type); From 47d251a818a379558a9ea307a04ec88784fffbef Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 9 Mar 2016 13:23:25 +0300 Subject: [PATCH 09/13] Change render target layout before clearing Use LAYOUT_GENERAL during clear; move renderpass begin to draw call end --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 43 +++++++++++++++++++----------- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 30 +++++++++++++++++++++ 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 
de9d617a66..2cd6b87bb7 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -357,19 +357,6 @@ void VKGSRender::begin() //TODO: Set up other render-state parameters into the program pipeline - VkRenderPassBeginInfo rp_begin; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass; - rp_begin.framebuffer = m_framebuffer; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_frame->client_size().width; - rp_begin.renderArea.extent.height = m_frame->client_size().height; - rp_begin.clearValueCount = 0; - rp_begin.pClearValues = nullptr; - - vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); m_draw_calls++; } @@ -394,7 +381,21 @@ namespace } void VKGSRender::end() -{ +{ + VkRenderPassBeginInfo rp_begin; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_render_pass; + rp_begin.framebuffer = m_framebuffer; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_frame->client_size().width; + rp_begin.renderArea.extent.height = m_frame->client_size().height; + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = nullptr; + + vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + vk::texture *texture0 = nullptr; for (int i = 0; i < rsx::limits::textures_count; ++i) { @@ -571,12 +572,22 @@ void VKGSRender::clear_surface(u32 mask) if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; VkImage color_image = (*std::get<1>(m_rtts.m_bound_render_targets[i])); - vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &color_clear_values.color, 1, &range); + VkImageLayout old_layout = std::get<1>(m_rtts.m_bound_render_targets[i])->get_layout(); + 
std::get<1>(m_rtts.m_bound_render_targets[i])->change_layout(m_command_buffer, VK_IMAGE_LAYOUT_GENERAL); + + vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, &color_clear_values.color, 1, &range); + std::get<1>(m_rtts.m_bound_render_targets[i])->change_layout(m_command_buffer, old_layout); } } if (mask & 0x3) - vkCmdClearDepthStencilImage(m_command_buffer, (*std::get<1>(m_rtts.m_bound_depth_stencil)), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); + { + VkImageLayout old_layout = std::get<1>(m_rtts.m_bound_depth_stencil)->get_layout(); + std::get<1>(m_rtts.m_bound_depth_stencil)->change_layout(m_command_buffer, VK_IMAGE_LAYOUT_GENERAL); + + vkCmdClearDepthStencilImage(m_command_buffer, (*std::get<1>(m_rtts.m_bound_depth_stencil)), VK_IMAGE_LAYOUT_GENERAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); + std::get<1>(m_rtts.m_bound_depth_stencil)->change_layout(m_command_buffer, old_layout); + } if (!was_recording) { diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 75fb00c6dc..ef877b0052 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -20,6 +20,18 @@ namespace rsx vk::texture rtt; rtt.create(device, requested_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height, 1, true); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_GENERAL); + + //Clear new surface + VkClearColorValue clear_color; + VkImageSubresourceRange range = vk::default_image_subresource_range(); + + clear_color.float32[0] = 0.f; + clear_color.float32[1] = 0.f; + clear_color.float32[2] = 0.f; + clear_color.float32[3] = 0.f; + + vkCmdClearColorImage(*cmd, rtt, rtt.get_layout(), &clear_color, 1, &range); rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); return rtt; @@ -31,6 +43,24 @@ namespace rsx vk::texture rtt; rtt.create(device, 
requested_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height, 1, true); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_GENERAL); + + //Clear new surface.. + VkClearDepthStencilValue clear_depth; + VkImageSubresourceRange range; + range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + range.baseArrayLayer = 0; + range.baseMipLevel = 0; + range.layerCount = 1; + range.levelCount = 1; + + if (format == surface_depth_format::z24s8) + range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + + clear_depth.depth = 1.f; + clear_depth.stencil = 0; + + vkCmdClearDepthStencilImage(*cmd, rtt, rtt.get_layout(), &clear_depth, 1, &range); rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); return rtt; From f0ded467538722145522b4f6a738ddc243770aa1 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 Mar 2016 15:57:54 +0300 Subject: [PATCH 10/13] Fix apple build by excluding vk project --- rpcs3/CMakeLists.txt | 23 +++++++++++++++++++---- rpcs3/rpcs3.cpp | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 47368c5071..ee35dc1e52 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -173,6 +173,16 @@ RPCS3_SRC "${RPCS3_SRC_DIR}/../rsx_program_decompiler/shader_code/*" ) +if(APPLE) + set (EXCLUDE_DIR "/RSX/VK/") + foreach (TMP_PATH ${RPCS3_SRC}) + string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) + if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) + list (REMOVE_ITEM RPCS3_SRC ${TMP_PATH}) + endif () + endforeach(TMP_PATH) +endif() + add_executable(rpcs3 ${RPCS3_SRC}) @@ -195,13 +205,18 @@ if(WIN32) # I'm not sure we need all of these libs, but we link them in vs else() if(LLVM_FOUND) target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) - target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${LLVM_LIBS} ${ADDITIONAL_LIBS} vulkan 
glslang OSDependent OGLCompiler SPIRV) + target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${LLVM_LIBS} ${ADDITIONAL_LIBS}) + if (NOT APPLE) + target_link_libraries(rpcs3 vulkan glslang OSDependent OGLCompiler SPIRV) + endif() else() target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) - target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${ADDITIONAL_LIBS} vulkan glslang OSDependent OGLCompiler SPIRV) + target_link_libraries(rpcs3 libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a png16_static ${ZLIB_LIBRARIES} ${ADDITIONAL_LIBS}) + if (NOT APPLE) + target_link_libraries(rpcs3 vulkan glslang OSDependent OGLCompiler SPIRV) + endif() endif() endif() set_target_properties(rpcs3 PROPERTIES COTIRE_CXX_PREFIX_HEADER_INIT "${RPCS3_SRC_DIR}/stdafx.h") -cotire(rpcs3) - +cotire(rpcs3) \ No newline at end of file diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 92ae1d3b03..4ced904402 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -32,10 +32,10 @@ #include "Emu/RSX/Null/NullGSRender.h" #include "Emu/RSX/GL/GLGSRender.h" -#include "Emu/RSX/VK/VKGSRender.h" #include "Emu/Audio/Null/NullAudioThread.h" #include "Emu/Audio/AL/OpenALThread.h" #ifdef _MSC_VER +#include "Emu/RSX/VK/VKGSRender.h" #include "Emu/RSX/D3D12/D3D12GSRender.h" #include "Emu/Audio/XAudio2/XAudio2Thread.h" #endif From 26964efa7ecd6ead84551dbbd5de99fedcb4c3f1 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 Mar 2016 18:14:02 +0300 Subject: [PATCH 11/13] Support stencil formats Fix appveyor build --- rpcs3/CMakeLists.txt | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 +++--- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 2 +- rpcs3/Emu/RSX/VK/VKTexture.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 
ee35dc1e52..e6fc5c5639 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -201,7 +201,7 @@ if(WIN32) # I'm not sure we need all of these libs, but we link them in vs else() target_link_libraries(rpcs3 dxgi.lib d2d1.lib dwrite.lib) endif() - target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} ${vulkan} ${glslang} ${OSDependent} ${OGLCompiler} ${SPIRV}) + target_link_libraries(rpcs3 asmjit.lib avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS} VKstatic.1 glslang OSDependent OGLCompiler SPIRV) else() if(LLVM_FOUND) target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES}) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 2cd6b87bb7..ac557a5ebb 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -93,7 +93,7 @@ namespace vk switch (format) { case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; - case rsx::surface_depth_format::z24s8: return VK_FORMAT_D16_UNORM; + case rsx::surface_depth_format::z24s8: return VK_FORMAT_D16_UNORM_S8_UINT; //Cant get D24_S8 to work on AMD (beta 5) } throw EXCEPTION("Invalid format (0x%x)", format); } @@ -626,9 +626,9 @@ void VKGSRender::init_render_pass(VkFormat surface_format, VkFormat depth_format attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; //PRESENT_SRC_KHR?? attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachments[1].format = VK_FORMAT_D16_UNORM; /* Depth buffer format. Should be more elegant than this */ + attachments[1].format = depth_format; /* Depth buffer format. 
Should be more elegant than this */ attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index ef877b0052..4df472990e 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -54,7 +54,7 @@ namespace rsx range.layerCount = 1; range.levelCount = 1; - if (format == surface_depth_format::z24s8) + if (requested_format != VK_FORMAT_D16_UNORM) range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; clear_depth.depth = 1.f; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 5a14627c5c..9fe0bc4db8 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -178,7 +178,7 @@ namespace vk if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT/* | VK_IMAGE_ASPECT_STENCIL_BIT*/; + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; m_image_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; } From 93032be4aba36518ae501ca30171d1cdd0d82f18 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 Mar 2016 20:45:57 +0300 Subject: [PATCH 12/13] Move waiting for submit fence to the right place (nvidia crash) --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ac557a5ebb..cf62146cb5 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -993,17 +993,6 @@ void VKGSRender::flip(int buffer) aspect_ratio.size = m_frame->client_size(); } - //Check if anything is waiting in queue and submit it 
if possible.. - if (m_submit_fence) - { - CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); - - vkDestroyFence((*m_device), m_submit_fence, nullptr); - m_submit_fence = nullptr; - - CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); - } - VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); uint32_t next_image_temp = 0; @@ -1051,6 +1040,17 @@ void VKGSRender::flip(int buffer) end_command_buffer_recording(); execute_command_buffer(false); + //Check if anything is waiting in queue and wait for it if possible.. + if (m_submit_fence) + { + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); + + vkDestroyFence((*m_device), m_submit_fence, nullptr); + m_submit_fence = nullptr; + + CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); + } + CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue())); From cc713a0091500f68bde84607c9bb86b9cede79b7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 Mar 2016 20:58:17 +0300 Subject: [PATCH 13/13] vk: Release dirty resources to prevent mem leak --- rpcs3/Emu/RSX/VK/VKTextureCache.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index a474f3d1cb..470dfe950d 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -30,6 +30,7 @@ namespace vk { private: std::vector m_cache; + u32 num_dirty_textures = 0; bool lock_memory_region(u32 start, u32 size) { @@ -126,6 +127,20 @@ namespace vk unlock_memory_region(obj.protected_rgn_start, obj.native_rsx_size); } + void purge_dirty_textures() + { + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty && tex.exists) + { + tex.uploaded_texture.destroy(); + tex.exists = false; + } + } + + num_dirty_textures = 0; + } + public: texture_cache() {} @@ -147,6 +162,15 @@ namespace vk 
vk::texture& upload_texture(command_buffer cmd, rsx::texture &tex, rsx::vk_render_targets &m_rtts) { + if (num_dirty_textures > 32) + { + /** + * Should actually reuse available dirty textures whenever possible. + * For now, just remove them, from vram + */ + purge_dirty_textures(); + } + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); const u32 range = (u32)get_texture_size(tex); @@ -202,6 +226,7 @@ namespace vk { unlock_object(tex); + num_dirty_textures++; tex.native_rsx_address = 0; tex.dirty = true; @@ -233,6 +258,7 @@ namespace vk cto.dirty = true; cto.native_rsx_address = 0; + num_dirty_textures++; m_cache.push_back(cto); } }