diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index f5f66b8dae..d504d2ee03 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -57,26 +57,26 @@ struct copy_unmodified_block_swizzled }; /** - * Texture upload template. - * - * Source textures are stored as following (for power of 2 textures): - * - For linear texture every mipmap level share rowpitch (which is the one of mipmap 0). This means that for non 0 mipmap there's padding between row. - * - For swizzled texture row pitch is texture width X pixel/block size. There's not padding between row. - * - There is no padding between 2 mipmap levels. This means that next mipmap level starts at offset rowpitch X row count - * - Cubemap images are 128 bytes aligned. - * - * The template iterates over all depth (including cubemap) and over all mipmaps. - * The alignment is 256 for mipmap levels and 512 for depth (TODO: make this customisable for Vulkan ?) - * The template takes a struct with a "copy_mipmap_level" static function that copy the given mipmap level and returns the offset to add to the src buffer for next - * mipmap level (to allow same code for packed/non packed texels) - * Sometimes texture provides a pitch even if texture is swizzled (and then packed) and in such case it's ignored. It's passed via suggested_pitch and is used only if padded_row is false. - */ +* Texture upload template. +* +* Source textures are stored as follows (for power-of-2 textures): +* - For linear textures every mipmap level shares the row pitch of mipmap 0, so rows of non-zero mipmaps have padding between them. +* - For swizzled textures the row pitch is texture width X pixel/block size; there is no padding between rows. +* - There is no padding between 2 mipmap levels, i.e. the next mipmap level starts at offset rowpitch X row count. +* - Cubemap images are 128-byte aligned. +* +* The template iterates over all depth slices (including cubemap faces) and over all mipmaps. +* The alignment is 256 for mipmap levels and 512 for depth (DX12); it varies for Vulkan. +* The template takes a struct with a "copy_mipmap_level" static function that copies the given mipmap level and returns the offset to add to the src buffer for the next +* mipmap level (to allow the same code for packed/non-packed texels). +* Sometimes the texture provides a pitch even when it is swizzled (and thus packed); in that case the pitch is ignored. It's passed via suggested_pitch and is used only if padded_row is false. +*/ template -std::vector copy_texture_data(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes) +std::vector copy_texture_data(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes, size_t alignment) { /** - * Note about size type: RSX texture width is stored in a 16 bits int and pitch is stored in a 20 bits int. - */ + * Note about size type: RSX texture width is stored in a 16-bit int and pitch is stored in a 20-bit int. + */ // <= 128 so fits in u8 u8 block_size_in_bytes = sizeof(DST_TYPE); @@ -92,7 +92,7 @@ std::vector copy_texture_data(gsl::span dst, const SR for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) { // since mip_level is up to 16 bits, this needs at least 17 bits.
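+ // Rough worked example of the pitch computation below, assuming the DX12 default alignment of 256: a 100-texel-wide A8R8G8B8 mip (4 bytes per block) occupies 400 bytes per row; align(400, 256) = 512 bytes, so dst_pitch = 512 / 4 = 128 blocks.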
- u32 dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, 256) / block_size_in_bytes; + u32 dst_pitch = align(miplevel_width_in_block * block_size_in_bytes, alignment) / block_size_in_bytes; MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -118,6 +118,44 @@ std::vector copy_texture_data(gsl::span dst, const SR return Result; } +/** + * Copy a single mipmap level starting at a given offset with a given rowpitch alignment + */ + +template +void copy_single_mipmap_layer(gsl::span dst, const SRC_TYPE *src, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u16 mipmap_index, u16 layer_index, u32 suggested_pitch_in_bytes, u32 dst_pitch) +{ + u8 block_size_in_bytes = sizeof(DST_TYPE); + size_t offsetInSrc = 0; + + u16 texture_height_in_block = (height_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + u16 texture_width_in_block = (width_in_texel + block_edge_in_texel - 1) / block_edge_in_texel; + + for (unsigned layer = 0; layer <= layer_index; layer++) + { + u16 miplevel_height_in_block = texture_height_in_block, miplevel_width_in_block = texture_width_in_block; + for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) + { + u32 src_pitch_in_block = padded_row ? suggested_pitch_in_bytes / block_size_in_bytes : miplevel_width_in_block; + u32 dst_pitch_in_block = dst_pitch / block_size_in_bytes; + const SRC_TYPE *src_with_offset = reinterpret_cast(reinterpret_cast(src) + offsetInSrc); + + if (mip_level == mipmap_index && + layer == layer_index) + { + T::copy_mipmap_level(dst.subspan(0, dst_pitch_in_block * depth * miplevel_height_in_block), src_with_offset, miplevel_height_in_block, miplevel_width_in_block, depth, dst_pitch_in_block, src_pitch_in_block); + break; + } + + offsetInSrc += miplevel_height_in_block * src_pitch_in_block * block_size_in_bytes * depth; + miplevel_height_in_block = MAX2(miplevel_height_in_block / 2, 1); + miplevel_width_in_block = MAX2(miplevel_width_in_block / 2, 1); + } + + offsetInSrc = align(offsetInSrc, 128); + } +} + /** * A texture is stored as an array of blocks, where a block is a pixel for standard texture * but is a structure containing several pixels for compressed format @@ -202,7 +240,7 @@ size_t get_texture_block_edge(u32 format) } -size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement, size_t mipmapAlignment) { size_t w = texture.width(), h = texture.height(), d = MAX2(texture.depth(), 1); @@ -218,7 +256,7 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi for (unsigned mipmap = 0; mipmap < texture.mipmap(); ++mipmap) { size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement); - result += align(rowPitch * heightInBlocks * d, 512); + result += align(rowPitch * heightInBlocks * d, mipmapAlignment); heightInBlocks = MAX2(heightInBlocks / 2, 1); widthInBlocks = MAX2(widthInBlocks / 2, 1); } @@ -226,7 +264,7 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi return result * (texture.cubemap() ? 
6 : 1); } -std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement) +std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignment) { u16 w = texture.width(), h = texture.height(); u16 depth; @@ -262,45 +300,132 @@ std::vector upload_placed_texture(gsl::span mapped_b { case CELL_GCM_TEXTURE_A8R8G8B8: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), 4 * w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), 4 * w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_COMPRESSED_DXT1: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_COMPRESSED_DXT23: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case 
CELL_GCM_TEXTURE_COMPRESSED_DXT45: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); case CELL_GCM_TEXTURE_B8: if (is_swizzled) - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); else - return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch()); - case CELL_GCM_TEXTURE_DEPTH24_D8: // Opaque type ; ATM do not copy anything - return std::vector(); + return copy_texture_data(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), texture.pitch(), rowPitchAlignment); } throw EXCEPTION("Wrong format %d", format); } +/** + * Upload texture mipmaps where alignment and offset information is provided manually + */ +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info) +{ + u16 w = texture.width(), h = texture.height(); + u16 depth; + u8 layer; + + if (texture.dimension() == 1) + { + depth = 1; + layer = 1; + h = 1; + } + else if (texture.dimension() == 2) + { + depth = 1; + layer = texture.cubemap() ? 
6 : 1; + } + else if (texture.dimension() == 3) + { + depth = texture.depth(); + layer = 1; + } + else + throw EXCEPTION("Unsupported texture dimension %d", texture.dimension()); + + int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + const u32 texaddr = rsx::get_address(texture.offset(), texture.location()); + auto pixels = vm::ps3::_ptr(texaddr); + bool is_swizzled = !(texture.format() & CELL_GCM_TEXTURE_LN); + + //TODO: Layers greater than 0 + for (u32 mip_level = 0; mip_level < texture.mipmap(); ++mip_level) + { + gsl::span mapped_buffer = dst_buffer.subspan(alignment_offset_info[mip_level].first); + + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast*>(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + case CELL_GCM_TEXTURE_B8: + if (is_swizzled) + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), 
mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + else + copy_single_mipmap_layer(as_span_workaround(mapped_buffer), reinterpret_cast(pixels), w, h, depth, layer, texture.mipmap(), mip_level, 0, texture.pitch(), alignment_offset_info[mip_level].second); + break; + default: + throw EXCEPTION("Wrong format %d", format); + } + } +} + size_t get_texture_size(const rsx::texture &texture) { size_t w = texture.width(), h = texture.height(); diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 34bff63c9c..b327faa902 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -15,7 +15,7 @@ struct MipmapLevelInfo * Get size to store texture in a linear fashion. * Storage is assumed to use a rowPitchAlignement boundary for every row of texture. */ -size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement); +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement, size_t mipmapAlignment=512); /** * Write texture data to textureData. @@ -24,6 +24,13 @@ size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPi */ std::vector upload_placed_texture(gsl::span mapped_buffer, const rsx::texture &texture, size_t rowPitchAlignement); +/** +* Upload texture mipmaps where alignment and offset information is provided manually. +* alignment_offset info is an array of N mipmaps providing the offset into the data block and row-pitch alignment of each +* mipmap level individually. +*/ +void upload_texture_mipmaps(gsl::span dst_buffer, const rsx::texture &texture, std::vector> alignment_offset_info); + /** * Get number of bytes occupied by texture in RSX mem */ diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp new file mode 100644 index 0000000000..2d00f5d697 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -0,0 +1,291 @@ +#include "stdafx.h" +#include "VKCommonDecompiler.h" +#include "../VulKan/glslang/SPIRV/GlslangToSpv.h" + +namespace vk +{ + std::string getFloatTypeNameImpl(size_t elementCount) + { + switch (elementCount) + { + default: + abort(); + case 1: + return "float"; + case 2: + return "vec2"; + case 3: + return "vec3"; + case 4: + return "vec4"; + } + } + + std::string getFunctionImpl(FUNCTION f) + { + switch (f) + { + default: + abort(); + case FUNCTION::FUNCTION_DP2: + return "vec4(dot($0.xy, $1.xy))"; + case FUNCTION::FUNCTION_DP2A: + return ""; + case FUNCTION::FUNCTION_DP3: + return "vec4(dot($0.xyz, $1.xyz))"; + case FUNCTION::FUNCTION_DP4: + return "vec4(dot($0, $1))"; + case FUNCTION::FUNCTION_DPH: + return "vec4(dot(vec4($0.xyz, 1.0), $1))"; + case FUNCTION::FUNCTION_SFL: + return "vec4(0., 0., 0., 0.)"; + case FUNCTION::FUNCTION_STR: + return "vec4(1., 1., 1., 1.)"; + case FUNCTION::FUNCTION_FRACT: + return "fract($0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D: + return "texture($t, $0.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_PROJ: + return "textureProj($t, $0.x, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_LOD: + return "textureLod($t, $0.x, $1)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D: + return "texture($t, $0.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ: + return "textureProj($t, $0.xyz, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD: + return "textureLod($t, $0.xy, $1)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE: + return "texture($t, $0.xyz)"; + case 
FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_PROJ: + return "textureProj($t, $0.xyzw, $1.x)"; // Note: $1.x is bias + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_LOD: + return "textureLod($t, $0.xyz, $1)"; + case FUNCTION::FUNCTION_DFDX: + return "dFdx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "dFdy($0)"; + } + } + + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1) + { + switch (f) + { + case COMPARE::FUNCTION_SEQ: + return "equal(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGE: + return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGT: + return "greaterThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLE: + return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLT: + return "lessThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SNE: + return "notEqual(" + Op0 + ", " + Op1 + ")"; + } + throw EXCEPTION("Unknown compare function"); + } + + void insert_glsl_legacy_function(std::ostream& OS) + { + OS << "vec4 divsq_legacy(vec4 num, vec4 denum)\n"; + OS << "{\n"; + OS << " return num / sqrt(max(denum.xxxx, 1.E-10));\n"; + OS << "}\n"; + + OS << "vec4 rcp_legacy(vec4 denum)\n"; + OS << "{\n"; + OS << " return 1. / denum;\n"; + OS << "}\n"; + + OS << "vec4 rsq_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return float(1.0 / sqrt(max(val.x, 1.E-10))).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 log2_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return log2(max(val.x, 1.E-10)).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 lit_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " vec4 clamped_val = val;\n"; + OS << " clamped_val.x = max(val.x, 0.);\n"; + OS << " clamped_val.y = max(val.y, 0.);\n"; + OS << " vec4 result;\n"; + OS << " result.x = 1.;\n"; + OS << " result.w = 1.;\n"; + OS << " result.y = clamped_val.x;\n"; + OS << " result.z = clamped_val.x > 0. ? 
exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.;\n"; + OS << " return result;\n"; + OS << "}\n\n"; + } + + void init_default_resources(TBuiltInResource &rsc) + { + rsc.maxLights = 32; + rsc.maxClipPlanes = 6; + rsc.maxTextureUnits = 32; + rsc.maxTextureCoords = 32; + rsc.maxVertexAttribs = 64; + rsc.maxVertexUniformComponents = 4096; + rsc.maxVaryingFloats = 64; + rsc.maxVertexTextureImageUnits = 32; + rsc.maxCombinedTextureImageUnits = 80; + rsc.maxTextureImageUnits = 32; + rsc.maxFragmentUniformComponents = 4096; + rsc.maxDrawBuffers = 32; + rsc.maxVertexUniformVectors = 128; + rsc.maxVaryingVectors = 8; + rsc.maxFragmentUniformVectors = 16; + rsc.maxVertexOutputVectors = 16; + rsc.maxFragmentInputVectors = 15; + rsc.minProgramTexelOffset = -8; + rsc.maxProgramTexelOffset = 7; + rsc.maxClipDistances = 8; + rsc.maxComputeWorkGroupCountX = 65535; + rsc.maxComputeWorkGroupCountY = 65535; + rsc.maxComputeWorkGroupCountZ = 65535; + rsc.maxComputeWorkGroupSizeX = 1024; + rsc.maxComputeWorkGroupSizeY = 1024; + rsc.maxComputeWorkGroupSizeZ = 64; + rsc.maxComputeUniformComponents = 1024; + rsc.maxComputeTextureImageUnits = 16; + rsc.maxComputeImageUniforms = 8; + rsc.maxComputeAtomicCounters = 8; + rsc.maxComputeAtomicCounterBuffers = 1; + rsc.maxVaryingComponents = 60; + rsc.maxVertexOutputComponents = 64; + rsc.maxGeometryInputComponents = 64; + rsc.maxGeometryOutputComponents = 128; + rsc.maxFragmentInputComponents = 128; + rsc.maxImageUnits = 8; + rsc.maxCombinedImageUnitsAndFragmentOutputs = 8; + rsc.maxCombinedShaderOutputResources = 8; + rsc.maxImageSamples = 0; + rsc.maxVertexImageUniforms = 0; + rsc.maxTessControlImageUniforms = 0; + rsc.maxTessEvaluationImageUniforms = 0; + rsc.maxGeometryImageUniforms = 0; + rsc.maxFragmentImageUniforms = 8; + rsc.maxCombinedImageUniforms = 8; + rsc.maxGeometryTextureImageUnits = 16; + rsc.maxGeometryOutputVertices = 256; + rsc.maxGeometryTotalOutputComponents = 1024; + rsc.maxGeometryUniformComponents = 1024; + rsc.maxGeometryVaryingComponents = 64; + rsc.maxTessControlInputComponents = 128; + rsc.maxTessControlOutputComponents = 128; + rsc.maxTessControlTextureImageUnits = 16; + rsc.maxTessControlUniformComponents = 1024; + rsc.maxTessControlTotalOutputComponents = 4096; + rsc.maxTessEvaluationInputComponents = 128; + rsc.maxTessEvaluationOutputComponents = 128; + rsc.maxTessEvaluationTextureImageUnits = 16; + rsc.maxTessEvaluationUniformComponents = 1024; + rsc.maxTessPatchComponents = 120; + rsc.maxPatchVertices = 32; + rsc.maxTessGenLevel = 64; + rsc.maxViewports = 16; + rsc.maxVertexAtomicCounters = 0; + rsc.maxTessControlAtomicCounters = 0; + rsc.maxTessEvaluationAtomicCounters = 0; + rsc.maxGeometryAtomicCounters = 0; + rsc.maxFragmentAtomicCounters = 8; + rsc.maxCombinedAtomicCounters = 8; + rsc.maxAtomicCounterBindings = 1; + rsc.maxVertexAtomicCounterBuffers = 0; + rsc.maxTessControlAtomicCounterBuffers = 0; + rsc.maxTessEvaluationAtomicCounterBuffers = 0; + rsc.maxGeometryAtomicCounterBuffers = 0; + rsc.maxFragmentAtomicCounterBuffers = 1; + rsc.maxCombinedAtomicCounterBuffers = 1; + rsc.maxAtomicCounterBufferSize = 16384; + rsc.maxTransformFeedbackBuffers = 4; + rsc.maxTransformFeedbackInterleavedComponents = 64; + rsc.maxCullDistances = 8; + rsc.maxCombinedClipAndCullDistances = 8; + rsc.maxSamples = 4; + + rsc.limits.nonInductiveForLoops = 1; + rsc.limits.whileLoops = 1; + rsc.limits.doWhileLoops = 1; + rsc.limits.generalUniformIndexing = 1; + rsc.limits.generalAttributeMatrixVectorIndexing = 1; + 
rsc.limits.generalVaryingIndexing = 1; + rsc.limits.generalSamplerIndexing = 1; + rsc.limits.generalVariableIndexing = 1; + rsc.limits.generalConstantMatrixVectorIndexing = 1; + } + + static const varying_register_t varying_regs[] = + { + { "diff_color", 0 }, + { "tc0", 1 }, + { "tc1", 2 }, + { "tc2", 3 }, + { "tc3", 4 }, + { "tc4", 5 }, + { "tc5", 6 }, + { "tc6", 7 }, + { "tc7", 8 }, + { "tc8", 9 }, + { "tc9", 10 }, + { "front_diff_color", 11 }, + { "front_spec_color", 12 }, + { "spec_color", 13 }, + { "fog_c", 14 }, + { "fogc", 14 } + }; + + const varying_register_t & get_varying_register(const std::string & name) + { + for (const auto&t : varying_regs) + { + if (t.name == name) + return t; + } + + throw EXCEPTION("Unknown register name: %s", name); + } + + bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector& spv) + { + EShLanguage lang = (domain == glsl::glsl_fragment_program) ? EShLangFragment : EShLangVertex; + + glslang::InitializeProcess(); + glslang::TProgram program; + glslang::TShader shader_object(lang); + + bool success = false; + const char *shader_text = shader.data(); + + TBuiltInResource rsc; + init_default_resources(rsc); + + shader_object.setStrings(&shader_text, 1); + + EShMessages msg = (EShMessages)(EShMsgVulkanRules | EShMsgSpvRules); + if (shader_object.parse(&rsc, 400, EProfile::ECoreProfile, false, true, msg)) + { + program.addShader(&shader_object); + success = program.link(EShMsgVulkanRules); + if (success) + { + glslang::TIntermediate* bytes = program.getIntermediate(lang); + glslang::GlslangToSpv(*bytes, spv); + } + } + else + { + LOG_ERROR(RSX, shader_object.getInfoLog()); + LOG_ERROR(RSX, shader_object.getInfoDebugLog()); + } + + glslang::FinalizeProcess(); + return success; + } +} diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h new file mode 100644 index 0000000000..b5b72f70b6 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -0,0 +1,20 @@ +#pragma once +#include "../Common/ShaderParam.h" +#include "VKHelpers.h" + +namespace vk +{ + struct varying_register_t + { + std::string name; + int reg_location; + }; + + std::string getFloatTypeNameImpl(size_t elementCount); + std::string getFunctionImpl(FUNCTION f); + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1); + void insert_glsl_legacy_function(std::ostream& OS); + + const varying_register_t& get_varying_register(const std::string& name); + bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector &spv); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp new file mode 100644 index 0000000000..750df896cd --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -0,0 +1,321 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "VKFragmentProgram.h" + +#include "VKCommonDecompiler.h" +#include "VKHelpers.h" +#include "../GCM.h" + +std::string VKFragmentDecompilerThread::getFloatTypeName(size_t elementCount) +{ + return vk::getFloatTypeNameImpl(elementCount); +} + +std::string VKFragmentDecompilerThread::getFunction(FUNCTION f) +{ + return vk::getFunctionImpl(f); +} + +std::string VKFragmentDecompilerThread::saturate(const std::string & code) +{ + return "clamp(" + code + ", 0., 1.)"; +} + +std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + return vk::compareFunctionImpl(f, Op0, 
Op1); +} + +void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) +{ + OS << "#version 420" << std::endl; + OS << "#extension GL_ARB_separate_shader_objects: enable" << std::endl << std::endl; + + OS << "layout(std140, set=1, binding = 0) uniform ScaleOffsetBuffer" << std::endl; + OS << "{" << std::endl; + OS << " mat4 scaleOffsetMat;" << std::endl; + OS << " float fog_param0;" << std::endl; + OS << " float fog_param1;" << std::endl; + OS << "};" << std::endl << std::endl; + + vk::glsl::program_input in; + in.location = 0; + in.domain = vk::glsl::glsl_fragment_program; + in.name = "ScaleOffsetBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +void VKFragmentDecompilerThread::insertIntputs(std::stringstream & OS) +{ + for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem& PI : PT.items) + { + const vk::varying_register_t ® = vk::get_varying_register(PI.name); + + std::string var_name = PI.name; + if (var_name == "fogc") + var_name = "fog_c"; + + OS << "layout(location=" << reg.reg_location << ") in " << PT.type << " " << var_name << ";" << std::endl; + } + } +} + +void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) +{ + const std::pair table[] = + { + { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + OS << "layout(location=" << i << ") " << "out vec4 " << table[i].first << ";" << std::endl; + } +} + +void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) +{ + int location = 2; + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type != "sampler1D" && + PT.type != "sampler2D" && + PT.type != "sampler3D" && + PT.type != "samplerCube") + continue; + + for (const ParamItem& PI : PT.items) + { + std::string samplerType = PT.type; + int index = atoi(&PI.name.data()[3]); + + if (m_prog.unnormalized_coords & (1 << index)) + samplerType = "sampler2DRect"; + + vk::glsl::program_input in; + in.location = location; + in.domain = vk::glsl::glsl_fragment_program; + in.name = PI.name; + in.type = vk::glsl::input_type_texture; + + inputs.push_back(in); + + OS << "layout(set=1, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";" << std::endl; + } + } + + OS << "layout(std140, set=1, binding = 1) uniform FragmentConstantsBuffer" << std::endl; + OS << "{" << std::endl; + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type == "sampler1D" || + PT.type == "sampler2D" || + PT.type == "sampler3D" || + PT.type == "samplerCube") + continue; + + for (const ParamItem& PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; + } + + // A dummy value otherwise it's invalid to create an empty uniform buffer + OS << " vec4 void_value;" << std::endl; + OS << "};" << std::endl; + + vk::glsl::program_input in; + in.location = 1; + in.domain = vk::glsl::glsl_fragment_program; + in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +namespace vk +{ + // Note: It's not clear whether fog is computed per pixel or per vertex. 
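+ // (Concretely, the linear mode below emits f = fog_param1 * fog_c.x + (fog_param0 - 1.) into both fogc.x and fogc.y, while the exponential modes store the exponent in fogc.x and its exp() in fogc.y.)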
+ // But it makes more sense to compute exp of the interpolated value than to interpolate exp values. + void insert_fog_declaration(std::stringstream & OS, rsx::fog_mode mode) + { + switch (mode) + { + case rsx::fog_mode::linear: + OS << " vec4 fogc = vec4(fog_param1 * fog_c.x + (fog_param0 - 1.), fog_param1 * fog_c.x + (fog_param0 - 1.), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential: + OS << " vec4 fogc = vec4(11.084 * (fog_param1 * fog_c.x + fog_param0 - 1.5), exp(11.084 * (fog_param1 * fog_c.x + fog_param0 - 1.5)), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential2: + OS << " vec4 fogc = vec4(4.709 * (fog_param1 * fog_c.x + fog_param0 - 1.5), exp(-pow(4.709 * (fog_param1 * fog_c.x + fog_param0 - 1.5), 2.)), 0., 0.);\n"; + return; + case rsx::fog_mode::linear_abs: + OS << " vec4 fogc = vec4(fog_param1 * abs(fog_c.x) + (fog_param0 - 1.), fog_param1 * abs(fog_c.x) + (fog_param0 - 1.), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential_abs: + OS << " vec4 fogc = vec4(11.084 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5), exp(11.084 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5)), 0., 0.);\n"; + return; + case rsx::fog_mode::exponential2_abs: + OS << " vec4 fogc = vec4(4.709 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5), exp(-pow(4.709 * (fog_param1 * abs(fog_c.x) + fog_param0 - 1.5), 2.)), 0., 0.);\n"; + return; + } + } +} + +void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) +{ + vk::insert_glsl_legacy_function(OS); + + OS << "void main ()" << std::endl; + OS << "{" << std::endl; + + for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) + { + for (const ParamItem& PI : PT.items) + { + OS << " " << PT.type << " " << PI.name; + if (!PI.value.empty()) + OS << " = " << PI.value; + OS << ";" << std::endl; + } + } + + OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; + + // Search for fogc among the inputs + for (const ParamType& PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem& PI : PT.items) + { + if (PI.name == "fogc") + { + vk::insert_fog_declaration(OS, m_prog.fog_equation); + return; + } + } + } +} + +void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) +{ + const std::pair table[] = + { + { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + OS << " " << table[i].first << " = " << table[i].second << ";" << std::endl; + } + + if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + { + { + /** Note: Naruto Shippuden : Ultimate Ninja Storm 2 sets CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS in a shader + * but it writes depth in r1.z and not h2.z. + * Maybe there's a different flag for depth? + */ + //OS << ((m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? 
"\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h0.z;\n") << std::endl; + OS << " gl_FragDepth = r1.z;\n"; + } + } + + + OS << "}" << std::endl; +} + +void VKFragmentDecompilerThread::Task() +{ + m_shader = Decompile(); + vk_prog->SetInputs(inputs); +} + +VKFragmentProgram::VKFragmentProgram() +{ +} + +VKFragmentProgram::~VKFragmentProgram() +{ + Delete(); +} + +void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) +{ + u32 size; + VKFragmentDecompilerThread decompiler(shader, parr, prog, size, *this); + decompiler.Task(); + + for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM]) + { + for (const ParamItem& PI : PT.items) + { + if (PT.type == "sampler2D") + continue; + size_t offset = atoi(PI.name.c_str() + 2); + FragmentConstantOffsetCache.push_back(offset); + } + } +} + +void VKFragmentProgram::Compile() +{ + fs::file(fs::get_config_dir() + "FragmentProgram.frag", fom::rewrite).write(shader); + + std::vector spir_v; + if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_fragment_program, spir_v)) + throw EXCEPTION("Failed to compile fragment shader"); + + //Create the object and compile + VkShaderModuleCreateInfo fs_info; + fs_info.codeSize = spir_v.size() * sizeof(u32); + fs_info.pNext = nullptr; + fs_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + fs_info.pCode = (uint32_t*)spir_v.data(); + fs_info.flags = 0; + + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkCreateShaderModule(dev, &fs_info, nullptr, &handle); + + id = (u32)(handle); +} + +void VKFragmentProgram::Delete() +{ + shader.clear(); + + if (handle) + { + if (Emu.IsStopped()) + { + LOG_WARNING(RSX, "VKFragmentProgram::Delete(): vkDestroyShaderModule(0x%X) avoided", handle); + } + else + { + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkDestroyShaderModule(dev, handle, NULL); + handle = nullptr; + } + } +} + +void VKFragmentProgram::SetInputs(std::vector& inputs) +{ + for (auto &it : inputs) + { + uniforms.push_back(it); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h new file mode 100644 index 0000000000..c7fa7b922d --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -0,0 +1,69 @@ +#pragma once +#include "../Common/FragmentProgramDecompiler.h" +#include "Emu/RSX/RSXFragmentProgram.h" +#include "Utilities/Thread.h" +#include "VulkanAPI.h" +#include "../VK/VKHelpers.h" + +struct VKFragmentDecompilerThread : public FragmentProgramDecompiler +{ + std::string& m_shader; + ParamArray& m_parrDummy; + std::vector inputs; + class VKFragmentProgram *vk_prog; +public: + VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) + : FragmentProgramDecompiler(prog, size) + , m_shader(shader) + , m_parrDummy(parr) + , vk_prog(&dst) + { + } + + void Task(); + const std::vector& get_inputs() { return inputs; } +protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getFunction(FUNCTION) override; + virtual std::string saturate(const std::string &code) override; + virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; + + virtual void insertHeader(std::stringstream &OS) override; + virtual void insertIntputs(std::stringstream &OS) override; + virtual void insertOutputs(std::stringstream &OS) override; + virtual void insertConstants(std::stringstream &OS) override; + virtual void insertMainStart(std::stringstream &OS) override; + virtual void 
insertMainEnd(std::stringstream &OS) override; +}; + +/** Storage for a fragment program in the process of recompilation. + * This class calls Vulkan functions and should only be used from the RSX/Graphics thread. + */ +class VKFragmentProgram +{ +public: + VKFragmentProgram(); + ~VKFragmentProgram(); + + ParamArray parr; + VkShaderModule handle = nullptr; + int id; + std::string shader; + std::vector FragmentConstantOffsetCache; + + std::vector uniforms; + void SetInputs(std::vector& uniforms); + /** + * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + * @param td texture dimensions of input textures + */ + void Decompile(const RSXFragmentProgram& prog); + + /** Compile the decompiled fragment shader into a format we can use with Vulkan. */ + void Compile(); + +private: + /** Deletes the shader and any stored information */ + void Delete(); +}; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 869b4116d3..ae9fbacb6d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,10 +1,1062 @@ #include "stdafx.h" -#ifndef __APPLE__ +#include "Utilities/rPlatform.h" // only for rImage +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/state.h" #include "VKGSRender.h" +#include "../rsx_methods.h" +#include "../Common/BufferUtils.h" -VKGSRender::VKGSRender() +namespace { + u32 get_max_depth_value(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 0xFFFF; + case rsx::surface_depth_format::z24s8: return 0xFFFFFF; + } + throw EXCEPTION("Unknown depth format"); + } + u8 get_pixel_size(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 2; + case rsx::surface_depth_format::z24s8: return 4; + } + throw EXCEPTION("Unknown depth format"); + } } -#endif \ No newline at end of file +namespace vk +{ + VkCompareOp compare_op(u32 gl_name) + { + switch (gl_name) + { + case CELL_GCM_GREATER: + return VK_COMPARE_OP_GREATER; + case CELL_GCM_LESS: + return VK_COMPARE_OP_LESS; + case CELL_GCM_LEQUAL: + return VK_COMPARE_OP_LESS_OR_EQUAL; + case CELL_GCM_GEQUAL: + return VK_COMPARE_OP_GREATER_OR_EQUAL; + case CELL_GCM_EQUAL: + return VK_COMPARE_OP_EQUAL; + case CELL_GCM_ALWAYS: + return VK_COMPARE_OP_ALWAYS; + default: + throw EXCEPTION("Unsupported compare op: 0x%X", gl_name); + } + } + + VkFormat get_compatible_surface_format(rsx::surface_color_format color_format) + { + switch (color_format) + { + case rsx::surface_color_format::r5g6b5: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + + case rsx::surface_color_format::a8r8g8b8: + return VK_FORMAT_B8G8R8A8_UNORM; + + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + LOG_ERROR(RSX, "Format 0x%X may be buggy.", color_format); + return VK_FORMAT_B8G8R8A8_UNORM; + + case rsx::surface_color_format::w16z16y16x16: + return VK_FORMAT_R16G16B16A16_SFLOAT; + + case rsx::surface_color_format::w32z32y32x32: + return VK_FORMAT_R32G32B32A32_SFLOAT; + + case rsx::surface_color_format::b8: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::x32: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case 
rsx::surface_color_format::a8b8g8r8: + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); + return VK_FORMAT_B8G8R8A8_UNORM; + } + } + + VkFormat get_compatible_depth_surface_format(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; + case rsx::surface_depth_format::z24s8: return VK_FORMAT_D16_UNORM; + } + throw EXCEPTION("Invalid format (0x%x)", format); + } + + std::vector get_draw_buffers(rsx::surface_target fmt) + { + switch (fmt) + { + case rsx::surface_target::none: + return{}; + case rsx::surface_target::surface_a: + return{ 0 }; + case rsx::surface_target::surface_b: + return{ 1 }; + case rsx::surface_target::surfaces_a_b: + return{ 0, 1 }; + case rsx::surface_target::surfaces_a_b_c: + return{ 0, 1, 2 }; + case rsx::surface_target::surfaces_a_b_c_d: + return{ 0, 1, 2, 3 }; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", fmt); + return{}; + } + } + + VkBlendFactor get_blend_factor(u16 factor) + { + switch (factor) + { + case CELL_GCM_ONE: return VK_BLEND_FACTOR_ONE; + case CELL_GCM_ZERO: return VK_BLEND_FACTOR_ZERO; + case CELL_GCM_SRC_ALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return VK_BLEND_FACTOR_DST_ALPHA; + case CELL_GCM_SRC_COLOR: return VK_BLEND_FACTOR_SRC_COLOR; + case CELL_GCM_DST_COLOR: return VK_BLEND_FACTOR_DST_COLOR; + case CELL_GCM_CONSTANT_COLOR: return VK_BLEND_FACTOR_CONSTANT_COLOR; + case CELL_GCM_CONSTANT_ALPHA: return VK_BLEND_FACTOR_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + default: + throw EXCEPTION("Unknown blend factor 0x%X", factor); + } + }; + + VkBlendOp get_blend_op(u16 op) + { + switch (op) + { + case CELL_GCM_FUNC_ADD: return VK_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return VK_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; + default: + throw EXCEPTION("Unknown blend op: 0x%X", op); + } + } +} + +VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) +{ + shaders_cache.load(rsx::shader_language::glsl); + + HINSTANCE hInstance = NULL; + HWND hWnd = (HWND)m_frame->handle(); + + m_thread_context.createInstance("RPCS3"); + m_thread_context.makeCurrentInstance(1); + m_thread_context.enable_debugging(); + + std::vector& gpus = m_thread_context.enumerateDevices(); + m_swap_chain = m_thread_context.createSwapChain(hInstance, hWnd, gpus[0]); + + m_device = (vk::render_device *)(&m_swap_chain->get_device()); + + vk::set_current_thread_ctx(m_thread_context); + vk::set_current_renderer(m_swap_chain->get_device()); + + m_swap_chain->init_swapchain(m_frame->client_size().width, m_frame->client_size().height); + + //create command buffer... 
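+ //The one-shot command buffer recorded below only transitions every swapchain image from UNDEFINED to PRESENT_SRC_KHR; it is submitted once through execute_command_buffer() further down.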
+ m_command_buffer_pool.create((*m_device)); + m_command_buffer.create(m_command_buffer_pool); + + VkCommandBufferInheritanceInfo inheritance_info; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + inheritance_info.pNext = nullptr; + inheritance_info.renderPass = VK_NULL_HANDLE; + inheritance_info.subpass = 0; + inheritance_info.framebuffer = VK_NULL_HANDLE; + inheritance_info.occlusionQueryEnable = VK_FALSE; + inheritance_info.queryFlags = 0; + inheritance_info.pipelineStatistics = 0; + + VkCommandBufferBeginInfo begin_infos; + begin_infos.flags = 0; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.pNext = nullptr; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); + + for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) + { + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_IMAGE_ASPECT_COLOR_BIT); + } + + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); + execute_command_buffer(false); + + m_scale_offset_buffer.create((*m_device), 128); + m_vertex_constants_buffer.create((*m_device), 512 * 16); + m_fragment_constants_buffer.create((*m_device), 512 * 16); + m_index_buffer.create((*m_device), 65536, VK_FORMAT_R16_UINT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); +} + +VKGSRender::~VKGSRender() +{ + if (m_submit_fence) + { + vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, 1000000L); + vkDestroyFence((*m_device), m_submit_fence, nullptr); + m_submit_fence = nullptr; + } + + if (m_present_semaphore) + { + vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); + m_present_semaphore = nullptr; + } + + vk::destroy_global_resources(); + + //TODO: Properly destroy shader modules instead of calling clear... 
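+ //clear() only empties the cache; the VkShaderModule handles owned by the cached programs are not freed here and leak until device teardown. A proper fix would vkDestroyShaderModule() each module before clearing (sketch only; assumes the program buffer exposes its entries, which it currently does not).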
+ m_prog_buffer.clear(); + + m_scale_offset_buffer.destroy(); + m_vertex_constants_buffer.destroy(); + m_fragment_constants_buffer.destroy(); + m_index_buffer.destroy(); + + if (m_render_pass) + destroy_render_pass(); + + m_command_buffer.destroy(); + m_command_buffer_pool.destroy(); + + m_swap_chain->destroy(); + + m_thread_context.close(); + delete m_swap_chain; +} + +bool VKGSRender::on_access_violation(u32 address, bool is_writing) +{ + if (is_writing) + return m_texture_cache.invalidate_address(address); + + return false; +} + +void VKGSRender::begin() +{ + rsx::thread::begin(); + + //TODO: Fence sync, ring-buffers, etc + //CHECK_RESULT(vkDeviceWaitIdle((*m_device))); + + if (!load_program()) + return; + + if (!recording) + begin_command_buffer_recording(); + + init_buffers(); + + m_program->set_draw_buffer_count(m_draw_buffers_count); + + u32 color_mask = rsx::method_registers[NV4097_SET_COLOR_MASK]; + bool color_mask_b = !!(color_mask & 0xff); + bool color_mask_g = !!((color_mask >> 8) & 0xff); + bool color_mask_r = !!((color_mask >> 16) & 0xff); + bool color_mask_a = !!((color_mask >> 24) & 0xff); + + VkColorComponentFlags mask = 0; + if (color_mask_a) mask |= VK_COLOR_COMPONENT_A_BIT; + if (color_mask_b) mask |= VK_COLOR_COMPONENT_B_BIT; + if (color_mask_g) mask |= VK_COLOR_COMPONENT_G_BIT; + if (color_mask_r) mask |= VK_COLOR_COMPONENT_R_BIT; + + VkColorComponentFlags color_masks[4] = { mask }; + + u8 render_targets[] = { 0, 1, 2, 3 }; + m_program->set_color_mask(m_draw_buffers_count, render_targets, color_masks); + + //TODO stencil mask + m_program->set_depth_write_mask(rsx::method_registers[NV4097_SET_DEPTH_MASK]); + + if (rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]) + { + m_program->set_depth_test_enable(VK_TRUE); + m_program->set_depth_compare_op(vk::compare_op(rsx::method_registers[NV4097_SET_DEPTH_FUNC])); + } + else + m_program->set_depth_test_enable(VK_FALSE); + + if (rsx::method_registers[NV4097_SET_BLEND_ENABLE]) + { + u32 sfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR]; + u32 dfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR]; + + VkBlendFactor sfactor_rgb = vk::get_blend_factor(sfactor); + VkBlendFactor sfactor_a = vk::get_blend_factor(sfactor >> 16); + VkBlendFactor dfactor_rgb = vk::get_blend_factor(dfactor); + VkBlendFactor dfactor_a = vk::get_blend_factor(dfactor >> 16); + + //TODO: Separate target blending + + VkBool32 blend_state = VK_TRUE; + + m_program->set_blend_state(m_draw_buffers_count, render_targets, blend_state); + m_program->set_blend_func(m_draw_buffers_count, render_targets, sfactor_rgb, dfactor_rgb, sfactor_a, dfactor_a); + + u32 equation = rsx::method_registers[NV4097_SET_BLEND_EQUATION]; + VkBlendOp equation_rgb = vk::get_blend_op(equation); + VkBlendOp equation_a = vk::get_blend_op(equation >> 16); + + m_program->set_blend_op(m_draw_buffers_count, render_targets, equation_rgb, equation_a); + } + else + { + VkBool32 blend_state = VK_FALSE; + m_program->set_blend_state(m_draw_buffers_count, render_targets, blend_state); + } + + if (rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) + { + if (rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFF && + rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFFFFFF) + { + LOG_ERROR(RSX, "Custom primitive restart index 0x%X. 
Should rewrite index buffer with proper value!", rsx::method_registers[NV4097_SET_RESTART_INDEX]); + } + + LOG_ERROR(RSX, "Primitive restart enabled!"); + m_program->set_primitive_restart(VK_TRUE); + } + else + m_program->set_primitive_restart(VK_FALSE); + + u32 line_width = rsx::method_registers[NV4097_SET_LINE_WIDTH]; + float actual_line_width = (line_width >> 3) + (line_width & 7) / 8.f; + + vkCmdSetLineWidth(m_command_buffer, actual_line_width); + + //TODO: Set up other render-state parameters into the program pipeline + + VkRenderPassBeginInfo rp_begin; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_render_pass; + rp_begin.framebuffer = m_framebuffer; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_frame->client_size().width; + rp_begin.renderArea.extent.height = m_frame->client_size().height; + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = nullptr; + + vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + m_draw_calls++; +} + +namespace +{ + bool normalize(rsx::vertex_base_type type) + { + switch (type) + { + case rsx::vertex_base_type::s1: + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::cmp: + return true; + case rsx::vertex_base_type::f: + case rsx::vertex_base_type::sf: + case rsx::vertex_base_type::ub256: + case rsx::vertex_base_type::s32k: + return false; + } + throw EXCEPTION("unknown vertex type"); + } +} + +void VKGSRender::end() +{ + vk::texture *texture0 = nullptr; + for (int i = 0; i < rsx::limits::textures_count; ++i) + { + if (m_program->has_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i))) + { + if (!textures[i].enabled()) + { + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i)); + continue; + } + + vk::texture &tex = (texture0)? 
(*texture0): m_texture_cache.upload_texture(m_command_buffer, textures[i], m_rtts); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "tex" + std::to_string(i), tex); + texture0 = &tex; + } + } + + auto &upload_info = upload_vertex_data(); + + m_program->set_primitive_topology(std::get<0>(upload_info)); + m_program->use(m_command_buffer, m_render_pass, 0); + + if (!std::get<1>(upload_info)) + vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0); + else + { + VkIndexType &index_type = std::get<3>(upload_info); + u32 &index_count = std::get<2>(upload_info); + + vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer, 0, index_type); + vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); + } + + vkCmdEndRenderPass(m_command_buffer); + + m_texture_cache.flush(m_command_buffer); + + end_command_buffer_recording(); + execute_command_buffer(false); + + //Finish() + vkDeviceWaitIdle((*m_device)); + + rsx::thread::end(); +} + +void VKGSRender::set_viewport() +{ + u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; + u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; + + u16 viewport_x = viewport_horizontal & 0xffff; + u16 viewport_y = viewport_vertical & 0xffff; + u16 viewport_w = viewport_horizontal >> 16; + u16 viewport_h = viewport_vertical >> 16; + + u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; + u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; + u16 scissor_x = scissor_horizontal; + u16 scissor_w = scissor_horizontal >> 16; + u16 scissor_y = scissor_vertical; + u16 scissor_h = scissor_vertical >> 16; + +// u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; +// rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); + + VkViewport viewport; + viewport.x = viewport_x; + viewport.y = viewport_y; + viewport.width = viewport_w; + viewport.height = viewport_h; + viewport.minDepth = 0.f; + viewport.maxDepth = 1.f; + + vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); + + VkRect2D scissor; + scissor.extent.height = scissor_h; + scissor.extent.width = scissor_w; + scissor.offset.x = scissor_x; + scissor.offset.y = scissor_y; + + vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); +} + +void VKGSRender::on_init_thread() +{ + GSRender::on_init_thread(); + + for (auto &attrib_buffer : m_attrib_buffers) + { + attrib_buffer.create((*m_device), 65536, VK_FORMAT_R8_UNORM, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + + u8 *data = static_cast(attrib_buffer.map(0, 65536)); + memset(data, 0, 65536); + attrib_buffer.unmap(); + } +} + +void VKGSRender::on_exit() +{ + m_texture_cache.destroy(); + + for (auto &attrib_buffer : m_attrib_buffers) + { + attrib_buffer.destroy(); + } +} + +void VKGSRender::clear_surface(u32 mask) +{ + //TODO: Build clear commands into current renderpass descriptor set + if (!(mask & 0xF3)) return; + + if (m_current_present_image== 0xFFFF) return; + + bool was_recording = recording; + + if (!was_recording) + begin_command_buffer_recording(); + + init_buffers(); + + float depth_clear = 1.f; + u32 stencil_clear = 0.f; + + VkClearValue depth_stencil_clear_values, color_clear_values; + VkImageSubresourceRange depth_range = vk::default_image_subresource_range(); + depth_range.aspectMask = 0; + + if (mask & 0x1) + { + rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); + u32 max_depth_value = 
get_max_depth_value(surface_depth_format); + + u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; + float depth_clear = (float)clear_depth / max_depth_value; + + depth_range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + depth_stencil_clear_values.depthStencil.depth = depth_clear; + depth_stencil_clear_values.depthStencil.stencil = stencil_clear; + } + +/* if (mask & 0x2) + { + u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; + u32 stencil_mask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; + + //TODO set stencil mask + depth_range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + depth_stencil_clear_values.depthStencil.stencil = stencil_mask; + }*/ + + if (mask & 0xF0) + { + u32 clear_color = rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]; + u8 clear_a = clear_color >> 24; + u8 clear_r = clear_color >> 16; + u8 clear_g = clear_color >> 8; + u8 clear_b = clear_color; + + //TODO set color mask + /*VkBool32 clear_red = (VkBool32)!!(mask & 0x20); + VkBool32 clear_green = (VkBool32)!!(mask & 0x40); + VkBool32 clear_blue = (VkBool32)!!(mask & 0x80); + VkBool32 clear_alpha = (VkBool32)!!(mask & 0x10);*/ + + color_clear_values.color.float32[0] = (float)clear_r / 255; + color_clear_values.color.float32[1] = (float)clear_g / 255; + color_clear_values.color.float32[2] = (float)clear_b / 255; + color_clear_values.color.float32[3] = (float)clear_a / 255; + + VkImageSubresourceRange range = vk::default_image_subresource_range(); + + for (u32 i = 0; i < m_rtts.m_bound_render_targets.size(); ++i) + { + if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; + + VkImage color_image = (*std::get<1>(m_rtts.m_bound_render_targets[i])); + vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &color_clear_values.color, 1, &range); + } + } + + if (mask & 0x3) + vkCmdClearDepthStencilImage(m_command_buffer, (*std::get<1>(m_rtts.m_bound_depth_stencil)), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); + + if (!was_recording) + { + end_command_buffer_recording(); + execute_command_buffer(false); + } + + recording = was_recording; +} + +bool VKGSRender::do_method(u32 cmd, u32 arg) +{ + switch (cmd) + { + case NV4097_CLEAR_SURFACE: + clear_surface(arg); + return true; + default: + return false; + } +} + +void VKGSRender::init_render_pass(VkFormat surface_format, VkFormat depth_format, u8 num_draw_buffers, u8 *draw_buffers) +{ + //TODO: Create buffers as requested by the game. Render to swapchain for now.. + /* Describe a render pass and framebuffer attachments */ + VkAttachmentDescription attachments[2]; + memset(&attachments, 0, sizeof attachments); + + attachments[0].format = surface_format; + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; //Set to clear removes warnings about empty contents after flip; overwrites previous calls + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; //PRESENT_SRC_KHR?? + attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + attachments[1].format = VK_FORMAT_D16_UNORM; /* Depth buffer format. 
Should be more elegant than this */ + attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkAttachmentReference template_color_reference; + template_color_reference.attachment = VK_ATTACHMENT_UNUSED; + template_color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference depth_reference; + depth_reference.attachment = num_draw_buffers; + depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + //Fill in draw_buffers information... + VkAttachmentDescription real_attachments[4]; + VkAttachmentReference color_references[4]; + + for (int i = 0; i < num_draw_buffers; ++i) + { + real_attachments[i] = attachments[0]; + + color_references[i] = template_color_reference; + color_references[i].attachment = (draw_buffers)? draw_buffers[i]: i; + } + + real_attachments[num_draw_buffers] = attachments[1]; + + VkSubpassDescription subpass; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.flags = 0; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = num_draw_buffers; + subpass.pColorAttachments = num_draw_buffers? color_references: nullptr; + subpass.pResolveAttachments = nullptr; + subpass.pDepthStencilAttachment = &depth_reference; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo rp_info; + rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + rp_info.pNext = NULL; + rp_info.attachmentCount = num_draw_buffers+1; + rp_info.pAttachments = real_attachments; + rp_info.subpassCount = 1; + rp_info.pSubpasses = &subpass; + rp_info.dependencyCount = 0; + rp_info.pDependencies = NULL; + rp_info.flags = 0; + + CHECK_RESULT(vkCreateRenderPass((*m_device), &rp_info, NULL, &m_render_pass)); +} + +void VKGSRender::destroy_render_pass() +{ + vkDestroyRenderPass((*m_device), m_render_pass, nullptr); + m_render_pass = nullptr; +} + +bool VKGSRender::load_program() +{ + RSXVertexProgram vertex_program = get_current_vertex_program(); + RSXFragmentProgram fragment_program = get_current_fragment_program(); + + //Load current program from buffer + m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); + + //TODO: Update constant buffers.. + //1. Update scale-offset matrix + //2. Update vertex constants + //3. 
Update fragment constants
+	u8 *buf = (u8*)m_scale_offset_buffer.map(0, VK_WHOLE_SIZE);
+
+	//TODO: Add case for this in RSXThread
+	/**
+	* NOTE: While VK's coordinate system resembles GL's, the clip volume is no longer symmetrical in z.
+	* It's like D3D without the flip in y (depending on how you build the SPIR-V).
+	*/
+	{
+		int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+		int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+
+		float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
+		float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
+		offset_x /= clip_w / 2.f;
+
+		float scale_y = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
+		float offset_y = ((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
+		offset_y /= clip_h / 2.f;
+
+		float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
+		float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
+
+		float one = 1.f;
+
+		stream_vector(buf, (u32&)scale_x, 0, 0, (u32&)offset_x);
+		stream_vector((char*)buf + 16, 0, (u32&)scale_y, 0, (u32&)offset_y);
+		stream_vector((char*)buf + 32, 0, 0, (u32&)scale_z, (u32&)offset_z);
+		stream_vector((char*)buf + 48, 0, 0, 0, (u32&)one);
+	}
+
+	memset((char*)buf+64, 0, 8);
+//	memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float));
+//	memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float));
+	m_scale_offset_buffer.unmap();
+
+	buf = (u8*)m_vertex_constants_buffer.map(0, VK_WHOLE_SIZE);
+	fill_vertex_program_constants_data(buf);
+	m_vertex_constants_buffer.unmap();
+
+	size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
+	buf = (u8*)m_fragment_constants_buffer.map(0, fragment_constants_sz);
+	m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program);
+	m_fragment_constants_buffer.unmap();
+
+	m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_scale_offset_buffer);
+	m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_vertex_constants_buffer);
+	m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_scale_offset_buffer);
+	m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_fragment_constants_buffer);
+
+	return true;
+}
+
+static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
+{
+	NV4097_SET_SURFACE_COLOR_AOFFSET,
+	NV4097_SET_SURFACE_COLOR_BOFFSET,
+	NV4097_SET_SURFACE_COLOR_COFFSET,
+	NV4097_SET_SURFACE_COLOR_DOFFSET
+};
+
+static const u32 mr_color_dma[rsx::limits::color_buffers_count] =
+{
+	NV4097_SET_CONTEXT_DMA_COLOR_A,
+	NV4097_SET_CONTEXT_DMA_COLOR_B,
+	NV4097_SET_CONTEXT_DMA_COLOR_C,
+	NV4097_SET_CONTEXT_DMA_COLOR_D
+};
+
+static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
+{
+	NV4097_SET_SURFACE_PITCH_A,
+	NV4097_SET_SURFACE_PITCH_B,
+	NV4097_SET_SURFACE_PITCH_C,
+	NV4097_SET_SURFACE_PITCH_D
+};
+
+void VKGSRender::init_buffers(bool skip_reading)
+{
+	if (dirty_frame)
+	{
+		//Prepare surface for new frame
+		VkSemaphoreCreateInfo semaphore_info;
+		semaphore_info.flags = 0;
+		semaphore_info.pNext = nullptr;
+		semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+
+		vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
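+		//This semaphore is handed to vkAcquireNextImageKHR below and waited on by the present queue in flip()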
+ + VkFence nullFence = VK_NULL_HANDLE; + CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, nullFence, &m_current_present_image)); + + dirty_frame = false; + } + + prepare_rtts(); + + if (!skip_reading) + { + read_buffers(); + } + + set_viewport(); +} + +void VKGSRender::read_buffers() +{ +} + +void VKGSRender::write_buffers() +{ +} + +void VKGSRender::begin_command_buffer_recording() +{ + VkCommandBufferInheritanceInfo inheritance_info; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + inheritance_info.pNext = nullptr; + inheritance_info.renderPass = VK_NULL_HANDLE; + inheritance_info.subpass = 0; + inheritance_info.framebuffer = VK_NULL_HANDLE; + inheritance_info.occlusionQueryEnable = VK_FALSE; + inheritance_info.queryFlags = 0; + inheritance_info.pipelineStatistics = 0; + + VkCommandBufferBeginInfo begin_infos; + begin_infos.flags = 0; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.pNext = nullptr; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + if (m_submit_fence) + { + vkWaitForFences(*m_device, 1, &m_submit_fence, VK_TRUE, ~0ULL); + vkDestroyFence(*m_device, m_submit_fence, nullptr); + m_submit_fence = nullptr; + + CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); + } + + CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); + recording = true; +} + +void VKGSRender::end_command_buffer_recording() +{ + recording = false; + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); +} + +void VKGSRender::prepare_rtts() +{ + u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; + + if (!m_rtts_dirty) + return; + + m_rtts_dirty = false; + bool reconfigure_render_pass = true; + + if (m_surface.format != surface_format) + { + m_surface.unpack(surface_format); + reconfigure_render_pass = true; + } + + u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; + u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; + + u32 clip_width = clip_horizontal >> 16; + u32 clip_height = clip_vertical >> 16; + u32 clip_x = clip_horizontal; + u32 clip_y = clip_vertical; + + m_rtts.prepare_render_target(&m_command_buffer, + surface_format, + clip_horizontal, clip_vertical, + rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]), + get_color_surface_addresses(), get_zeta_surface_address(), + (*m_device), &m_command_buffer); + + //Bind created rtts as current fbo... 
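+	//Gather one image view per bound color target (plus depth-stencil, if any) to build the framebuffer below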
+	VkImageView attachments[5];
+	std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]));
+
+	m_framebuffer.destroy();
+	std::vector<VkImageView> fbo_images;
+
+	for (u8 index: draw_buffers)
+	{
+		vk::texture *raw = std::get<1>(m_rtts.m_bound_render_targets[index]);
+		VkImageView as_image = (*raw);
+		fbo_images.push_back(as_image);
+	}
+
+	if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr)
+	{
+		vk::texture *raw = (std::get<1>(m_rtts.m_bound_depth_stencil));
+		VkImageView depth_image = (*raw);
+		fbo_images.push_back(depth_image);
+	}
+
+	if (reconfigure_render_pass)
+	{
+		//Create render pass with draw_buffers information
+		//Somewhat similar to glDrawBuffers
+
+		if (m_render_pass)
+			destroy_render_pass();
+
+		init_render_pass(vk::get_compatible_surface_format(m_surface.color_format),
+			vk::get_compatible_depth_surface_format(m_surface.depth_format),
+			draw_buffers.size(),
+			draw_buffers.data());
+	}
+
+	m_framebuffer.create((*m_device), m_render_pass, fbo_images.data(), fbo_images.size(),
+		clip_width, clip_height);
+
+	m_draw_buffers_count = draw_buffers.size();
+}
+
+void VKGSRender::execute_command_buffer(bool wait)
+{
+	if (recording)
+		throw EXCEPTION("execute_command_buffer called before end_command_buffer_recording()!");
+
+	if (m_submit_fence)
+		throw EXCEPTION("Synchronization deadlock!");
+
+	VkFenceCreateInfo fence_info;
+	fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+	fence_info.flags = 0;
+	fence_info.pNext = nullptr;
+
+	CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence));
+
+	VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+	VkCommandBuffer cmd = m_command_buffer;
+
+	VkSubmitInfo infos;
+	infos.commandBufferCount = 1;
+	infos.pCommandBuffers = &cmd;
+	infos.pNext = nullptr;
+	infos.pSignalSemaphores = nullptr;
+	infos.pWaitDstStageMask = &pipe_stage_flags;
+	infos.signalSemaphoreCount = 0;
+	infos.waitSemaphoreCount = 0;
+	infos.pWaitSemaphores = nullptr;
+	infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+
+	CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, m_submit_fence));
+	CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue()));
+}
+
+void VKGSRender::flip(int buffer)
+{
+	//LOG_NOTICE(Log::RSX, "flip(%d)", buffer);
+	u32 buffer_width = gcm_buffers[buffer].width;
+	u32 buffer_height = gcm_buffers[buffer].height;
+	u32 buffer_pitch = gcm_buffers[buffer].pitch;
+
+	rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
+
+	areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
+
+	coordi aspect_ratio;
+	if (1) //enable aspect ratio
+	{
+		sizei csize = m_frame->client_size();
+		sizei new_size = csize;
+
+		const double aq = (double)buffer_width / buffer_height;
+		const double rq = (double)new_size.width / new_size.height;
+		const double q = aq / rq;
+
+		if (q > 1.0)
+		{
+			new_size.height = int(new_size.height / q);
+			aspect_ratio.y = (csize.height - new_size.height) / 2;
+		}
+		else if (q < 1.0)
+		{
+			new_size.width = int(new_size.width * q);
+			aspect_ratio.x = (csize.width - new_size.width) / 2;
+		}
+
+		aspect_ratio.size = new_size;
+	}
+	else
+	{
+		aspect_ratio.size = m_frame->client_size();
+	}
+
+	//Check if anything is waiting in queue and submit it if possible.. 
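+	//A live fence means the previous submission has not been reclaimed yet; wait for it before reusing the command buffer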
+ if (m_submit_fence) + { + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); + + vkDestroyFence((*m_device), m_submit_fence, nullptr); + m_submit_fence = nullptr; + + CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); + } + + VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); + uint32_t next_image_temp = 0; + + VkPresentInfoKHR present; + present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present.pNext = nullptr; + present.swapchainCount = 1; + present.pSwapchains = &swap_chain; + present.pImageIndices = &m_current_present_image; + present.pWaitSemaphores = &m_present_semaphore; + present.waitSemaphoreCount = 1; + + if (m_render_pass) + { + begin_command_buffer_recording(); + + if (m_present_semaphore) + { + //Blit contents to screen.. + VkImage image_to_flip = nullptr; + + if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr) + image_to_flip = (*std::get<1>(m_rtts.m_bound_render_targets[0])); + else + image_to_flip = (*std::get<1>(m_rtts.m_bound_render_targets[1])); + + VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); + vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT); + } + else + { + //No draw call was issued! + //TODO: Properly clear the background to rsx value + m_swap_chain->acquireNextImageKHR((*m_device), (*m_swap_chain), ~0ULL, VK_NULL_HANDLE, VK_NULL_HANDLE, &next_image_temp); + + VkImageSubresourceRange range = vk::default_image_subresource_range(); + VkClearColorValue clear_black = { 0 }; + vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(next_image_temp), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, &clear_black, 1, &range); + + present.pImageIndices = &next_image_temp; + present.waitSemaphoreCount = 0; + } + + end_command_buffer_recording(); + execute_command_buffer(false); + + CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); + CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue())); + + if (m_present_semaphore) + { + vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); + m_present_semaphore = nullptr; + } + } + + //Feed back damaged resources to the main texture cache for management... 
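+	//Surfaces the RTT manager invalidated this frame are handed to the texture cache instead of being destroyed outright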
+ m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); + m_rtts.invalidated_resources.clear(); + + m_draw_calls = 0; + dirty_frame = true; + m_frame->flip(m_context); +} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 70861d5415..d4d930fc6c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -1,8 +1,98 @@ #pragma once -#include -#include +#include "Emu/RSX/GSRender.h" +#include "VKHelpers.h" +#include "VKTextureCache.h" +#include "VKRenderTargets.h" -class VKGSRender +#define RSX_DEBUG 1 + +#include "VKProgramBuffer.h" +#include "../GCM.h" + +class VKGSRender : public GSRender { +private: + VKFragmentProgram m_fragment_prog; + VKVertexProgram m_vertex_prog; + + vk::glsl::program *m_program; + vk::context m_thread_context; + + rsx::surface_info m_surface; + + vk::buffer m_attrib_buffers[rsx::limits::vertex_count]; + + vk::texture_cache m_texture_cache; + rsx::vk_render_targets m_rtts; + +public: + //vk::fbo draw_fbo; + +private: + VKProgramBuffer m_prog_buffer; + + vk::render_device *m_device; + vk::swap_chain* m_swap_chain; + //buffer + + vk::buffer m_scale_offset_buffer; + vk::buffer m_vertex_constants_buffer; + vk::buffer m_fragment_constants_buffer; + + vk::buffer m_index_buffer; + + //Vulkan internals + u32 m_current_present_image = 0xFFFF; + VkSemaphore m_present_semaphore = nullptr; + + u32 m_current_sync_buffer_index = 0; + VkFence m_submit_fence = nullptr; + + vk::command_pool m_command_buffer_pool; + vk::command_buffer m_command_buffer; + bool recording = false; + bool dirty_frame = true; + + //Single render pass + VkRenderPass m_render_pass = nullptr; + + u32 m_draw_calls = 0; + + u8 m_draw_buffers_count = 0; + vk::framebuffer m_framebuffer; + +public: VKGSRender(); -}; \ No newline at end of file + ~VKGSRender(); + +private: + void clear_surface(u32 mask); + void init_render_pass(VkFormat surface_format, VkFormat depth_format, u8 num_draw_buffers, u8 *draw_buffers); + void destroy_render_pass(); + void execute_command_buffer(bool wait); + void begin_command_buffer_recording(); + void end_command_buffer_recording(); + + void prepare_rtts(); + + std::tuple + upload_vertex_data(); + +public: + bool load_program(); + void init_buffers(bool skip_reading = false); + void read_buffers(); + void write_buffers(); + void set_viewport(); + +protected: + void begin() override; + void end() override; + + void on_init_thread() override; + void on_exit() override; + bool do_method(u32 id, u32 arg) override; + void flip(int buffer) override; + + bool on_access_violation(u32 address, bool is_writing) override; +}; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp new file mode 100644 index 0000000000..001265384f --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -0,0 +1,285 @@ +#include "stdafx.h" +#include "VKHelpers.h" + +namespace vk +{ + context *g_current_vulkan_ctx = nullptr; + render_device g_current_renderer; + + buffer g_null_buffer; + texture g_null_texture; + + VkSampler g_null_sampler = nullptr; + VkImageView g_null_image_view = nullptr; + + VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) + { + return realloc(pOriginal, size); + } + + VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) + { + return _aligned_malloc(size, alignment); + } + + VKAPI_ATTR void VKAPI_CALL mem_free(void *pUserData, void *pMemory) + 
{ + _aligned_free(pMemory); + } + + VkFormat get_compatible_sampler_format(u32 format, VkComponentMapping& swizzle, u8 swizzle_mask) + { + u8 remap_a = swizzle_mask & 0x3; + u8 remap_r = (swizzle_mask >> 2) & 0x3; + u8 remap_g = (swizzle_mask >> 4) & 0x3; + u8 remap_b = (swizzle_mask >> 6) & 0x3; + + VkComponentSwizzle map_table[] = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; + + VkComponentMapping remapped; + remapped.a = map_table[remap_a]; + remapped.b = map_table[remap_b]; + remapped.g = map_table[remap_g]; + remapped.r = map_table[remap_r]; + + swizzle = default_component_map(); + + switch (format) + { + case CELL_GCM_TEXTURE_B8: + { + swizzle = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; + return VK_FORMAT_R8_UNORM; + } + case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_B4G4R4A4_UNORM_PACK16; + case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A8R8G8B8: + { + swizzle = remapped; + return VK_FORMAT_B8G8R8A8_UNORM; + } + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + { + return VK_FORMAT_BC3_UNORM_BLOCK; + } + case CELL_GCM_TEXTURE_G8B8: + { + swizzle = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }; + return VK_FORMAT_R8G8_UNORM; + } + case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; //Expand, discard high bit? + case CELL_GCM_TEXTURE_DEPTH24_D8: return VK_FORMAT_R32_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_R16_SFLOAT; + case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: + { + swizzle.a = VK_COMPONENT_SWIZZLE_ONE; + return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + } + case CELL_GCM_TEXTURE_D8R8G8B8: + { + swizzle = remapped; + swizzle.a = VK_COMPONENT_SWIZZLE_ONE; + return VK_FORMAT_B8G8R8A8_UNORM; + } + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; //Expand + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; //Expand + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + break; + } + throw EXCEPTION("Invalid or unsupported texture format (0x%x)", format); + } + + VkAllocationCallbacks default_callbacks() + { + VkAllocationCallbacks callbacks; + callbacks.pfnAllocation = vk::mem_alloc; + callbacks.pfnFree = vk::mem_free; + callbacks.pfnReallocation = vk::mem_realloc; + + return callbacks; + } + + VkBuffer null_buffer() + { + if (g_null_buffer.size()) + return g_null_buffer; + + 
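+		//First use: lazily back the handle with a small dummy texel buffer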
g_null_buffer.create(g_current_renderer, 32, VK_FORMAT_R32_SFLOAT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + return g_null_buffer; + } + + VkSampler null_sampler() + { + if (g_null_sampler) + return g_null_sampler; + + VkSamplerCreateInfo sampler_info; + memset(&sampler_info, 0, sizeof(sampler_info)); + + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.pNext = nullptr; + sampler_info.unnormalizedCoordinates = VK_FALSE; + sampler_info.mipLodBias = 0; + sampler_info.maxAnisotropy = 0; + sampler_info.magFilter = VK_FILTER_NEAREST; + sampler_info.minFilter = VK_FILTER_NEAREST; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + vkCreateSampler(g_current_renderer, &sampler_info, nullptr, &g_null_sampler); + return g_null_sampler; + } + + VkImageView null_image_view() + { + if (g_null_image_view) + return g_null_image_view; + + g_null_texture.create(g_current_renderer, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT, 4, 4); + g_null_image_view = g_null_texture; + return g_null_image_view; + } + + VkBufferView null_buffer_view() + { + if (g_null_buffer.size()) + return g_null_buffer; + + g_null_buffer.create(g_current_renderer, 32, VK_FORMAT_R32_SFLOAT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT); + return g_null_buffer; + } + + void destroy_global_resources() + { + g_null_buffer.destroy(); + g_null_texture.destroy(); + + if (g_null_sampler) + vkDestroySampler(g_current_renderer, g_null_sampler, nullptr); + + g_null_sampler = nullptr; + g_null_image_view = nullptr; + } + + void set_current_thread_ctx(const vk::context &ctx) + { + g_current_vulkan_ctx = (vk::context *)&ctx; + } + + context *get_current_thread_ctx() + { + return g_current_vulkan_ctx; + } + + vk::render_device *get_current_renderer() + { + return &g_current_renderer; + } + + void set_current_renderer(const vk::render_device &device) + { + g_current_renderer = device; + } + + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageAspectFlags aspect_flags) + { + //Prepare an image to match the new layout.. 
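+		//Source and destination access masks are derived from the two layouts below, then applied in a single pipeline barrier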
+ VkImageSubresourceRange range = default_image_subresource_range(); + range.aspectMask = aspect_flags; + + VkImageMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.newLayout = new_layout; + barrier.oldLayout = current_layout; + barrier.image = image; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange = range; + + switch (new_layout) + { + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; + } + + switch (current_layout) + { + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; break; + } + + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + } + + VKAPI_ATTR VkBool32 VKAPI_CALL dbgFunc(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData) + { + if (msgFlags & VK_DEBUG_REPORT_ERROR_BIT_EXT) + { + LOG_ERROR(RSX, "ERROR: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg); + } + else if (msgFlags & VK_DEBUG_REPORT_WARNING_BIT_EXT) + { + LOG_WARNING(RSX, "WARNING: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg); + } + else + { + return false; + } + + //Let the app crash.. + return false; + } + + VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData) + { + DebugBreak(); + + return false; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h new file mode 100644 index 0000000000..6bb3cf692a --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -0,0 +1,1314 @@ +#pragma once + +#include "stdafx.h" +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "VulkanAPI.h" +#include "../GCM.h" + +//Set to 9 to enable all debug layers. Will cause significant slowdowns. 
Eventually to be replaced with GUI checkbox +#define VK_ENABLED_LAYER_COUNT 0 + +namespace rsx +{ + class texture; +} + +namespace vk +{ +#define CHECK_RESULT(expr) { VkResult __res = expr; if(__res != VK_SUCCESS) throw EXCEPTION("Assertion failed! Result is %Xh", __res); } + + VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + VKAPI_ATTR void VKAPI_CALL mem_free(void *pUserData, void *pMemory); + + VKAPI_ATTR VkBool32 VKAPI_CALL dbgFunc(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, void *pUserData); + + VkBool32 BreakCallback(VkFlags msgFlags, VkDebugReportObjectTypeEXT objType, + uint64_t srcObject, size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg, + void *pUserData); + + //VkAllocationCallbacks default_callbacks(); + + class context; + class render_device; + class swap_chain_image; + class physical_device; + class command_buffer; + + vk::context *get_current_thread_ctx(); + void set_current_thread_ctx(const vk::context &ctx); + + vk::render_device *get_current_renderer(); + void set_current_renderer(const vk::render_device &device); + + VkComponentMapping default_component_map(); + VkImageSubresource default_image_subresource(); + VkImageSubresourceRange default_image_subresource_range(); + + VkBuffer null_buffer(); + VkSampler null_sampler(); + VkImageView null_image_view(); + VkBufferView null_buffer_view(); + + void destroy_global_resources(); + + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageAspectFlags aspect_flags); + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); + void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect); + + VkFormat get_compatible_sampler_format(u32 format, VkComponentMapping& mapping, u8 swizzle_mask=0); + VkFormat get_compatible_surface_format(rsx::surface_color_format color_format); + VkFormat get_compatible_depth_surface_format(rsx::surface_depth_format depth_format); + + class physical_device + { + VkPhysicalDevice dev = nullptr; + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties memory_properties; + std::vector queue_props; + + public: + + physical_device() {} + ~physical_device() {} + + void set_device(VkPhysicalDevice pdev) + { + dev = pdev; + vkGetPhysicalDeviceProperties(pdev, &props); + vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties); + } + + std::string name() + { + return props.deviceName; + } + + uint32_t get_queue_count() + { + if (queue_props.size()) + return queue_props.size(); + + uint32_t count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr); + + return count; + } + + VkQueueFamilyProperties get_queue_properties(uint32_t queue) + { + if (!queue_props.size()) + { + uint32_t count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr); + + queue_props.resize(count); + vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, 
queue_props.data()); + } + + if (queue >= queue_props.size()) throw EXCEPTION("Undefined trap"); + return queue_props[queue]; + } + + VkPhysicalDeviceMemoryProperties get_memory_properties() + { + return memory_properties; + } + + operator VkPhysicalDevice() + { + return dev; + } + }; + + class render_device + { + vk::physical_device *pgpu; + VkDevice dev; + + public: + + render_device() + { + dev = nullptr; + pgpu = nullptr; + } + + render_device(vk::physical_device &pdev, uint32_t graphics_queue_idx) + { + VkResult err; + + float queue_priorities[1] = { 0.f }; + pgpu = &pdev; + + VkDeviceQueueCreateInfo queue; + queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue.pNext = NULL; + queue.queueFamilyIndex = graphics_queue_idx; + queue.queueCount = 1; + queue.pQueuePriorities = queue_priorities; + + //Set up instance information + const char *requested_extensions[] = + { + "VK_KHR_swapchain" + }; + + const char *validation_layers[] = + { + "VK_LAYER_LUNARG_threading", "VK_LAYER_LUNARG_mem_tracker", + "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_draw_state", + "VK_LAYER_LUNARG_param_checker", "VK_LAYER_LUNARG_swapchain", + "VK_LAYER_LUNARG_device_limits", "VK_LAYER_LUNARG_image", + "VK_LAYER_GOOGLE_unique_objects", + }; + + VkDeviceCreateInfo device; + device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device.pNext = NULL; + device.queueCreateInfoCount = 1; + device.pQueueCreateInfos = &queue; + device.enabledLayerCount = VK_ENABLED_LAYER_COUNT; + device.ppEnabledLayerNames = validation_layers; + device.enabledExtensionCount = 1; + device.ppEnabledExtensionNames = requested_extensions; + device.pEnabledFeatures = nullptr; + + err = vkCreateDevice(*pgpu, &device, nullptr, &dev); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + } + + ~render_device() + { + } + + void destroy() + { + if (dev && pgpu) + { + vkDestroyDevice(dev, nullptr); + dev = nullptr; + } + } + + bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32 *type_index) + { + VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties(); + + for (uint32_t i = 0; i < 32; i++) + { + if ((typeBits & 1) == 1) + { + if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask) + { + *type_index = i; + return true; + } + } + + typeBits >>= 1; + } + + return false; + } + + vk::physical_device& gpu() + { + return *pgpu; + } + + operator VkDevice() + { + return dev; + } + }; + + class memory_block + { + VkDeviceMemory vram = nullptr; + vk::render_device *owner = nullptr; + u32 vram_block_sz = 0; + + public: + memory_block() {} + ~memory_block() {} + + void allocate_from_pool(vk::render_device &device, u32 block_sz, u32 typeBits) + { + if (vram) + destroy(); + + u32 typeIndex = 0; + + owner = (vk::render_device*)&device; + VkDevice dev = (VkDevice)(*owner); + + if (!owner->get_compatible_memory_type(typeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &typeIndex)) + throw EXCEPTION("Could not find suitable memory type!"); + + VkMemoryAllocateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + infos.pNext = nullptr; + infos.allocationSize = block_sz; + infos.memoryTypeIndex = typeIndex; + + CHECK_RESULT(vkAllocateMemory(dev, &infos, nullptr, &vram)); + vram_block_sz = block_sz; + } + + void destroy() + { + VkDevice dev = (VkDevice)(*owner); + vkFreeMemory(dev, vram, nullptr); + + owner = nullptr; + vram = nullptr; + vram_block_sz = 0; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkDeviceMemory() + { + return vram; + } + }; 
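+
+	/*
+	 * Usage sketch for memory_block, illustrative only (assumes 'dev' is a valid
+	 * vk::render_device and 'some_buffer' an already-created VkBuffer):
+	 *
+	 *   VkMemoryRequirements reqs;
+	 *   vkGetBufferMemoryRequirements(dev, some_buffer, &reqs);
+	 *
+	 *   vk::memory_block block;
+	 *   block.allocate_from_pool(dev, reqs.size, reqs.memoryTypeBits); //picks a HOST_VISIBLE memory type
+	 *   vkBindBufferMemory(dev, some_buffer, block, 0);                //uses the implicit VkDeviceMemory conversion
+	 */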
+ + class texture + { + VkImageView m_view = nullptr; + VkSampler m_sampler = nullptr; + VkImage m_image_contents = nullptr; + VkMemoryRequirements m_memory_layout; + VkFormat m_internal_format; + VkImageUsageFlags m_flags; + VkImageAspectFlagBits m_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImageViewType m_view_type = VK_IMAGE_VIEW_TYPE_2D; + VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR; + + vk::memory_block vram_allocation; + vk::render_device *owner = nullptr; + + u32 m_width; + u32 m_height; + u32 m_mipmaps; + + vk::texture *staging_texture = nullptr; + bool ready = false; + + VkSamplerAddressMode vk_wrap_mode(u32 gcm_wrap_mode); + float max_aniso(u32 gcm_aniso); + void sampler_setup(rsx::texture& tex, VkImageViewType type, VkComponentMapping swizzle); + + public: + texture(vk::swap_chain_image &img); + texture() {} + ~texture() {} + + void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only); + void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height); + void destroy(); + + void init(rsx::texture &tex, vk::command_buffer &cmd, bool ignore_checks); + void init(rsx::texture &tex, vk::command_buffer &cmd); + void flush(vk::command_buffer & cmd); + + //Fill with debug color 0xFF + void init_debug(); + + void change_layout(vk::command_buffer &cmd, VkImageLayout new_layout); + VkImageLayout get_layout(); + + const u32 width(); + const u32 height(); + const u16 mipmaps(); + const VkFormat get_format(); + + operator VkSampler(); + operator VkImageView(); + operator VkImage(); + }; + + class buffer + { + VkBufferView m_view = nullptr; + VkBuffer m_buffer = nullptr; + VkMemoryRequirements m_memory_layout; + VkFormat m_internal_format = VK_FORMAT_UNDEFINED; + VkBufferUsageFlagBits m_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + VkBufferCreateFlags m_flags = 0; + + vk::render_device *owner; + vk::memory_block vram; + u32 m_size = 0; + + bool viewable = false; + + public: + buffer() {} + ~buffer() {} + + void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags) + { + if (m_buffer) throw EXCEPTION("Buffer create called on an existing buffer!"); + + owner = &dev; + + VkBufferCreateInfo infos; + infos.pNext = nullptr; + infos.size = size; + infos.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + infos.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + infos.flags = flags; + infos.usage = usage; + infos.pQueueFamilyIndices = nullptr; + infos.queueFamilyIndexCount = 0; + + CHECK_RESULT(vkCreateBuffer(dev, &infos, nullptr, &m_buffer)); + + //Allocate vram for this buffer + vkGetBufferMemoryRequirements(dev, m_buffer, &m_memory_layout); + vram.allocate_from_pool(dev, m_memory_layout.size, 
m_memory_layout.memoryTypeBits); + + viewable = !!(usage & (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)); + + //Bind buffer memory + vkBindBufferMemory(dev, m_buffer, vram, 0); + + m_size = m_memory_layout.size; + m_usage = usage; + m_flags = flags; + + set_format(format); + } + + void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage) + { + create(dev, size, format, usage, 0); + } + + void create(vk::render_device &dev, u32 size, VkFormat format) + { + create(dev, size, format, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); + } + + void create(vk::render_device &dev, u32 size) + { + create(dev, size, VK_FORMAT_UNDEFINED); + } + + void *map(u32 offset, u64 size) + { + void *data = nullptr; + if (size == VK_WHOLE_SIZE) + size = m_memory_layout.size; + + CHECK_RESULT(vkMapMemory((*owner), vram, offset, size, 0, &data)); + return data; + } + + void unmap() + { + vkUnmapMemory((*owner), vram); + } + + void sub_data(u32 offset, u32 size, void *data) + { + //TODO: Synchronization + if (!data && (m_size < size)) + { + vk::render_device *pdev = owner; + + destroy(); + create((*pdev), size, m_internal_format, m_usage, m_flags); + } + + if (!data) return; + if ((offset + size) > m_size) + { + vk::render_device *tmp_owner = owner; + destroy(); + create((*tmp_owner), size, m_internal_format, m_usage, m_flags); + } + + u8 *dst = (u8*)map(offset, size); + u8 *src = (u8*)data; + + memcpy(dst, src, size); + unmap(); + } + + void destroy() + { + if (!owner) return; + + vkDestroyBufferView((*owner), m_view, nullptr); + vkDestroyBuffer((*owner), m_buffer, nullptr); + vram.destroy(); + + owner = nullptr; + m_view = nullptr; + m_buffer = nullptr; + m_internal_format = VK_FORMAT_UNDEFINED; + } + + void set_format(VkFormat format) + { + if (m_internal_format == format || format == VK_FORMAT_UNDEFINED || !viewable) + return; + + if (m_view) + { + vkDestroyBufferView((*owner), m_view, nullptr); + m_view = nullptr; + } + + VkFormatProperties format_properties; + vk::physical_device dev = owner->gpu(); + vkGetPhysicalDeviceFormatProperties(dev, format, &format_properties); + + if (!(format_properties.bufferFeatures & VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) + throw EXCEPTION("Can't map view to requested format"); + + VkBufferViewCreateInfo view_info; + view_info.buffer = m_buffer; + view_info.flags = 0; + view_info.format = format; + view_info.offset = 0; + view_info.pNext = nullptr; + view_info.range = m_size; + view_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + + CHECK_RESULT(vkCreateBufferView((*owner), &view_info, nullptr, &m_view)); + + m_internal_format = format; + } + + u32 size() + { + return m_size; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkBuffer() + { + return m_buffer; + } + + operator VkBufferView() + { + if (!viewable) + throw EXCEPTION("Invalid usage! 
Buffer cannot be viewed as texels."); + + return m_view; + } + }; + + class framebuffer + { + VkFramebuffer m_vk_framebuffer = nullptr; + vk::render_device *owner = nullptr; + + public: + framebuffer() {} + ~framebuffer() {} + + void create(vk::render_device &dev, VkRenderPass pass, VkImageView *attachments, u32 nb_attachments, u32 width, u32 height) + { + VkFramebufferCreateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + infos.flags = 0; + infos.width = width; + infos.height = height; + infos.pNext = nullptr; + infos.attachmentCount = nb_attachments; + infos.pAttachments = attachments; + infos.renderPass = pass; + infos.layers = 1; + + vkCreateFramebuffer(dev, &infos, nullptr, &m_vk_framebuffer); + owner = &dev; + } + + void destroy() + { + if (!owner) return; + + vkDestroyFramebuffer((*owner), m_vk_framebuffer, nullptr); + owner = nullptr; + } + + operator VkFramebuffer() const + { + return m_vk_framebuffer; + } + }; + + class swap_chain_image + { + VkImageView view = nullptr; + VkImage image = nullptr; + VkFormat internal_format; + vk::render_device *owner = nullptr; + + public: + swap_chain_image() {} + + void create(vk::render_device &dev, VkImage &swap_image, VkFormat format) + { + VkImageViewCreateInfo color_image_view; + + color_image_view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + color_image_view.pNext = nullptr; + color_image_view.format = format; + + color_image_view.components.r = VK_COMPONENT_SWIZZLE_R; + color_image_view.components.g = VK_COMPONENT_SWIZZLE_G; + color_image_view.components.b = VK_COMPONENT_SWIZZLE_B; + color_image_view.components.a = VK_COMPONENT_SWIZZLE_A; + + color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + color_image_view.subresourceRange.baseMipLevel = 0; + color_image_view.subresourceRange.levelCount = 1; + color_image_view.subresourceRange.baseArrayLayer = 0; + color_image_view.subresourceRange.layerCount = 1; + + color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; + color_image_view.flags = 0; + + color_image_view.image = swap_image; + vkCreateImageView(dev, &color_image_view, nullptr, &view); + + image = swap_image; + internal_format = format; + owner = &dev; + } + + void discard(vk::render_device &dev) + { + vkDestroyImageView(dev, view, nullptr); + } + + operator VkImage() + { + return image; + } + + operator VkImageView() + { + return view; + } + + operator vk::texture() + { + return vk::texture(*this); + } + }; + + class swap_chain + { + vk::render_device dev; + + uint32_t m_present_queue = 0xFFFF; + uint32_t m_graphics_queue = 0xFFFF; + + VkQueue vk_graphics_queue = nullptr; + VkQueue vk_present_queue = nullptr; + + /* WSI surface information */ + VkSurfaceKHR m_surface = nullptr; + VkFormat m_surface_format; + VkColorSpaceKHR m_color_space; + + VkSwapchainKHR m_vk_swapchain = nullptr; + std::vector m_swap_images; + + public: + + PFN_vkCreateSwapchainKHR createSwapchainKHR; + PFN_vkDestroySwapchainKHR destroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR getSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR acquireNextImageKHR; + PFN_vkQueuePresentKHR queuePresentKHR; + + swap_chain(vk::physical_device &gpu, uint32_t _present_queue, uint32_t _graphics_queue, VkFormat format, VkSurfaceKHR surface, VkColorSpaceKHR color_space) + { + dev = render_device(gpu, _graphics_queue); + + createSwapchainKHR = (PFN_vkCreateSwapchainKHR)vkGetDeviceProcAddr(dev, "vkCreateSwapchainKHR"); + destroySwapchainKHR = (PFN_vkDestroySwapchainKHR)vkGetDeviceProcAddr(dev, "vkDestroySwapchainKHR"); + 
getSwapchainImagesKHR = (PFN_vkGetSwapchainImagesKHR)vkGetDeviceProcAddr(dev, "vkGetSwapchainImagesKHR"); + acquireNextImageKHR = (PFN_vkAcquireNextImageKHR)vkGetDeviceProcAddr(dev, "vkAcquireNextImageKHR"); + queuePresentKHR = (PFN_vkQueuePresentKHR)vkGetDeviceProcAddr(dev, "vkQueuePresentKHR"); + + vkGetDeviceQueue(dev, _graphics_queue, 0, &vk_graphics_queue); + vkGetDeviceQueue(dev, _present_queue, 0, &vk_present_queue); + + m_present_queue = _present_queue; + m_graphics_queue = _graphics_queue; + m_surface = surface; + m_color_space = color_space; + m_surface_format = format; + } + + ~swap_chain() + { + } + + void destroy() + { + if (VkDevice pdev = (VkDevice)dev) + { + if (m_vk_swapchain) + { + if (m_swap_images.size()) + { + for (vk::swap_chain_image &img : m_swap_images) + img.discard(dev); + } + + destroySwapchainKHR(pdev, m_vk_swapchain, nullptr); + } + + dev.destroy(); + } + } + + void init_swapchain(u32 width, u32 height) + { + VkSwapchainKHR old_swapchain = m_vk_swapchain; + + uint32_t num_modes; + vk::physical_device& gpu = const_cast(dev.gpu()); + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &num_modes, NULL)); + + std::vector present_mode_descriptors(num_modes); + CHECK_RESULT(vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, m_surface, &num_modes, present_mode_descriptors.data())); + + VkSurfaceCapabilitiesKHR surface_descriptors; + CHECK_RESULT(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, m_surface, &surface_descriptors)); + + VkExtent2D swapchainExtent; + + if (surface_descriptors.currentExtent.width == (uint32_t)-1) + { + swapchainExtent.width = width; + swapchainExtent.height = height; + } + else + { + swapchainExtent = surface_descriptors.currentExtent; + width = surface_descriptors.currentExtent.width; + height = surface_descriptors.currentExtent.height; + } + + VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + uint32_t nb_swap_images = surface_descriptors.minImageCount + 1; + + if ((surface_descriptors.maxImageCount > 0) && (nb_swap_images > surface_descriptors.maxImageCount)) + { + // Application must settle for fewer images than desired: + nb_swap_images = surface_descriptors.maxImageCount; + } + + VkSurfaceTransformFlagBitsKHR pre_transform = surface_descriptors.currentTransform; + if (surface_descriptors.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) + pre_transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + + VkSwapchainCreateInfoKHR swap_info; + swap_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swap_info.pNext = nullptr; + swap_info.surface = m_surface; + swap_info.minImageCount = nb_swap_images; + swap_info.imageFormat = m_surface_format; + swap_info.imageColorSpace = m_color_space; + + swap_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT; + swap_info.preTransform = pre_transform; + swap_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + swap_info.imageArrayLayers = 1; + swap_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swap_info.queueFamilyIndexCount = 0; + swap_info.pQueueFamilyIndices = nullptr; + swap_info.presentMode = swapchain_present_mode; + swap_info.oldSwapchain = old_swapchain; + swap_info.clipped = true; + + swap_info.imageExtent.width = width; + swap_info.imageExtent.height = height; + + createSwapchainKHR(dev, &swap_info, nullptr, &m_vk_swapchain); + + if (old_swapchain) + destroySwapchainKHR(dev, old_swapchain, nullptr); + + nb_swap_images = 0; + getSwapchainImagesKHR(dev, m_vk_swapchain, &nb_swap_images, nullptr); 
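+			//Standard two-call enumeration: the nullptr query above yields the image count, the call below fetches the handles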
+ + if (!nb_swap_images) throw EXCEPTION("Undefined trap"); + + std::vector swap_images; + swap_images.resize(nb_swap_images); + getSwapchainImagesKHR(dev, m_vk_swapchain, &nb_swap_images, swap_images.data()); + + m_swap_images.resize(nb_swap_images); + for (u32 i = 0; i < nb_swap_images; ++i) + { + m_swap_images[i].create(dev, swap_images[i], m_surface_format); + } + } + + u32 get_swap_image_count() + { + return m_swap_images.size(); + } + + vk::swap_chain_image& get_swap_chain_image(const int index) + { + return m_swap_images[index]; + } + + const vk::render_device& get_device() + { + return dev; + } + + const VkQueue& get_present_queue() + { + return vk_graphics_queue; + } + + const VkFormat get_surface_format() + { + return m_surface_format; + } + + operator const VkSwapchainKHR() + { + return m_vk_swapchain; + } + }; + + class command_pool + { + vk::render_device *owner = nullptr; + VkCommandPool pool = nullptr; + + public: + command_pool() {} + ~command_pool() {} + + void create(vk::render_device &dev) + { + owner = &dev; + VkCommandPoolCreateInfo infos; + infos.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + infos.pNext = nullptr; + infos.queueFamilyIndex = 0; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + + CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); + } + + void destroy() + { + if (!pool) + return; + + vkDestroyCommandPool((*owner), pool, nullptr); + pool = nullptr; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkCommandPool() + { + return pool; + } + }; + + class command_buffer + { + vk::command_pool *pool = nullptr; + VkCommandBuffer commands = nullptr; + + public: + command_buffer() {} + ~command_buffer() {} + + void create(vk::command_pool &cmd_pool) + { + VkCommandBufferAllocateInfo infos; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + infos.commandBufferCount = 1; + infos.commandPool = (VkCommandPool)cmd_pool; + infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + infos.pNext = nullptr; + + CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); + pool = &cmd_pool; + } + + void destroy() + { + vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); + } + + operator VkCommandBuffer() + { + return commands; + } + }; + + class context + { + private: + std::vector gpus; + + std::vector m_vk_instances; + VkInstance m_instance; + + PFN_vkDestroyDebugReportCallbackEXT destroyDebugReportCallback = nullptr; + PFN_vkCreateDebugReportCallbackEXT createDebugReportCallback = nullptr; + VkDebugReportCallbackEXT m_debugger = nullptr; + + public: + + context() + { + m_instance = nullptr; + } + + ~context() + { + if (m_instance || m_vk_instances.size()) + close(); + } + + void close() + { + if (!m_vk_instances.size()) return; + + if (m_debugger) + { + destroyDebugReportCallback(m_instance, m_debugger, nullptr); + m_debugger = nullptr; + } + + for (VkInstance &inst : m_vk_instances) + { + vkDestroyInstance(inst, nullptr); + } + + m_instance = nullptr; + m_vk_instances.resize(0); + } + + void enable_debugging() + { + PFN_vkDebugReportCallbackEXT callback = vk::dbgFunc; + + createDebugReportCallback = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(m_instance, "vkCreateDebugReportCallbackEXT"); + destroyDebugReportCallback = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(m_instance, "vkDestroyDebugReportCallbackEXT"); + + VkDebugReportCallbackCreateInfoEXT dbgCreateInfo; + dbgCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; + 
dbgCreateInfo.pNext = NULL;
+			dbgCreateInfo.pfnCallback = callback;
+			dbgCreateInfo.pUserData = NULL;
+			dbgCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
+
+			CHECK_RESULT(createDebugReportCallback(m_instance, &dbgCreateInfo, NULL, &m_debugger));
+		}
+
+		uint32_t createInstance(const char *app_name)
+		{
+			//Initialize a vulkan instance
+			VkApplicationInfo app;
+
+			app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+			app.pNext = nullptr;
+			app.pApplicationName = app_name;
+			app.applicationVersion = 0;
+			app.pEngineName = app_name;
+			app.engineVersion = 0;
+			app.apiVersion = VK_MAKE_VERSION(1, 0, 0);
+
+			//Set up instance information
+			const char *requested_extensions[] =
+			{
+				"VK_KHR_surface",
+				"VK_KHR_win32_surface",
+				"VK_EXT_debug_report",
+			};
+
+			const char *validation_layers[] =
+			{
+				"VK_LAYER_LUNARG_threading", "VK_LAYER_LUNARG_mem_tracker",
+				"VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_draw_state",
+				"VK_LAYER_LUNARG_param_checker", "VK_LAYER_LUNARG_swapchain",
+				"VK_LAYER_LUNARG_device_limits", "VK_LAYER_LUNARG_image",
+				"VK_LAYER_GOOGLE_unique_objects",
+			};
+
+			VkInstanceCreateInfo instance_info;
+			instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+			instance_info.pNext = nullptr;
+			instance_info.pApplicationInfo = &app;
+			instance_info.enabledLayerCount = VK_ENABLED_LAYER_COUNT;
+			instance_info.ppEnabledLayerNames = validation_layers;
+			instance_info.enabledExtensionCount = 3;
+			instance_info.ppEnabledExtensionNames = requested_extensions;
+
+			VkInstance instance;
+			VkResult error = vkCreateInstance(&instance_info, nullptr, &instance);
+
+			if (error != VK_SUCCESS) throw EXCEPTION("Undefined trap");
+
+			m_vk_instances.push_back(instance);
+			return m_vk_instances.size();
+		}
+
+		void makeCurrentInstance(uint32_t instance_id)
+		{
+			if (!instance_id || instance_id > m_vk_instances.size())
+				throw EXCEPTION("Undefined trap");
+
+			if (m_debugger)
+			{
+				destroyDebugReportCallback(m_instance, m_debugger, nullptr);
+				m_debugger = nullptr;
+			}
+
+			instance_id--;
+			m_instance = m_vk_instances[instance_id];
+		}
+
+		VkInstance getCurrentInstance()
+		{
+			return m_instance;
+		}
+
+		VkInstance getInstanceById(uint32_t instance_id)
+		{
+			if (!instance_id || instance_id > m_vk_instances.size())
+				throw EXCEPTION("Undefined trap");
+
+			instance_id--;
+			return m_vk_instances[instance_id];
+		}
+
+		std::vector<vk::physical_device>& enumerateDevices()
+		{
+			uint32_t num_gpus;
+			CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, nullptr));
+
+			if (gpus.size() != num_gpus)
+			{
+				std::vector<VkPhysicalDevice> pdevs(num_gpus);
+				gpus.resize(num_gpus);
+
+				CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data()));
+
+				for (int i = 0; i < num_gpus; ++i)
+					gpus[i].set_device(pdevs[i]);
+			}
+
+			return gpus;
+		}
+
+		vk::swap_chain* createSwapChain(HINSTANCE hInstance, HWND hWnd, vk::physical_device &dev)
+		{
+			VkWin32SurfaceCreateInfoKHR createInfo;
+			createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR;
+			createInfo.pNext = NULL;
+			createInfo.flags = 0;
+			createInfo.hinstance = hInstance;
+			createInfo.hwnd = hWnd;
+
+			VkSurfaceKHR surface;
+			VkResult err = vkCreateWin32SurfaceKHR(m_instance, &createInfo, NULL, &surface);
+
+			uint32_t device_queues = dev.get_queue_count();
+			std::vector<VkBool32> supportsPresent(device_queues);
+
+			for (int index = 0; index < device_queues; index++)
+			{
+				vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, surface, &supportsPresent[index]);
+			}
+
+			// Search for a graphics and a present queue in the array of queue
+			// families, try to find one 
that supports both + uint32_t graphicsQueueNodeIndex = UINT32_MAX; + uint32_t presentQueueNodeIndex = UINT32_MAX; + + for (int i = 0; i < device_queues; i++) + { + if ((dev.get_queue_properties(i).queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) + { + if (graphicsQueueNodeIndex == UINT32_MAX) + graphicsQueueNodeIndex = i; + + if (supportsPresent[i] == VK_TRUE) + { + graphicsQueueNodeIndex = i; + presentQueueNodeIndex = i; + + break; + } + } + } + + if (presentQueueNodeIndex == UINT32_MAX) + { + // If didn't find a queue that supports both graphics and present, then + // find a separate present queue. + for (uint32_t i = 0; i < device_queues; ++i) + { + if (supportsPresent[i] == VK_TRUE) + { + presentQueueNodeIndex = i; + break; + } + } + } + + // Generate error if could not find both a graphics and a present queue + if (graphicsQueueNodeIndex == UINT32_MAX || presentQueueNodeIndex == UINT32_MAX) + throw EXCEPTION("Undefined trap"); + + if (graphicsQueueNodeIndex != presentQueueNodeIndex) + throw EXCEPTION("Undefined trap"); + + // Get the list of VkFormat's that are supported: + uint32_t formatCount; + err = vkGetPhysicalDeviceSurfaceFormatsKHR(dev, surface, &formatCount, nullptr); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + + std::vector surfFormats(formatCount); + err = vkGetPhysicalDeviceSurfaceFormatsKHR(dev, surface, &formatCount, surfFormats.data()); + if (err != VK_SUCCESS) throw EXCEPTION("Undefined trap"); + + VkFormat format; + VkColorSpaceKHR color_space; + + if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) + { + format = VK_FORMAT_B8G8R8A8_UNORM; + } + else + { + if (!formatCount) throw EXCEPTION("Undefined trap"); + format = surfFormats[0].format; + } + + color_space = surfFormats[0].colorSpace; + + return new swap_chain(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, format, surface, color_space); + } + }; + + class descriptor_pool + { + VkDescriptorPool pool = nullptr; + vk::render_device *owner = nullptr; + + public: + descriptor_pool() {} + ~descriptor_pool() {} + + void create(vk::render_device &dev, VkDescriptorPoolSize *sizes, u32 size_descriptors_count) + { + VkDescriptorPoolCreateInfo infos; + infos.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + infos.maxSets = 2; + infos.pNext = nullptr; + infos.poolSizeCount = size_descriptors_count; + infos.pPoolSizes = sizes; + infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + + owner = &dev; + CHECK_RESULT(vkCreateDescriptorPool(dev, &infos, nullptr, &pool)); + } + + void destroy() + { + if (!pool) return; + + vkDestroyDescriptorPool((*owner), pool, nullptr); + owner = nullptr; + pool = nullptr; + } + + bool valid() + { + return (pool != nullptr); + } + + operator VkDescriptorPool() + { + return pool; + } + }; + + namespace glsl + { + enum program_domain + { + glsl_vertex_program = 0, + glsl_fragment_program = 1 + }; + + enum program_input_type + { + input_type_uniform_buffer = 0, + input_type_texel_buffer = 1, + input_type_texture = 2 + }; + + struct bound_sampler + { + VkImageView image_view = nullptr; + VkSampler sampler = nullptr; + }; + + struct bound_buffer + { + VkBufferView buffer_view = nullptr; + VkBuffer buffer = nullptr; + u32 offset = 0; + u32 size = 0; + }; + + struct program_input + { + program_domain domain; + program_input_type type; + + bound_buffer as_buffer; + bound_sampler as_sampler; + + int location; + std::string name; + }; + + class program + { + struct pipeline_state + { + VkGraphicsPipelineCreateInfo pipeline; + VkPipelineCacheCreateInfo 
pipeline_cache_desc;
+ VkPipelineCache pipeline_cache;
+ VkPipelineVertexInputStateCreateInfo vi;
+ VkPipelineInputAssemblyStateCreateInfo ia;
+ VkPipelineRasterizationStateCreateInfo rs;
+ VkPipelineColorBlendStateCreateInfo cb;
+ VkPipelineDepthStencilStateCreateInfo ds;
+ VkPipelineViewportStateCreateInfo vp;
+ VkPipelineMultisampleStateCreateInfo ms;
+ VkDynamicState dynamic_state_descriptors[VK_DYNAMIC_STATE_RANGE_SIZE];
+ VkPipelineDynamicStateCreateInfo dynamic_state;
+
+ VkPipelineColorBlendAttachmentState att_state[4];
+
+ VkPipelineShaderStageCreateInfo shader_stages[2];
+ VkRenderPass render_pass = nullptr;
+ VkShaderModule vs, fs;
+ VkPipeline pipeline_handle = nullptr;
+
+ VkDescriptorSetLayout descriptor_layouts[2];
+ VkDescriptorSet descriptor_sets[2];
+ VkPipelineLayout pipeline_layout;
+
+ int num_targets = 1;
+
+ bool dirty;
+ bool in_use;
+ }
+ pstate;
+
+ bool uniforms_changed = true;
+
+ vk::render_device *device = nullptr;
+ std::vector<program_input> uniforms;
+ vk::descriptor_pool descriptor_pool;
+
+ void init_pipeline();
+
+ public:
+ program();
+ program(const program&) = delete;
+ program(program&& other);
+ program(vk::render_device &renderer);
+
+ ~program();
+
+ program& attach_device(vk::render_device &dev);
+ program& attachFragmentProgram(VkShaderModule prog);
+ program& attachVertexProgram(VkShaderModule prog);
+
+ void make();
+ void destroy();
+
+ //Render state stuff...
+ void set_depth_compare_op(VkCompareOp op);
+ void set_depth_write_mask(VkBool32 write_enable);
+ void set_depth_test_enable(VkBool32 state);
+ void set_primitive_topology(VkPrimitiveTopology topology);
+ void set_color_mask(int num_targets, u8* targets, VkColorComponentFlags *flags);
+ void set_blend_state(int num_targets, u8* targets, VkBool32 *enable);
+ void set_blend_state(int num_targets, u8* targets, VkBool32 enable);
+ void set_blend_func(int num_targets, u8* targets, VkBlendFactor *src_color, VkBlendFactor *dst_color, VkBlendFactor *src_alpha, VkBlendFactor *dst_alpha);
+ void set_blend_func(int num_targets, u8* targets, VkBlendFactor src_color, VkBlendFactor dst_color, VkBlendFactor src_alpha, VkBlendFactor dst_alpha);
+ void set_blend_op(int num_targets, u8* targets, VkBlendOp* color_ops, VkBlendOp* alpha_ops);
+ void set_blend_op(int num_targets, u8* targets, VkBlendOp color_op, VkBlendOp alpha_op);
+ void set_primitive_restart(VkBool32 state);
+
+ void init_descriptor_layout();
+ void update_descriptors();
+ void destroy_descriptors();
+
+ void set_draw_buffer_count(u8 draw_buffers);
+
+ program& load_uniforms(program_domain domain, std::vector<program_input>& inputs);
+
+ void use(vk::command_buffer& commands, VkRenderPass pass, u32 subpass);
+
+ bool has_uniform(program_domain domain, std::string uniform_name);
+ bool bind_uniform(program_domain domain, std::string uniform_name);
+ bool bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture);
+ bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer);
+ bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store);
+
+ program& operator = (const program&) = delete;
+ program& operator = (program&& other);
+ };
+ }
+}
\ No newline at end of file
diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h
new file mode 100644
index 0000000000..ecb8cf90f1
--- /dev/null
+++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h
@@ -0,0 +1,47 @@
+#pragma once
+#include "VKVertexProgram.h"
+#include "VKFragmentProgram.h"
+#include
"../Common/ProgramStateCache.h" + +struct VKTraits +{ + using vertex_program_type = VKVertexProgram; + using fragment_program_type = VKFragmentProgram; + using pipeline_storage_type = vk::glsl::program; + using pipeline_properties = void*; + + static + void recompile_fragment_program(const RSXFragmentProgram &RSXFP, fragment_program_type& fragmentProgramData, size_t ID) + { + fragmentProgramData.Decompile(RSXFP); + fragmentProgramData.Compile(); + } + + static + void recompile_vertex_program(const RSXVertexProgram &RSXVP, vertex_program_type& vertexProgramData, size_t ID) + { + vertexProgramData.Decompile(RSXVP); + vertexProgramData.Compile(); + } + + static + pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties) + { + pipeline_storage_type result(*vk::get_current_renderer()); + + std::vector vertex_uniforms = vertexProgramData.uniforms; + std::vector fragment_uniforms = fragmentProgramData.uniforms; + + result.attachVertexProgram(vertexProgramData.handle) + .attachFragmentProgram(fragmentProgramData.handle) + .load_uniforms(vk::glsl::glsl_vertex_program, vertex_uniforms) + .load_uniforms(vk::glsl::glsl_fragment_program, fragment_uniforms) + .make(); + + return result; + } +}; + +class VKProgramBuffer : public program_state_cache +{ +}; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp new file mode 100644 index 0000000000..faa9e7a328 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -0,0 +1,805 @@ +#include "stdafx.h" +#include "VKHelpers.h" + +namespace vk +{ + namespace glsl + { + program::program() + { + memset(&pstate, 0, sizeof(pstate)); + } + + program::program(vk::render_device &renderer) + { + memset(&pstate, 0, sizeof(pstate)); + init_pipeline(); + device = &renderer; + } + + program::program(program&& other) + { + //This object does not yet exist in a valid state. 
Clear the original + memset(&pstate, 0, sizeof(pstate)); + + pipeline_state tmp; + memcpy(&tmp, &pstate, sizeof pstate); + memcpy(&pstate, &other.pstate, sizeof pstate); + memcpy(&other.pstate, &tmp, sizeof pstate); + + std::vector tmp_uniforms = uniforms; + uniforms = other.uniforms; + other.uniforms = tmp_uniforms; + + vk::descriptor_pool tmp_pool; + descriptor_pool = other.descriptor_pool; + other.descriptor_pool = tmp_pool; + + vk::render_device *tmp_dev = device; + device = other.device; + other.device = tmp_dev; + + bool _uniforms_changed = uniforms_changed; + uniforms_changed = other.uniforms_changed; + other.uniforms_changed = _uniforms_changed; + } + + program& program::operator = (program&& other) + { + pipeline_state tmp; + memcpy(&tmp, &pstate, sizeof pstate); + memcpy(&pstate, &other.pstate, sizeof pstate); + memcpy(&other.pstate, &tmp, sizeof pstate); + + std::vector tmp_uniforms = uniforms; + uniforms = other.uniforms; + other.uniforms = tmp_uniforms; + + vk::descriptor_pool tmp_pool; + descriptor_pool = other.descriptor_pool; + other.descriptor_pool = tmp_pool; + + vk::render_device *tmp_dev = device; + device = other.device; + other.device = tmp_dev; + + bool _uniforms_changed = uniforms_changed; + uniforms_changed = other.uniforms_changed; + other.uniforms_changed = _uniforms_changed; + + return *this; + } + + void program::init_pipeline() + { + pstate.dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors; + + pstate.pipeline.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pstate.pipeline.layout = nullptr; + + pstate.vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + pstate.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + pstate.ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + pstate.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + pstate.rs.polygonMode = VK_POLYGON_MODE_FILL; + pstate.rs.cullMode = VK_CULL_MODE_NONE; + pstate.rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + pstate.rs.depthClampEnable = VK_FALSE; + pstate.rs.rasterizerDiscardEnable = VK_FALSE; + pstate.rs.depthBiasEnable = VK_FALSE; + + pstate.cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + pstate.cb.attachmentCount = 1; + pstate.cb.pAttachments = pstate.att_state; + + for (int i = 0; i < 4; ++i) + { + pstate.att_state[i].colorWriteMask = 0xf; + pstate.att_state[i].blendEnable = VK_FALSE; + } + + pstate.vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + pstate.vp.viewportCount = 1; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + pstate.vp.scissorCount = 1; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + pstate.dynamic_state_descriptors[pstate.dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH; + + pstate.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + pstate.ds.depthTestEnable = VK_FALSE; + pstate.ds.depthWriteEnable = VK_TRUE; + pstate.ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + pstate.ds.depthBoundsTestEnable = VK_FALSE; + pstate.ds.back.failOp = VK_STENCIL_OP_KEEP; + pstate.ds.back.passOp = VK_STENCIL_OP_KEEP; + pstate.ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + pstate.ds.stencilTestEnable = VK_FALSE; + pstate.ds.front = pstate.ds.back; + + pstate.ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + 
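//Multisample state is fixed: one sample per pixel and no sample mask.
+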
pstate.ms.pSampleMask = NULL; + pstate.ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + pstate.fs = nullptr; + pstate.vs = nullptr; + pstate.dirty = true; + + pstate.pipeline.stageCount = 2; + + pstate.shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pstate.shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + pstate.shader_stages[0].module = nullptr; + pstate.shader_stages[0].pName = "main"; + + pstate.shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pstate.shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + pstate.shader_stages[1].module = nullptr; + pstate.shader_stages[1].pName = "main"; + + pstate.pipeline_cache_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + } + + program::~program() + { + LOG_ERROR(RSX, "Program destructor invoked!"); + destroy(); + } + + program& program::attach_device(vk::render_device &dev) + { + if (!device) + init_pipeline(); + + device = &dev; + return *this; + } + + program& program::attachFragmentProgram(VkShaderModule prog) + { + pstate.fs = prog; + return *this; + } + + program& program::attachVertexProgram(VkShaderModule prog) + { + pstate.vs = prog; + return *this; + } + + void program::make() + { + if (pstate.fs == nullptr || pstate.vs == nullptr) + throw EXCEPTION("Missing shader stage!"); + + pstate.shader_stages[0].module = pstate.vs; + pstate.shader_stages[1].module = pstate.fs; + + CHECK_RESULT(vkCreatePipelineCache((*device), &pstate.pipeline_cache_desc, nullptr, &pstate.pipeline_cache)); + } + + void program::set_depth_compare_op(VkCompareOp op) + { + if (pstate.ds.depthCompareOp != op) + { + pstate.ds.depthCompareOp = op; + pstate.dirty = true; + } + } + + void program::set_depth_write_mask(VkBool32 write_enable) + { + if (pstate.ds.depthWriteEnable != write_enable) + { + pstate.ds.depthWriteEnable = write_enable; + pstate.dirty = true; + } + } + + void program::set_depth_test_enable(VkBool32 state) + { + if (pstate.ds.depthTestEnable != state) + { + pstate.ds.depthTestEnable = state; + pstate.dirty = true; + } + } + + void program::set_primitive_topology(VkPrimitiveTopology topology) + { + if (pstate.ia.topology != topology) + { + pstate.ia.topology = topology; + pstate.dirty = true; + } + } + + void program::set_color_mask(int num_targets, u8* targets, VkColorComponentFlags* flags) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorWriteMask != flags[idx]) + { + pstate.att_state[id].colorWriteMask = flags[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_state(int num_targets, u8* targets, VkBool32* enable) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].blendEnable != enable[idx]) + { + pstate.att_state[id].blendEnable = enable[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_state(int num_targets, u8 *targets, VkBool32 enable) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].blendEnable != enable) + { + pstate.att_state[id].blendEnable = enable; + pstate.dirty = true; + } + } + } + + void program::set_blend_func(int num_targets, u8* targets, VkBlendFactor* src_color, VkBlendFactor* dst_color, VkBlendFactor* src_alpha, VkBlendFactor* dst_alpha) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].srcColorBlendFactor != 
src_color[idx]) + { + pstate.att_state[id].srcColorBlendFactor = src_color[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstColorBlendFactor != dst_color[idx]) + { + pstate.att_state[id].dstColorBlendFactor = dst_color[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].srcAlphaBlendFactor != src_alpha[idx]) + { + pstate.att_state[id].srcAlphaBlendFactor = src_alpha[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstAlphaBlendFactor != dst_alpha[idx]) + { + pstate.att_state[id].dstAlphaBlendFactor = dst_alpha[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_func(int num_targets, u8* targets, VkBlendFactor src_color, VkBlendFactor dst_color, VkBlendFactor src_alpha, VkBlendFactor dst_alpha) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].srcColorBlendFactor != src_color) + { + pstate.att_state[id].srcColorBlendFactor = src_color; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstColorBlendFactor != dst_color) + { + pstate.att_state[id].dstColorBlendFactor = dst_color; + pstate.dirty = true; + } + + if (pstate.att_state[id].srcAlphaBlendFactor != src_alpha) + { + pstate.att_state[id].srcAlphaBlendFactor = src_alpha; + pstate.dirty = true; + } + + if (pstate.att_state[id].dstAlphaBlendFactor != dst_alpha) + { + pstate.att_state[id].dstAlphaBlendFactor = dst_alpha; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_op(int num_targets, u8* targets, VkBlendOp* color_ops, VkBlendOp* alpha_ops) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorBlendOp != color_ops[idx]) + { + pstate.att_state[id].colorBlendOp = color_ops[idx]; + pstate.dirty = true; + } + + if (pstate.att_state[id].alphaBlendOp != alpha_ops[idx]) + { + pstate.att_state[id].alphaBlendOp = alpha_ops[idx]; + pstate.dirty = true; + } + } + } + } + + void program::set_blend_op(int num_targets, u8* targets, VkBlendOp color_op, VkBlendOp alpha_op) + { + if (num_targets) + { + for (u8 idx = 0; idx < num_targets; ++idx) + { + u8 &id = targets[idx]; + if (pstate.att_state[id].colorBlendOp != color_op) + { + pstate.att_state[id].colorBlendOp = color_op; + pstate.dirty = true; + } + + if (pstate.att_state[id].alphaBlendOp != alpha_op) + { + pstate.att_state[id].alphaBlendOp = alpha_op; + pstate.dirty = true; + } + } + } + } + + void program::set_primitive_restart(VkBool32 state) + { + if (pstate.ia.primitiveRestartEnable != state) + { + pstate.ia.primitiveRestartEnable = state; + pstate.dirty = true; + } + } + + void program::init_descriptor_layout() + { + if (pstate.descriptor_layouts[0] != nullptr) + throw EXCEPTION("Existing descriptors found!"); + + if (descriptor_pool.valid()) + descriptor_pool.destroy(); + + std::vector layout_bindings[2]; + std::vector sizes; + + program_input_type types[] = { input_type_uniform_buffer, input_type_texel_buffer, input_type_texture }; + program_domain stages[] = { glsl_vertex_program, glsl_fragment_program }; + + VkDescriptorType vk_ids[] = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER }; + VkShaderStageFlags vk_stages[] = { VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT }; + + for (auto &input : uniforms) + { + VkDescriptorSetLayoutBinding binding; + binding.binding = input.location; + binding.descriptorCount = 1; + binding.descriptorType = vk_ids[(u32)input.type]; + 
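//No immutable samplers are used; sampler handles are supplied later in update_descriptors().
+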
binding.pImmutableSamplers = nullptr;
+ binding.stageFlags = vk_stages[(u32)input.domain];
+
+ layout_bindings[(u32)input.domain].push_back(binding);
+ }
+
+ for (int i = 0; i < 3; ++i)
+ {
+ u32 count = 0;
+ for (auto &input : uniforms)
+ {
+ if (input.type == types[i])
+ count++;
+ }
+
+ if (!count) continue;
+
+ VkDescriptorPoolSize size;
+ size.descriptorCount = count;
+ size.type = vk_ids[i];
+
+ sizes.push_back(size);
+ }
+
+ descriptor_pool.create((*device), sizes.data(), (u32)sizes.size());
+
+ VkDescriptorSetLayoutCreateInfo infos;
+ infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ infos.pNext = nullptr;
+ infos.flags = 0;
+ infos.pBindings = layout_bindings[0].data();
+ infos.bindingCount = (u32)layout_bindings[0].size();
+
+ CHECK_RESULT(vkCreateDescriptorSetLayout((*device), &infos, nullptr, &pstate.descriptor_layouts[0]));
+
+ infos.pBindings = layout_bindings[1].data();
+ infos.bindingCount = (u32)layout_bindings[1].size();
+
+ CHECK_RESULT(vkCreateDescriptorSetLayout((*device), &infos, nullptr, &pstate.descriptor_layouts[1]));
+
+ VkPipelineLayoutCreateInfo layout_info;
+ layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+ layout_info.pNext = nullptr;
+ layout_info.setLayoutCount = 2;
+ layout_info.pSetLayouts = pstate.descriptor_layouts;
+ layout_info.flags = 0;
+ layout_info.pPushConstantRanges = nullptr;
+ layout_info.pushConstantRangeCount = 0;
+
+ CHECK_RESULT(vkCreatePipelineLayout((*device), &layout_info, nullptr, &pstate.pipeline_layout));
+
+ VkDescriptorSetAllocateInfo alloc_info;
+ alloc_info.descriptorPool = descriptor_pool;
+ alloc_info.descriptorSetCount = 2;
+ alloc_info.pNext = nullptr;
+ alloc_info.pSetLayouts = pstate.descriptor_layouts;
+ alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+
+ CHECK_RESULT(vkAllocateDescriptorSets((*device), &alloc_info, pstate.descriptor_sets));
+ }
+
+ void program::update_descriptors()
+ {
+ if (!pstate.descriptor_layouts[0])
+ init_descriptor_layout();
+
+ std::vector<VkWriteDescriptorSet> descriptor_writers;
+ std::vector<VkDescriptorImageInfo> images(16);
+ std::vector<VkDescriptorBufferInfo> buffers(16);
+ std::vector<VkDescriptorBufferInfo> texel_buffers(16);
+ std::vector<VkBufferView> texel_buffer_views(16);
+ VkWriteDescriptorSet write;
+
+ int image_index = 0;
+ int buffer_index = 0;
+ int texel_buffer_index = 0;
+
+ for (auto &input : uniforms)
+ {
+ switch (input.type)
+ {
+ case input_type_texture:
+ {
+ auto &image = images[image_index++];
+ image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ image.sampler = null_sampler();
+ image.imageView = null_image_view();
+
+ if (input.as_sampler.sampler && input.as_sampler.image_view)
+ {
+ image.imageView = input.as_sampler.image_view;
+ image.sampler = input.as_sampler.sampler;
+ image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ }
+ else
+ LOG_ERROR(RSX, "Texture object was not bound: %s", input.name);
+
+ memset(&write, 0, sizeof(write));
+ write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+ write.pImageInfo = &image;
+ write.descriptorCount = 1;
+
+ break;
+ }
+ case input_type_uniform_buffer:
+ {
+ auto &buffer = buffers[buffer_index++];
+ buffer.buffer = null_buffer();
+ buffer.offset = 0;
+ buffer.range = 0;
+
+ if (input.as_buffer.buffer)
+ {
+ buffer.buffer = input.as_buffer.buffer;
+ buffer.range = input.as_buffer.size;
+ }
+ else
+ LOG_ERROR(RSX, "UBO was not bound: %s", input.name);
+
+ memset(&write, 0, sizeof(write));
+ write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ write.pBufferInfo = &buffer;
+ write.descriptorCount = 1;
+ break;
+ }
+ case input_type_texel_buffer:
+ {
+
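//A texel buffer binds both a VkBuffer and a VkBufferView; the view must stay valid until the descriptor write is applied.
+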
auto &buffer_view = texel_buffer_views[texel_buffer_index]; + buffer_view = null_buffer_view(); + + auto &buffer = texel_buffers[texel_buffer_index++]; + buffer.buffer = null_buffer(); + buffer.offset = 0; + buffer.range = 0; + + if (input.as_buffer.buffer && input.as_buffer.buffer_view) + { + buffer_view = input.as_buffer.buffer_view; + buffer.buffer = input.as_buffer.buffer; + buffer.range = input.as_buffer.size; + } + else + LOG_ERROR(RSX, "Texel buffer was not bound: %s", input.name); + + memset(&write, 0, sizeof(write)); + write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + write.pTexelBufferView = &buffer_view; + write.pBufferInfo = &buffer; + write.descriptorCount = 1; + break; + } + default: + throw EXCEPTION("Unhandled input type!"); + } + + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.dstSet = pstate.descriptor_sets[input.domain]; + write.pNext = nullptr; + + write.dstBinding = input.location; + descriptor_writers.push_back(write); + } + + if (!descriptor_writers.size()) return; + if (descriptor_writers.size() != uniforms.size()) + throw EXCEPTION("Undefined uniform detected"); + + vkUpdateDescriptorSets((*device), descriptor_writers.size(), descriptor_writers.data(), 0, nullptr); + } + + void program::destroy_descriptors() + { + if (pstate.descriptor_sets[0]) + vkFreeDescriptorSets((*device), descriptor_pool, 2, pstate.descriptor_sets); + + if (pstate.pipeline_layout) + vkDestroyPipelineLayout((*device), pstate.pipeline_layout, nullptr); + + if (pstate.descriptor_layouts[0]) + vkDestroyDescriptorSetLayout((*device), pstate.descriptor_layouts[0], nullptr); + + if (pstate.descriptor_layouts[1]) + vkDestroyDescriptorSetLayout((*device), pstate.descriptor_layouts[1], nullptr); + + descriptor_pool.destroy(); + } + + void program::set_draw_buffer_count(u8 draw_buffers) + { + if (pstate.num_targets != draw_buffers) + { + pstate.num_targets = draw_buffers; + pstate.dirty = true; + } + } + + program& program::load_uniforms(program_domain domain, std::vector& inputs) + { + std::vector store = uniforms; + uniforms.resize(0); + + for (auto &item : store) + { + if (item.domain != domain) + uniforms.push_back(item); + } + + for (auto &item : inputs) + uniforms.push_back(item); + + return *this; + } + + void program::use(vk::command_buffer& commands, VkRenderPass pass, u32 subpass) + { + if (/*uniforms_changed*/true) + { + update_descriptors(); + uniforms_changed = false; + } + + if (pstate.dirty) + { + if (pstate.pipeline_handle) + vkDestroyPipeline((*device), pstate.pipeline_handle, nullptr); + + pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors; + pstate.cb.pAttachments = pstate.att_state; + pstate.cb.attachmentCount = pstate.num_targets; + + //Reconfigure this.. 
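+ //Rebuild the create-info pointer table on every bake; pstate may have been moved, leaving stale member addresses.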
+ pstate.pipeline.pVertexInputState = &pstate.vi; + pstate.pipeline.pInputAssemblyState = &pstate.ia; + pstate.pipeline.pRasterizationState = &pstate.rs; + pstate.pipeline.pColorBlendState = &pstate.cb; + pstate.pipeline.pMultisampleState = &pstate.ms; + pstate.pipeline.pViewportState = &pstate.vp; + pstate.pipeline.pDepthStencilState = &pstate.ds; + pstate.pipeline.pStages = pstate.shader_stages; + pstate.pipeline.pDynamicState = &pstate.dynamic_state; + pstate.pipeline.layout = pstate.pipeline_layout; + pstate.pipeline.basePipelineIndex = -1; + pstate.pipeline.basePipelineHandle = VK_NULL_HANDLE; + + pstate.pipeline.renderPass = pass; + + CHECK_RESULT(vkCreateGraphicsPipelines((*device), nullptr, 1, &pstate.pipeline, NULL, &pstate.pipeline_handle)); + pstate.dirty = false; + } + + vkCmdBindPipeline(commands, VK_PIPELINE_BIND_POINT_GRAPHICS, pstate.pipeline_handle); + vkCmdBindDescriptorSets(commands, VK_PIPELINE_BIND_POINT_GRAPHICS, pstate.pipeline_layout, 0, 2, pstate.descriptor_sets, 0, nullptr); + } + + bool program::has_uniform(program_domain domain, std::string uniform_name) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + return true; + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + uniform.as_buffer.buffer = nullptr; + uniform.as_buffer.buffer_view = nullptr; + uniform.as_sampler.image_view = nullptr; + uniform.as_sampler.sampler = nullptr; + + uniforms_changed = true; + return true; + } + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkImageView view = _texture; + VkSampler sampler = _texture; + + if (uniform.as_sampler.image_view != view || + uniform.as_sampler.sampler != sampler) + { + uniform.as_sampler.image_view = view; + uniform.as_sampler.sampler = sampler; + uniforms_changed = true; + } + + uniform.type = input_type_texture; + return true; + } + } + + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkBuffer buf = _buffer; + u32 size = _buffer.size(); + + if (uniform.as_buffer.buffer != buf || + uniform.as_buffer.size != size) + { + uniform.as_buffer.size = size; + uniform.as_buffer.buffer = buf; + uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed! 
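Buffer views only exist for texel buffers, so any previous view handle is dropped here.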
+ + uniforms_changed = true; + } + + uniform.type = input_type_uniform_buffer; + return true; + } + } + + throw EXCEPTION("Failed to bind program uniform %s", uniform_name); + return false; + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store) + { + if (!is_texel_store) + { + return bind_uniform(domain, uniform_name, _buffer); + } + + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + VkBuffer buf = _buffer; + VkBufferView view = _buffer; + u32 size = _buffer.size(); + + if (uniform.as_buffer.buffer != buf || + uniform.as_buffer.buffer_view != view || + uniform.as_buffer.size != size) + { + uniform.as_buffer.size = size; + uniform.as_buffer.buffer = buf; + uniform.as_buffer.buffer_view = view; + + if (!view) + throw EXCEPTION("Invalid buffer passed as texel storage"); + + uniforms_changed = true; + } + + uniform.type = input_type_texel_buffer; + return true; + } + } + + return false; + } + + void program::destroy() + { + if (device) + { + destroy_descriptors(); + uniforms.resize(0); + + if (pstate.pipeline_handle) + vkDestroyPipeline((*device), pstate.pipeline_handle, nullptr); + + if (pstate.pipeline_cache) + vkDestroyPipelineCache((*device), pstate.pipeline_cache, nullptr); + } + + memset(&pstate, 0, sizeof pstate); + device = nullptr; + } + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h new file mode 100644 index 0000000000..666a2b6e52 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -0,0 +1,118 @@ +#pragma once + +#include "stdafx.h" +#include "VKHelpers.h" +#include "../GCM.h" +#include "../Common/surface_store.h" + +namespace rsx +{ + struct vk_render_target_traits + { + using surface_storage_type = vk::texture ; + using surface_type = vk::texture*; + using command_list_type = vk::command_buffer*; + using download_buffer_object = void*; + + static vk::texture create_new_surface(u32 address, surface_color_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd) + { + VkFormat requested_format = vk::get_compatible_surface_format(format); + + vk::texture rtt; + rtt.create(device, requested_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + return rtt; + } + + static vk::texture create_new_surface(u32 address, surface_depth_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd) + { + VkFormat requested_format = vk::get_compatible_depth_surface_format(format); + + vk::texture rtt; + rtt.create(device, requested_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, width, height); + rtt.change_layout(*cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + return rtt; + } + + static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } + + static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + static void 
prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::texture *surface) + { + surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + static bool rtt_has_format_width_height(const vk::texture &rtt, surface_color_format format, size_t width, size_t height) + { + VkFormat fmt = vk::get_compatible_surface_format(format); + vk::texture &tex = const_cast(rtt); + + if (tex.get_format() == fmt && + tex.width() == width && + tex.height() == height) + return true; + + return false; + } + + static bool ds_has_format_width_height(const vk::texture &ds, surface_depth_format format, size_t width, size_t height) + { + VkFormat fmt = vk::get_compatible_depth_surface_format(format); + vk::texture &tex = const_cast(ds); + + if (tex.get_format() == fmt && + tex.width() == width && + tex.height() == height) + return true; + + return false; + } + + static download_buffer_object issue_download_command(surface_type, surface_color_format color_format, size_t width, size_t height, ...) + { + return nullptr; + } + + static download_buffer_object issue_depth_download_command(surface_type, surface_depth_format depth_format, size_t width, size_t height, ...) + { + return nullptr; + } + + static download_buffer_object issue_stencil_download_command(surface_type, surface_depth_format depth_format, size_t width, size_t height, ...) + { + return nullptr; + } + + gsl::span map_downloaded_buffer(download_buffer_object, ...) + { + return{ (gsl::byte*)nullptr, 0 }; + } + + static void unmap_downloaded_buffer(download_buffer_object, ...) + { + } + + static vk::texture *get(const vk::texture &tex) + { + return const_cast(&tex); + } + }; + + struct vk_render_targets : public rsx::surface_store + { + }; +} diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp new file mode 100644 index 0000000000..78e1d46498 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -0,0 +1,560 @@ +#include "stdafx.h" +#include "VKHelpers.h" +#include "../GCM.h" +#include "../RSXThread.h" +#include "../RSXTexture.h" +#include "../rsx_utils.h" +#include "../Common/TextureUtils.h" + +namespace vk +{ + VkComponentMapping default_component_map() + { + VkComponentMapping result; + result.a = VK_COMPONENT_SWIZZLE_A; + result.r = VK_COMPONENT_SWIZZLE_R; + result.g = VK_COMPONENT_SWIZZLE_G; + result.b = VK_COMPONENT_SWIZZLE_B; + + return result; + } + + VkImageSubresource default_image_subresource() + { + VkImageSubresource subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.mipLevel = 0; + subres.arrayLayer = 0; + + return subres; + } + + VkImageSubresourceRange default_image_subresource_range() + { + VkImageSubresourceRange subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + return subres; + } + + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) + { + VkImageSubresourceLayers a_src, a_dst; + a_src.aspectMask = aspect; + a_src.baseArrayLayer = 0; + a_src.layerCount = 1; + a_src.mipLevel = 0; + + a_dst = a_src; + + VkImageCopy rgn; + rgn.extent.depth = 1; + rgn.extent.width = width; + rgn.extent.height = height; + rgn.dstOffset = { 0, 0, 0 }; + rgn.srcOffset = { 0, 0, 0 }; + rgn.srcSubresource = a_src; + rgn.dstSubresource = a_dst; + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, srcLayout, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, aspect); + + if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect); + + for (int mip_level = 0; mip_level < mipmaps; ++mip_level) + { + vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn); + + rgn.srcSubresource.mipLevel++; + rgn.dstSubresource.mipLevel++; + } + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, aspect); + + if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, aspect); + } + + void copy_scaled_image(VkCommandBuffer cmd, VkImage & src, VkImage & dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect) + { + VkImageSubresourceLayers a_src, a_dst; + a_src.aspectMask = aspect; + a_src.baseArrayLayer = 0; + a_src.layerCount = 1; + a_src.mipLevel = 0; + + a_dst = a_src; + + VkImageBlit rgn; + rgn.srcOffsets[0] = { 0, 0, 0 }; + rgn.srcOffsets[1] = { (int32_t)src_width, (int32_t)src_height, 1 }; + rgn.dstOffsets[0] = { 0, 0, 0 }; + rgn.dstOffsets[1] = { (int32_t)dst_width, (int32_t)dst_height, 1 }; + rgn.dstSubresource = a_dst; + rgn.srcSubresource = a_src; + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, aspect); + + if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, aspect); + + for (int mip_level = 0; mip_level < mipmaps; ++mip_level) + { + vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR); + + rgn.srcSubresource.mipLevel++; + rgn.dstSubresource.mipLevel++; + } + + if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, aspect); + + if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, aspect); + } + + void copy_texture(VkCommandBuffer cmd, texture &src, texture &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) + { + VkImage isrc = (VkImage)src; + VkImage idst = (VkImage)dst; + + copy_image(cmd, isrc, idst, srcLayout, dstLayout, width, height, mipmaps, aspect); + } + + texture::texture(vk::swap_chain_image &img) + { + m_image_contents = img; + m_view = img; + m_sampler = nullptr; + + //We did not create this object, do not allow internal modification! 
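A null owner marks this texture as a non-owning wrapper, which turns destroy() into a no-op.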
+ owner = nullptr; + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + { + owner = &device; + + //First create the image + VkImageCreateInfo image_info; + memset(&image_info, 0, sizeof(image_info)); + + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + image_info.imageType = image_type; + image_info.format = format; + image_info.extent = { width, height, 1 }; + image_info.mipLevels = mipmaps; + image_info.arrayLayers = (image_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)? 6: 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = tiling; + image_info.usage = usage; + image_info.flags = image_flags; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + CHECK_RESULT(vkCreateImage(device, &image_info, nullptr, &m_image_contents)); + + vkGetImageMemoryRequirements(device, m_image_contents, &m_memory_layout); + vram_allocation.allocate_from_pool(device, m_memory_layout.size, m_memory_layout.memoryTypeBits); + + CHECK_RESULT(vkBindImageMemory(device, m_image_contents, vram_allocation, 0)); + + VkImageViewCreateInfo view_info; + view_info.format = format; + view_info.image = m_image_contents; + view_info.pNext = nullptr; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.viewType = view_type; + view_info.components = swizzle; + view_info.subresourceRange = default_image_subresource_range(); + view_info.flags = 0; + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT/* | VK_IMAGE_ASPECT_STENCIL_BIT*/; + m_image_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + } + + CHECK_RESULT(vkCreateImageView(device, &view_info, nullptr, &m_view)); + + m_width = width; + m_height = height; + m_mipmaps = mipmaps; + m_internal_format = format; + m_flags = usage; + m_view_type = view_type; + m_usage = usage; + m_tiling = tiling; + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT || + usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + VkSamplerAddressMode clamp_s = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VkSamplerAddressMode clamp_t = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VkSamplerAddressMode clamp_r = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + + VkSamplerCreateInfo sampler_info; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = clamp_s; + sampler_info.addressModeV = clamp_t; + sampler_info.addressModeW = clamp_r; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.pNext = nullptr; + sampler_info.unnormalizedCoordinates = VK_FALSE; + sampler_info.mipLodBias = 0; + sampler_info.maxAnisotropy = 0; + sampler_info.flags = 0; + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + CHECK_RESULT(vkCreateSampler((*owner), &sampler_info, nullptr, &m_sampler)); + } + + ready = true; + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + { + create(device, format, VK_IMAGE_TYPE_2D, 
VK_IMAGE_VIEW_TYPE_2D, 0, usage, tiling, width, height, mipmaps, gpu_only, swizzle); + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping& swizzle) + { + VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) + { + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(device.gpu(), format, &props); + + //Enable linear tiling if supported and we request a sampled image.. + if (props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + tiling = VK_IMAGE_TILING_LINEAR; + else + usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + + create(device, format, usage, tiling, width, height, mipmaps, gpu_only, swizzle); + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps, bool gpu_only) + { + create(device, format, usage, width, height, mipmaps, gpu_only, vk::default_component_map()); + } + + void texture::create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height) + { + create(device, format, usage, width, height, 1, false); + } + + VkSamplerAddressMode texture::vk_wrap_mode(u32 gcm_wrap) + { + switch (gcm_wrap) + { + case CELL_GCM_TEXTURE_WRAP: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case CELL_GCM_TEXTURE_MIRROR: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_BORDER: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case CELL_GCM_TEXTURE_CLAMP: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + default: + throw EXCEPTION("unhandled texture clamp mode 0x%X", gcm_wrap); + } + } + + float texture::max_aniso(u32 gcm_aniso) + { + switch (gcm_aniso) + { + case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12.0f; + case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16.0f; + } + + LOG_ERROR(RSX, "Texture anisotropy error: bad max aniso (%d).", gcm_aniso); + return 1.0f; + } + + void texture::sampler_setup(rsx::texture &tex, VkImageViewType type, VkComponentMapping swizzle) + { + VkSamplerAddressMode clamp_s = vk_wrap_mode(tex.wrap_s()); + VkSamplerAddressMode clamp_t = vk_wrap_mode(tex.wrap_t()); + VkSamplerAddressMode clamp_r = vk_wrap_mode(tex.wrap_r()); + + VkSamplerCreateInfo sampler_info; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.addressModeU = clamp_s; + sampler_info.addressModeV = clamp_t; + sampler_info.addressModeW = clamp_r; + sampler_info.anisotropyEnable = VK_TRUE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.pNext = nullptr; + sampler_info.unnormalizedCoordinates = !!(tex.format() & CELL_GCM_TEXTURE_UN); + sampler_info.mipLodBias = tex.bias(); + sampler_info.maxAnisotropy = max_aniso(tex.max_aniso()); + sampler_info.flags = 0; + sampler_info.maxLod = tex.max_lod(); + sampler_info.minLod = 
tex.min_lod(); + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + CHECK_RESULT(vkCreateSampler((*owner), &sampler_info, nullptr, &m_sampler)); + } + + void texture::init(rsx::texture& tex, vk::command_buffer &cmd, bool ignore_checks) + { + VkImageViewType best_type = VK_IMAGE_VIEW_TYPE_2D; + + if (tex.cubemap() && m_view_type != VK_IMAGE_VIEW_TYPE_CUBE) + { + vk::render_device &dev = (*owner); + VkFormat format = m_internal_format; + VkImageUsageFlags usage = m_usage; + VkImageTiling tiling = m_tiling; + + destroy(); + create(dev, format, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_CUBE, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, usage, tiling, tex.width(), tex.height(), tex.mipmap(), false, default_component_map()); + } + + if (!tex.cubemap() && tex.depth() > 1 && m_view_type != VK_IMAGE_VIEW_TYPE_3D) + { + best_type = VK_IMAGE_VIEW_TYPE_3D; + + vk::render_device &dev = (*owner); + VkFormat format = m_internal_format; + VkImageUsageFlags usage = m_usage; + VkImageTiling tiling = m_tiling; + + destroy(); + create(dev, format, VK_IMAGE_TYPE_3D, VK_IMAGE_VIEW_TYPE_3D, 0, usage, tiling, tex.width(), tex.height(), tex.mipmap(), false, default_component_map()); + } + + if (!m_sampler) + sampler_setup(tex, best_type, default_component_map()); + + VkImageSubresource subres; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.mipLevel = 0; + subres.arrayLayer = 0; + + u8 *data; + + VkFormatProperties props; + vk::physical_device dev = owner->gpu(); + vkGetPhysicalDeviceFormatProperties(dev, m_internal_format, &props); + + if (ignore_checks || props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + { + std::vector> layout_alignment(tex.mipmap()); + + for (u32 i = 0; i < tex.mipmap(); ++i) + { + layout_alignment[i].first = 4096; + vkGetImageSubresourceLayout((*owner), m_image_contents, &subres, &layout_alignment[i].second); + + if (m_view_type == VK_IMAGE_VIEW_TYPE_CUBE) + layout_alignment[i].second.size *= 6; + + while (layout_alignment[i].first > 1) + { + //Test if is wholly divisible by alignment.. 
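Candidates start at 4096 and halve each pass; (rowPitch & (candidate - 1)) == 0 is the power-of-two divisibility test.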
+ if (!(layout_alignment[i].second.rowPitch & (layout_alignment[i].first - 1))) + break; + + layout_alignment[i].first >>= 1; + } + + subres.mipLevel++; + } + + if (tex.mipmap() == 1) + { + u32 buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); + if (buffer_size != layout_alignment[0].second.size) + { + if (buffer_size > layout_alignment[0].second.size) + { + LOG_ERROR(RSX, "Layout->pitch = %d, size=%d, height=%d", layout_alignment[0].second.rowPitch, layout_alignment[0].second.size, tex.height()); + LOG_ERROR(RSX, "Computed alignment would have been %d, which yielded a size of %d", layout_alignment[0].first, buffer_size); + LOG_ERROR(RSX, "Retrying..."); + + //layout_alignment[0].first >>= 1; + buffer_size = get_placed_texture_storage_size(tex, layout_alignment[0].first, layout_alignment[0].first); + + if (buffer_size != layout_alignment[0].second.size) + throw EXCEPTION("Bad texture alignment computation!"); + } + else + { + LOG_ERROR(RSX, "Bad texture alignment computation: expected size=%d bytes, computed=%d bytes, alignment=%d, hw pitch=%d", + layout_alignment[0].second.size, buffer_size, layout_alignment[0].first, layout_alignment[0].second.rowPitch); + } + } + + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + gsl::span mapped{ (gsl::byte*)(data + layout_alignment[0].second.offset), gsl::narrow(layout_alignment[0].second.size) }; + + upload_placed_texture(mapped, tex, layout_alignment[0].first); + vkUnmapMemory((*owner), vram_allocation); + } + else + { + auto &layer_props = layout_alignment[layout_alignment.size() - 1].second; + u32 max_size = layer_props.offset + layer_props.size; + + if (m_memory_layout.size < max_size) + { + throw EXCEPTION("Failed to upload texture. 
Invalid memory block size."); + } + + int index= 0; + std::vector> layout_offset_info(tex.mipmap()); + + for (auto &mip_info : layout_offset_info) + { + auto &alignment = layout_alignment[index].first; + auto &layout = layout_alignment[index++].second; + + mip_info = std::make_pair(layout.offset, layout.rowPitch); + } + + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + gsl::span mapped{ (gsl::byte*)(data), gsl::narrow(m_memory_layout.size) }; + + upload_texture_mipmaps(mapped, tex, layout_offset_info); + vkUnmapMemory((*owner), vram_allocation); + } + } + else if (!ignore_checks) + { + if (!staging_texture) + { + staging_texture = new texture(); + staging_texture->create((*owner), m_internal_format, VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_TILING_LINEAR, m_width, m_height, tex.mipmap(), false, default_component_map()); + } + + staging_texture->init(tex, cmd, true); + staging_texture->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + ready = false; + } + } + + void texture::init(rsx::texture &tex, vk::command_buffer &cmd) + { + init(tex, cmd, false); + } + + void texture::flush(vk::command_buffer &cmd) + { + if (!ready) + { + vk::copy_texture(cmd, *staging_texture, *this, staging_texture->get_layout(), m_layout, m_width, m_height, m_mipmaps, m_image_aspect); + ready = true; + } + } + + void texture::init_debug() + { + void *data; + CHECK_RESULT(vkMapMemory((*owner), vram_allocation, 0, m_memory_layout.size, 0, (void**)&data)); + + memset(data, 0xFF, m_memory_layout.size); + vkUnmapMemory((*owner), vram_allocation); + } + + void texture::change_layout(vk::command_buffer &cmd, VkImageLayout new_layout) + { + if (m_layout == new_layout) return; + + vk::change_image_layout(cmd, m_image_contents, m_layout, new_layout, m_image_aspect); + m_layout = new_layout; + } + + VkImageLayout texture::get_layout() + { + return m_layout; + } + + const u32 texture::width() + { + return m_width; + } + + const u32 texture::height() + { + return m_height; + } + + const u16 texture::mipmaps() + { + return m_mipmaps; + } + + void texture::destroy() + { + if (!owner) return; + + if (m_sampler) + vkDestroySampler((*owner), m_sampler, nullptr); + + //Destroy all objects managed by this object + vkDestroyImageView((*owner), m_view, nullptr); + vkDestroyImage((*owner), m_image_contents, nullptr); + + vram_allocation.destroy(); + + owner = nullptr; + m_sampler = nullptr; + m_view = nullptr; + m_image_contents = nullptr; + + if (staging_texture) + { + staging_texture->destroy(); + delete staging_texture; + staging_texture = nullptr; + } + } + + const VkFormat texture::get_format() + { + return m_internal_format; + } + + texture::operator VkImage() + { + return m_image_contents; + } + + texture::operator VkImageView() + { + return m_view; + } + + texture::operator VkSampler() + { + return m_sampler; + } +} diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h new file mode 100644 index 0000000000..a474f3d1cb --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -0,0 +1,240 @@ +#pragma once +#include "stdafx.h" +#include "VKRenderTargets.h" +#include "VKGSRender.h" +#include "../Common/TextureUtils.h" + +namespace vk +{ + struct cached_texture_object + { + u32 native_rsx_address; + u32 native_rsx_size; + + u16 width; + u16 height; + u16 depth; + u16 mipmaps; + + vk::texture uploaded_texture; + + u64 protected_rgn_start; + u64 protected_rgn_end; + + bool exists = false; + bool locked = false; + bool 
dirty = true;
+ };
+
+ class texture_cache
+ {
+ private:
+ std::vector<cached_texture_object> m_cache;
+
+ bool lock_memory_region(u32 start, u32 size)
+ {
+ static const u32 memory_page_size = 4096;
+ start = start & ~(memory_page_size - 1);
+ size = (u32)align(size, memory_page_size);
+
+ return vm::page_protect(start, size, 0, 0, vm::page_writable);
+ }
+
+ bool unlock_memory_region(u32 start, u32 size)
+ {
+ static const u32 memory_page_size = 4096;
+ start = start & ~(memory_page_size - 1);
+ size = (u32)align(size, memory_page_size);
+
+ return vm::page_protect(start, size, 0, vm::page_writable, 0);
+ }
+
+ bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
+ {
+ //Check for memory area overlap; the caller unlocks page(s) if needed and adds this index to the array.
+ //Axis separation test
+ const u32 &block_start = base1;
+ const u32 block_end = limit1;
+
+ if (limit2 < block_start) return false;
+ if (base2 > block_end) return false;
+
+ u32 min_separation = (limit2 - base2) + (limit1 - base1);
+ u32 range_limit = (block_end > limit2) ? block_end : limit2;
+ u32 range_base = (block_start < base2) ? block_start : base2;
+
+ u32 actual_separation = (range_limit - range_base);
+
+ if (actual_separation < min_separation)
+ return true;
+
+ return false;
+ }
+
+ cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
+ {
+ for (cached_texture_object &tex : m_cache)
+ {
+ if (!tex.dirty && tex.exists &&
+ tex.native_rsx_address == rsx_address &&
+ tex.native_rsx_size == rsx_size)
+ {
+ if (!confirm_dimensions) return tex;
+
+ if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps)
+ return tex;
+ else
+ {
+ LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
+ LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height);
+ }
+ }
+ }
+
+ for (cached_texture_object &tex : m_cache)
+ {
+ if (tex.dirty)
+ {
+ if (tex.exists)
+ {
+ tex.uploaded_texture.destroy();
+ tex.exists = false;
+ }
+
+ return tex;
+ }
+ }
+
+ cached_texture_object object;
+ m_cache.push_back(object);
+
+ return m_cache[m_cache.size() - 1];
+ }
+
+ void lock_object(cached_texture_object &obj)
+ {
+ static const u32 memory_page_size = 4096;
+ obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1);
+ obj.protected_rgn_end = (u32)align(obj.native_rsx_size, memory_page_size);
+ obj.protected_rgn_end += obj.protected_rgn_start;
+
+ lock_memory_region(obj.protected_rgn_start, obj.native_rsx_size);
+ }
+
+ void unlock_object(cached_texture_object &obj)
+ {
+ unlock_memory_region(obj.protected_rgn_start, obj.native_rsx_size);
+ }
+
+ public:
+
+ texture_cache() {}
+ ~texture_cache() {}
+
+ void destroy()
+ {
+ for (cached_texture_object &tex : m_cache)
+ {
+ if (tex.exists)
+ {
+ tex.uploaded_texture.destroy();
+ tex.exists = false;
+ }
+ }
+
+ m_cache.resize(0);
+ }
+
+ vk::texture& upload_texture(command_buffer cmd, rsx::texture &tex, rsx::vk_render_targets &m_rtts)
+ {
+ const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
+ const u32 range = (u32)get_texture_size(tex);
+
+ //First check if it exists as an rtt...
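Render targets hold the most recent copy of that memory, so sampling them directly avoids an upload from guest memory.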
+ vk::texture *rtt_texture = nullptr; + if (rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) + { + return *rtt_texture; + } + + if (rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) + { + return *rtt_texture; + } + + cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.mipmap()); + if (cto.exists && !cto.dirty) + { + return cto.uploaded_texture; + } + + u32 raw_format = tex.format(); + u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + VkComponentMapping mapping; + VkFormat vk_format = get_compatible_sampler_format(format, mapping, tex.remap()); + + cto.uploaded_texture.create(*vk::get_current_renderer(), vk_format, VK_IMAGE_USAGE_SAMPLED_BIT, tex.width(), tex.height(), tex.mipmap(), false, mapping); + cto.uploaded_texture.init(tex, cmd); + cto.uploaded_texture.change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + cto.exists = true; + cto.dirty = false; + cto.native_rsx_address = texaddr; + cto.native_rsx_size = range; + cto.width = cto.uploaded_texture.width(); + cto.height = cto.uploaded_texture.height(); + cto.mipmaps = cto.uploaded_texture.mipmaps(); + + lock_object(cto); + + return cto.uploaded_texture; + } + + bool invalidate_address(u32 rsx_address) + { + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty) continue; + + if (rsx_address >= tex.protected_rgn_start && + rsx_address < tex.protected_rgn_end) + { + unlock_object(tex); + + tex.native_rsx_address = 0; + tex.dirty = true; + + return true; + } + } + + return false; + } + + void flush(vk::command_buffer &cmd) + { + //Finish all pending transactions for any cache managed textures.. + for (cached_texture_object &tex : m_cache) + { + if (tex.dirty || !tex.exists) continue; + tex.uploaded_texture.flush(cmd); + } + } + + void merge_dirty_textures(std::list dirty_textures) + { + for (vk::texture &tex : dirty_textures) + { + cached_texture_object cto; + cto.uploaded_texture = tex; + cto.locked = false; + cto.exists = true; + cto.dirty = true; + cto.native_rsx_address = 0; + + m_cache.push_back(cto); + } + } + }; +} diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp new file mode 100644 index 0000000000..7709150aaf --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -0,0 +1,566 @@ +#include "stdafx.h" +#include "Utilities/rPlatform.h" // only for rImage +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/state.h" +#include "VKGSRender.h" +#include "../rsx_methods.h" +#include "../Common/BufferUtils.h" + +namespace vk +{ + bool requires_component_expansion(rsx::vertex_base_type type, u32 size) + { + if (size == 3) + { + switch (type) + { + case rsx::vertex_base_type::f: + return true; + } + } + + return false; + } + + u32 get_suitable_vk_size(rsx::vertex_base_type type, u32 size) + { + if (size == 3) + { + switch (type) + { + case rsx::vertex_base_type::f: + return 16; + } + } + + return rsx::get_vertex_type_size_on_host(type, size); + } + + VkFormat get_suitable_vk_format(rsx::vertex_base_type type, u8 size) + { + /** + * Set up buffer fetches to only work on 4-component access. 
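Three-component attributes are widened to four components, with the spare lane padded (see expand_array_components below).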
This is hardware dependent so we use 4-component access to avoid branching based on IHV implementation
+ * AMD GCN 1.0 for example does not support RGB32 formats for texel buffers
+ */
+ const VkFormat vec1_types[] = { VK_FORMAT_R16_UNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM };
+ const VkFormat vec2_types[] = { VK_FORMAT_R16G16_UNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM };
+ const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM }; //VEC3 COMPONENTS NOT SUPPORTED!
+ const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM };
+
+ const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types };
+
+ if (type > rsx::vertex_base_type::ub256)
+ throw EXCEPTION("VKGS error: unknown vertex base type 0x%X.", (u32)type);
+
+ return vec_selectors[size][(int)type];
+ }
+
+ VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification)
+ {
+ requires_modification = false;
+
+ switch (mode)
+ {
+ case rsx::primitive_type::lines:
+ return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
+ case rsx::primitive_type::line_loop:
+ requires_modification = true;
+ return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+ case rsx::primitive_type::line_strip:
+ return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+ case rsx::primitive_type::points:
+ return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+ case rsx::primitive_type::triangles:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+ case rsx::primitive_type::triangle_strip:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+ case rsx::primitive_type::triangle_fan:
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+ case rsx::primitive_type::quads:
+ case rsx::primitive_type::quad_strip:
+ case rsx::primitive_type::polygon:
+ requires_modification = true;
+ return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+ default:
+ throw EXCEPTION("Unsupported primitive topology 0x%X", (u8)mode);
+ }
+ }
+
+ /**
+ * Expand line loop array to line strip array; simply loop back the last vertex to the first.
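+ * e.g. the 4-vertex loop {0, 1, 2, 3} becomes the 5-index strip {0, 1, 2, 3, 0}.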
+ */ + u32 expand_line_loop_array_to_strip(u32 vertex_draw_count, std::vector& indices) + { + int i = 0; + indices.resize(vertex_draw_count + 1); + + for (; i < vertex_draw_count; ++i) + indices[i] = i; + + indices[i] = 0; + return indices.size(); + } + + template + u32 expand_indexed_line_loop_to_strip(u32 original_count, const T* original_indices, std::vector& indices) + { + indices.resize(original_count + 1); + + int i = 0; + for (; i < original_count; ++i) + indices[i] = original_indices[i]; + + indices[i] = original_indices[0]; + return indices.size(); + } + + /** + * Template: Expand any N-compoent vector to a larger X-component vector and pad unused slots with 1 + */ + template + void expand_array_components(const T* src_data, std::vector& dst_data, u32 vertex_count) + { + u32 dst_size = (vertex_count * dst_components * sizeof(T)); + dst_data.resize(dst_size); + + T* src = const_cast(src_data); + T* dst = reinterpret_cast(dst_data.data()); + + for (u32 index = 0; index < vertex_count; ++index) + { + for (u8 channel = 0; channel < dst_components; channel++) + { + if (channel < src_components) + { + *dst = *src; + + dst++; + src++; + } + else + { + *dst = (T)(padding); + dst++; + } + } + } + } + + template + void copy_inlined_data_to_buffer(void *src_data, void *dst_data, u32 vertex_count, rsx::vertex_base_type type, u8 src_channels, u8 dst_channels, u16 element_size, u16 stride) + { + u8 *src = static_cast(src_data); + u8 *dst = static_cast(dst_data); + + for (u32 i = 0; i < vertex_count; ++i) + { + T* src_ptr = reinterpret_cast(src); + T* dst_ptr = reinterpret_cast(dst); + + switch (type) + { + case rsx::vertex_base_type::ub: + { + if (src_channels == 4) + { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + + break; + } + } + default: + { + for (u8 ch = 0; ch < dst_channels; ++ch) + { + if (ch < src_channels) + { + *dst_ptr = *src_ptr; + src_ptr++; + } + else + *dst_ptr = (T)(padding); + + dst_ptr++; + } + } + } + + src += stride; + dst += element_size; + } + } + + void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count) + { + switch (type) + { + case rsx::vertex_base_type::sf: + { + if (vertex_size == 3) + { + /** + * Pad the 4th component for half-float arrays to 1, since texelfetch does not mask components + */ + u16 *dst = reinterpret_cast(data); + for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4) + dst[idx] = 0x3c00; + } + + break; + } + } + } +} + +std::tuple +VKGSRender::upload_vertex_data() +{ + //initialize vertex attributes + std::vector vertex_arrays_data; + + const std::string reg_table[] = + { + "in_pos_buffer", "in_weight_buffer", "in_normal_buffer", + "in_diff_color_buffer", "in_spec_color_buffer", + "in_fog_buffer", + "in_point_size_buffer", "in_7_buffer", + "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", + "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" + }; + + u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; + + std::vector vertex_index_array; + vertex_draw_count = 0; + u32 min_index, max_index; + + if (draw_command == rsx::draw_command::indexed) + { + rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + u32 type_size = gsl::narrow(get_index_type_size(type)); + for (const auto& first_count : first_count_commands) + { + vertex_draw_count += first_count.second; + } + + vertex_index_array.resize(vertex_draw_count * type_size); + + switch (type) + { + case 
rsx::index_array_type::u32: + std::tie(min_index, max_index) = write_index_array_data_to_buffer_untouched(gsl::span((u32*)vertex_index_array.data(), vertex_draw_count), first_count_commands); + break; + case rsx::index_array_type::u16: + std::tie(min_index, max_index) = write_index_array_data_to_buffer_untouched(gsl::span((u16*)vertex_index_array.data(), vertex_draw_count), first_count_commands); + break; + } + } + + if (draw_command == rsx::draw_command::inlined_array) + { + u32 stride = 0; + u32 offsets[rsx::limits::vertex_count] = { 0 }; + + for (u32 i = 0; i < rsx::limits::vertex_count; ++i) + { + const auto &info = vertex_arrays_info[i]; + if (!info.size) continue; + + offsets[i] = stride; + stride += rsx::get_vertex_type_size_on_host(info.type, info.size); + } + + vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + auto &vertex_info = vertex_arrays_info[index]; + + if (!m_program->has_uniform(vk::glsl::glsl_vertex_program, reg_table[index])) + continue; + + if (!vertex_info.size) // disabled + { + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index]); + continue; + } + + const u32 host_element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + const u32 element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size); + const u32 data_size = element_size * vertex_draw_count; + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); + + vertex_arrays_data.resize(data_size); + u8 *src = reinterpret_cast(inline_vertex_array.data()); + u8 *dst = vertex_arrays_data.data(); + + src += offsets[index]; + u8 opt_size = vertex_info.size; + + if (vertex_info.size == 3) + opt_size = 4; + + //TODO: properly handle cmp type + if (vertex_info.type == rsx::vertex_base_type::cmp) + LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet"); + + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::sf: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::s1: + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::ub256: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + case rsx::vertex_base_type::s32k: + case rsx::vertex_base_type::cmp: + vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, vertex_info.type, vertex_info.size, opt_size, host_element_size, stride); + break; + default: + throw EXCEPTION("Unknown base type %d", vertex_info.type); + } + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, vertex_arrays_data.data()); + buffer.set_format(format); + + //Link texture to uniform location + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + } + } + + if (draw_command == rsx::draw_command::array) + { + for (const auto &first_count : first_count_commands) + { + vertex_draw_count += first_count.second; + } + } + + if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) + { + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + if 
(!m_program->has_uniform(vk::glsl::glsl_vertex_program, reg_table[index])) + continue; + + bool enabled = !!(input_mask & (1 << index)); + + if (!enabled) + { + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index]); + continue; + } + + if (vertex_arrays_info[index].size > 0) + { + auto &vertex_info = vertex_arrays_info[index]; + // Active vertex array + std::vector vertex_array; + + // Fill vertex_array + u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + vertex_array.resize(vertex_draw_count * element_size); + + // Get source pointer + u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; + u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index]; + u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31); + const gsl::byte *src_ptr = gsl::narrow_cast(vm::base(address)); + + u32 num_stored_verts = vertex_draw_count; + + if (draw_command == rsx::draw_command::array) + { + size_t offset = 0; + gsl::span dest_span(vertex_array); + vk::prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + + for (const auto &first_count : first_count_commands) + { + write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride); + offset += first_count.second * element_size; + } + } + if (draw_command == rsx::draw_command::indexed) + { + num_stored_verts = (max_index + 1); + vertex_array.resize((max_index + 1) * element_size); + gsl::span dest_span(vertex_array); + vk::prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + + write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride); + } + + std::vector converted_buffer; + void *data_ptr = vertex_array.data(); + + if (vk::requires_component_expansion(vertex_info.type, vertex_info.size)) + { + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + vk::expand_array_components(reinterpret_cast(vertex_array.data()), converted_buffer, num_stored_verts); + break; + } + + data_ptr = static_cast(converted_buffer.data()); + } + + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); + const u32 data_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size) * num_stored_verts; + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, data_ptr); + buffer.set_format(format); + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + } + else if (register_vertex_info[index].size > 0) + { + //Untested! 
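+ //Assumption, noted for clarity: register attributes are immediate values
+ //written through the NV4097_SET_VERTEX_DATA* methods rather than arrays in
+ //memory, so the single value is uploaded whole and the shader's non-array
+ //path fetches it at offset 0 (see vk::add_input in VKVertexProgram.cpp).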
+ auto &vertex_data = register_vertex_data[index]; + auto &vertex_info = register_vertex_info[index]; + + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + { + size_t data_size = vertex_data.size(); + const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size); + + std::vector converted_buffer; + void *data_ptr = vertex_data.data(); + + if (vk::requires_component_expansion(vertex_info.type, vertex_info.size)) + { + switch (vertex_info.type) + { + case rsx::vertex_base_type::f: + { + const u32 num_stored_verts = data_size / (sizeof(float) * vertex_info.size); + vk::expand_array_components(reinterpret_cast(vertex_data.data()), converted_buffer, num_stored_verts); + break; + } + } + + data_ptr = static_cast(converted_buffer.data()); + data_size = converted_buffer.size(); + } + + auto &buffer = m_attrib_buffers[index]; + + buffer.sub_data(0, data_size, data_ptr); + buffer.set_format(format); + + m_program->bind_uniform(vk::glsl::glsl_vertex_program, reg_table[index], buffer, true); + break; + } + default: + LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size); + break; + } + } + } + } + + bool is_indexed_draw = (draw_command == rsx::draw_command::indexed); + bool index_buffer_filled = false; + bool primitives_emulated = false; + u32 index_count = vertex_draw_count; + + VkIndexType index_format = VK_INDEX_TYPE_UINT16; + VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_mode, primitives_emulated); + + if (primitives_emulated) + { + //Line loops are line-strips with loop-back; using line-strips-with-adj doesnt work for vulkan + if (draw_mode == rsx::primitive_type::line_loop) + { + std::vector indices; + + if (!is_indexed_draw) + { + index_count = vk::expand_line_loop_array_to_strip(vertex_draw_count, indices); + m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + } + else + { + rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + if (indexed_type == rsx::index_array_type::u32) + { + index_format = VK_INDEX_TYPE_UINT32; + std::vector indices32; + + index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u32*)vertex_index_array.data(), indices32); + m_index_buffer.sub_data(0, index_count*sizeof(u32), indices32.data()); + } + else + { + index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u16*)vertex_index_array.data(), indices); + m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + } + } + } + else + { + index_count = get_index_count(draw_mode, vertex_draw_count); + std::vector indices(index_count); + + if (is_indexed_draw) + { + rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + size_t index_size = get_index_type_size(indexed_type); + + std::vector> ranges; + ranges.push_back(std::pair(0, vertex_draw_count)); + + gsl::span dst = { (u16*)indices.data(), gsl::narrow(index_count) }; + write_index_array_data_to_buffer(dst, draw_mode, ranges); + } + else + { + write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast(indices.data()), draw_mode, 0, vertex_draw_count); + } + + m_index_buffer.sub_data(0, index_count * sizeof(u16), indices.data()); + } + + is_indexed_draw = true; + index_buffer_filled = true; + } + + if (!index_buffer_filled && is_indexed_draw) + { + rsx::index_array_type indexed_type = 
rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); + index_format = VK_INDEX_TYPE_UINT16; + VkFormat fmt = VK_FORMAT_R16_UINT; + + u32 elem_size = get_index_type_size(indexed_type); + + if (indexed_type == rsx::index_array_type::u32) + { + index_format = VK_INDEX_TYPE_UINT32; + fmt = VK_FORMAT_R32_UINT; + } + + u32 index_sz = vertex_index_array.size() / elem_size; + if (index_sz != vertex_draw_count) + LOG_ERROR(RSX, "Vertex draw count mismatch!"); + + m_index_buffer.sub_data(0, vertex_index_array.size(), vertex_index_array.data()); + m_index_buffer.set_format(fmt); //Unnecessary unless viewing contents in sampler... + } + + return std::make_tuple(prims, is_indexed_draw, index_count, index_format); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp new file mode 100644 index 0000000000..f113d29c4e --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -0,0 +1,301 @@ +#include "stdafx.h" +#include "Emu/System.h" + +#include "VKVertexProgram.h" +#include "VKCommonDecompiler.h" +#include "VKHelpers.h" + +std::string VKVertexDecompilerThread::getFloatTypeName(size_t elementCount) +{ + return vk::getFloatTypeNameImpl(elementCount); +} + +std::string VKVertexDecompilerThread::getIntTypeName(size_t elementCount) +{ + return "ivec4"; +} + + +std::string VKVertexDecompilerThread::getFunction(FUNCTION f) +{ + return vk::getFunctionImpl(f); +} + +std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + return vk::compareFunctionImpl(f, Op0, Op1); +} + +void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) +{ + OS << "#version 450" << std::endl << std::endl; + OS << "#extension GL_ARB_separate_shader_objects : enable" << std::endl; + OS << "layout(std140, set=0, binding = 0) uniform ScaleOffsetBuffer" << std::endl; + OS << "{" << std::endl; + OS << " mat4 scaleOffsetMat;" << std::endl; + OS << " float fog_param0;\n"; + OS << " float fog_param1;\n"; + OS << "};" << std::endl; + + vk::glsl::program_input in; + in.location = 0; + in.domain = vk::glsl::glsl_vertex_program; + in.name = "ScaleOffsetBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) +{ + std::vector> input_data; + for (const ParamType &PT : inputs) + { + for (const ParamItem &PI : PT.items) + { + input_data.push_back(std::make_tuple(PI.location, PI.name)); + } + } + + /** + * Its is important that the locations are in the order that vertex attributes are expected. 
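+ * For example, the buffer declaration for attribute location 0 (in_pos) must be
+ * emitted before the one for location 1 (in_weight), and so on up the list.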
+ * If order is not adhered to, channels may be swapped leading to corruption + */ + + std::sort(input_data.begin(), input_data.end()); + + int location = 2; + for (const std::tuple item : input_data) + { + for (const ParamType &PT : inputs) + { + for (const ParamItem &PI : PT.items) + { + if (PI.name == std::get<1>(item)) + { + vk::glsl::program_input in; + in.location = location; + in.domain = vk::glsl::glsl_vertex_program; + in.name = PI.name + "_buffer"; + in.type = vk::glsl::input_type_texel_buffer; + + this->inputs.push_back(in); + + OS << "layout(set=0, binding=" << location++ << ")" << " uniform samplerBuffer" << " " << PI.name << "_buffer;" << std::endl; + } + } + } + } +} + +void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) +{ + OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer" << std::endl; + OS << "{" << std::endl; + OS << " vec4 vc[468];" << std::endl; + OS << "};" << std::endl; + + vk::glsl::program_input in; + in.location = 1; + in.domain = vk::glsl::glsl_vertex_program; + in.name = "VertexConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + + inputs.push_back(in); +} + +struct reg_info +{ + std::string name; + bool need_declare; + std::string src_reg; + std::string src_reg_mask; + bool need_cast; +}; + +static const reg_info reg_table[] = +{ + { "gl_Position", false, "dst_reg0", "", false }, + { "diff_color", true, "dst_reg1", "", false }, + { "spec_color", true, "dst_reg2", "", false }, + { "front_diff_color", true, "dst_reg3", "", false }, + { "front_spec_color", true, "dst_reg4", "", false }, + { "fog_c", true, "dst_reg5", ".xxxx", true }, + { "gl_ClipDistance[0]", false, "dst_reg5", ".y", false }, + { "gl_ClipDistance[1]", false, "dst_reg5", ".z", false }, + { "gl_ClipDistance[2]", false, "dst_reg5", ".w", false }, + { "gl_PointSize", false, "dst_reg6", ".x", false }, + { "gl_ClipDistance[3]", false, "dst_reg6", ".y", false }, + { "gl_ClipDistance[4]", false, "dst_reg6", ".z", false }, + { "gl_ClipDistance[5]", false, "dst_reg6", ".w", false }, + { "tc0", true, "dst_reg7", "", false }, + { "tc1", true, "dst_reg8", "", false }, + { "tc2", true, "dst_reg9", "", false }, + { "tc3", true, "dst_reg10", "", false }, + { "tc4", true, "dst_reg11", "", false }, + { "tc5", true, "dst_reg12", "", false }, + { "tc6", true, "dst_reg13", "", false }, + { "tc7", true, "dst_reg14", "", false }, + { "tc8", true, "dst_reg15", "", false }, + { "tc9", true, "dst_reg6", "", false } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. 
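+ // Note how dst_reg5 and dst_reg6 are shared between several outputs above:
+ // fog_c and gl_PointSize take a single channel each while the clip distances
+ // consume the remaining .y/.z/.w components of the same registers.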
+}; + +void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::vector & outputs) +{ + for (auto &i : reg_table) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", i.src_reg) && i.need_declare) + { + const vk::varying_register_t ® = vk::get_varying_register(i.name); + + // if (i.name == "fogc") + // OS << "layout(location=" << reg.reg_location << ") out vec4 fog_c;" << std::endl; + // else + OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";" << std::endl; + } + } +} + +namespace vk +{ + void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) + { + for (const auto &real_input : inputs) + { + if (real_input.location != PI.location) + continue; + + if (!real_input.is_array) + { + OS << " vec4 " << PI.name << " = texelFetch(" << PI.name << "_buffer, 0);" << std::endl; + return; + } + + if (real_input.frequency > 1) + { + if (real_input.is_modulo) + { + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex %" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex /" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba;" << std::endl; + return; + } + + OS << " vec4 " << PI.name << " = vec4(0., 0., 0., 1.);" << std::endl; + } +} + +void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) +{ + vk::insert_glsl_legacy_function(OS); + + OS << "void main()" << std::endl; + OS << "{" << std::endl; + + // Declare inside main function + for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) + { + for (const ParamItem &PI : PT.items) + { + OS << " " << PT.type << " " << PI.name; + if (!PI.value.empty()) + OS << " = " << PI.value; + OS << ";" << std::endl; + } + } + + for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem &PI : PT.items) + vk::add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + } +} + +void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) +{ + for (auto &i : reg_table) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", i.src_reg)) + OS << " " << i.name << " = " << i.src_reg << i.src_reg_mask << ";" << std::endl; + } + + OS << " gl_Position = gl_Position * scaleOffsetMat;" << std::endl; + OS << "}" << std::endl; +} + + +void VKVertexDecompilerThread::Task() +{ + m_shader = Decompile(); + vk_prog->SetInputs(inputs); +} + +VKVertexProgram::VKVertexProgram() +{ +} + +VKVertexProgram::~VKVertexProgram() +{ + Delete(); +} + +void VKVertexProgram::Decompile(const RSXVertexProgram& prog) +{ + VKVertexDecompilerThread decompiler(prog, shader, parr, *this); + decompiler.Task(); +} + +void VKVertexProgram::Compile() +{ + fs::file(fs::get_config_dir() + "VertexProgram.vert", fom::rewrite).write(shader); + + std::vector spir_v; + if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_vertex_program, spir_v)) + throw EXCEPTION("Failed to compile vertex shader"); + + VkShaderModuleCreateInfo vs_info; + vs_info.codeSize = spir_v.size() * sizeof(u32); + vs_info.pNext = nullptr; + vs_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vs_info.pCode = (uint32_t*)spir_v.data(); + vs_info.flags = 0; + + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkCreateShaderModule(dev, &vs_info, nullptr, &handle); + + id = (u32)(handle); +} + +void VKVertexProgram::Delete() +{ + shader.clear(); + + if (handle) + { + if (Emu.IsStopped()) + { + 
LOG_WARNING(RSX, "VKVertexProgram::Delete(): vkDestroyShaderModule(0x%X) avoided", handle); + } + else + { + VkDevice dev = (VkDevice)*vk::get_current_renderer(); + vkDestroyShaderModule(dev, handle, nullptr); + } + + handle = nullptr; + } +} + +void VKVertexProgram::SetInputs(std::vector& inputs) +{ + for (auto &it : inputs) + { + uniforms.push_back(it); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h new file mode 100644 index 0000000000..f914460bb2 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -0,0 +1,58 @@ +#pragma once +#include "../Common/VertexProgramDecompiler.h" +#include "Emu/RSX/RSXVertexProgram.h" +#include "Utilities/Thread.h" +#include "VulkanAPI.h" +#include "../VK/VKHelpers.h" + +struct VKVertexDecompilerThread : public VertexProgramDecompiler +{ + std::string &m_shader; + std::vector inputs; + class VKVertexProgram *vk_prog; +protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + std::string getIntTypeName(size_t elementCount) override; + virtual std::string getFunction(FUNCTION) override; + virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; + + virtual void insertHeader(std::stringstream &OS) override; + virtual void insertInputs(std::stringstream &OS, const std::vector &inputs) override; + virtual void insertConstants(std::stringstream &OS, const std::vector &constants) override; + virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs) override; + virtual void insertMainStart(std::stringstream &OS) override; + virtual void insertMainEnd(std::stringstream &OS) override; + + const RSXVertexProgram &rsx_vertex_program; +public: + VKVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray& parr, class VKVertexProgram &dst) + : VertexProgramDecompiler(prog) + , m_shader(shader) + , rsx_vertex_program(prog) + , vk_prog(&dst) + { + } + + void Task(); + const std::vector& get_inputs() { return inputs; } +}; + +class VKVertexProgram +{ +public: + VKVertexProgram(); + ~VKVertexProgram(); + + ParamArray parr; + VkShaderModule handle = nullptr; + int id; + std::string shader; + std::vector uniforms; + + void Decompile(const RSXVertexProgram& prog); + void Compile(); + void SetInputs(std::vector& inputs); + +private: + void Delete(); +}; diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.cpp b/rpcs3/Emu/RSX/VK/VulkanAPI.cpp new file mode 100644 index 0000000000..1577c4e3bc --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.cpp @@ -0,0 +1 @@ +#include "stdafx.h" \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.h b/rpcs3/Emu/RSX/VK/VulkanAPI.h new file mode 100644 index 0000000000..4358259ed8 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.h @@ -0,0 +1,16 @@ +#pragma once + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#define VK_USE_PLATFORM_XLIB_KHR +#endif + +#include +#include +#include "Utilities/types.h" + +namespace vk +{ + void init(); +} diff --git a/rpcs3/Gui/SettingsDialog.cpp b/rpcs3/Gui/SettingsDialog.cpp index be5d1535bd..a5a6e61d12 100644 --- a/rpcs3/Gui/SettingsDialog.cpp +++ b/rpcs3/Gui/SettingsDialog.cpp @@ -252,6 +252,8 @@ SettingsDialog::SettingsDialog(wxWindow *parent, rpcs3::config_t* cfg) } #endif + cbox_gs_render->Append("Vulkan"); + for (int i = 1; i < WXSIZEOF(ResolutionTable); ++i) { cbox_gs_resolution->Append(wxString::Format("%dx%d", ResolutionTable[i].width.value(), ResolutionTable[i].height.value())); diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj 
index 1125a2a385..5713b9329f 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -1,125 +1,141 @@ - - - - - Debug - LLVM - x64 - - - Debug - MemLeak - x64 - - - Debug - x64 - - - Release - LLVM - x64 - - - Release - x64 - - - - - - - - - - - {c4a10229-4712-4bd2-b63e-50d93c67a038} - - - - {3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} - Win32Proj - VKGSRender - 8.1 - - - - StaticLibrary - v140 - Unicode - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - _DEBUG;_LIB;%(PreprocessorDefinitions) - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - Windows - - - - - Level3 - - - MaxSpeed - true - true - NDEBUG;_LIB;%(PreprocessorDefinitions) - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - Windows - true - true - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) - - - - - + + + + + Debug - LLVM + x64 + + + Debug - MemLeak + x64 + + + Debug + x64 + + + Release - LLVM + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + {c4a10229-4712-4bd2-b63e-50d93c67a038} + + + + {3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} + Win32Proj + VKGSRender + 8.1 + + + + StaticLibrary + v140 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + _DEBUG;_LIB;%(PreprocessorDefinitions) + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + Windows + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + Windows + true + true + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories) + + + + + \ No newline at end of file diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 1868ae93ea..22eb36cdee 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -1,19 +1,67 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - - - Source Files - - - - - Source Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + \ No newline at end of file diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 899bea4107..92ae1d3b03 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -144,7 +144,7 @@ bool Rpcs3App::OnInit() case 
rsx_renderer_type::OpenGL: return std::make_shared<GLGSRender>();
#ifdef _MSC_VER
 case rsx_renderer_type::DX12: return std::make_shared<D3D12GSRender>();
- case rsx_renderer_type::Vulkan: return std::make_shared<>(VKGSRender);
+ case rsx_renderer_type::Vulkan: return std::make_shared<VKGSRender>();
#endif
 default: throw EXCEPTION("Invalid GS Renderer %d", (int)mode);
 }
diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj
index 02ef058685..63b3a58304 100644
--- a/rpcs3/rpcs3.vcxproj
+++ b/rpcs3/rpcs3.vcxproj
@@ -94,12 +94,12 @@
 ..\minidx9\Include;..\OpenAL\include;..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;%(AdditionalIncludeDirectories)
- ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
- ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
- ..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
- ..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
- ..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
- VKstatic.1.lib;glslang.lib;%(AdditionalDependencies)
+ ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
+ ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
+ ..\Vulkan\glslang-build\SPIRV\Debug;..\Vulkan\glslang-build\OGLCompilersDLL\Debug;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Debug;..\Vulkan\Vulkan-build\loader\Debug;..\Vulkan\glslang-build\glslang\Debug;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
+ ..\Vulkan\glslang-build\SPIRV\Release;..\Vulkan\glslang-build\OGLCompilersDLL\Release;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Release;..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
+ ..\Vulkan\glslang-build\SPIRV\Release;..\Vulkan\glslang-build\OGLCompilersDLL\Release;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Release;..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\OpenAL\libs\Win64;%(AdditionalLibraryDirectories)
+ VKstatic.1.lib;glslang.lib;OSDependent.lib;OGLCompiler.lib;SPIRV.lib;%(AdditionalDependencies)
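For reference, a minimal sketch of how the tuple returned by upload_vertex_data() would typically be consumed when recording a draw. The wrapper function and the cmd and index_buffer_handle names are illustrative assumptions and not part of this diff; only the four tuple members come from the code above.

#include <tuple>
#include "VulkanAPI.h" // pulls in the Vulkan headers and the u32 typedef per this diff

// Hypothetical consumer of VKGSRender::upload_vertex_data(); everything except
// the four tuple members is assumed for illustration.
static void draw_uploaded_batch(VkCommandBuffer cmd, VkBuffer index_buffer_handle,
	const std::tuple<VkPrimitiveTopology, bool, u32, VkIndexType> &upload_result)
{
	VkPrimitiveTopology topology = std::get<0>(upload_result);
	bool indexed_draw = std::get<1>(upload_result); // true for native indexed draws and emulated primitives
	u32 index_count = std::get<2>(upload_result);   // indices (or raw vertices) to draw
	VkIndexType index_type = std::get<3>(upload_result); // UINT16 unless the source index array was u32

	// topology is consumed at pipeline creation time
	// (VkPipelineInputAssemblyStateCreateInfo::topology), not at draw time.
	(void)topology;

	if (indexed_draw)
	{
		// upload_vertex_data() filled the index buffer in these cases; bind and draw it.
		vkCmdBindIndexBuffer(cmd, index_buffer_handle, 0, index_type);
		vkCmdDrawIndexed(cmd, index_count, 1, 0, 0, 0);
	}
	else
	{
		vkCmdDraw(cmd, index_count, 1, 0, 0);
	}
}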