diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h
new file mode 100644
index 0000000000..0737decd6c
--- /dev/null
+++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h
@@ -0,0 +1,268 @@
+#pragma once
+#include <sstream>
+
+#include "ShaderParam.h"
+
+//Helpers for emitting GLSL source text that are shared by the GL and VK shader decompilers
+namespace glsl
+{
+	//Shader stage that a generated program belongs to
+	enum program_domain
+	{
+		glsl_vertex_program = 0,
+		glsl_fragment_program = 1
+	};
+
+	//Returns the GLSL float type name for a vector of elementCount components (1 to 4)
+	//Any other count aborts the process
+	//inline instead of static: the header is included by several translation units, which now share a single definition
+	inline std::string getFloatTypeNameImpl(size_t elementCount)
+	{
+		switch (elementCount)
+		{
+		default:
+			abort();
+		case 1:
+			return "float";
+		case 2:
+			return "vec2";
+		case 3:
+			return "vec3";
+		case 4:
+			return "vec4";
+		}
+	}
+
+	//Builds the GLSL comparison call (equal, greaterThan, lessThan, ...) that matches f, applied to Op0 and Op1
+	//Calls fmt::throw_exception when f is none of the COMPARE values handled below
+	inline std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1)
+	{
+		switch (f)
+		{
+		case COMPARE::FUNCTION_SEQ:
+			return "equal(" + Op0 + ", " + Op1 + ")";
+		case COMPARE::FUNCTION_SGE:
+			return "greaterThanEqual(" + Op0 + ", " + Op1 + ")";
+		case COMPARE::FUNCTION_SGT:
+			return "greaterThan(" + Op0 + ", " + Op1 + ")";
+		case COMPARE::FUNCTION_SLE:
+			return "lessThanEqual(" + Op0 + ", " + Op1 + ")";
+		case COMPARE::FUNCTION_SLT:
+			return "lessThan(" + Op0 + ", " + Op1 + ")";
+		case COMPARE::FUNCTION_SNE:
+			return "notEqual(" + Op0 + ", " + Op1 + ")";
+		}
+		fmt::throw_exception("Unknown compare function" HERE);
+	}
+
+	//Writes the GLSL vertex fetch code to OS: the attribute_desc struct plus the get_bits, preserve_sign_s16, fetch_attribute, fetch_desc and read_location functions
+	//The emitted code reads input_attributes, vertex_base_index, volatile_input_stream and persistent_input_stream, none of which are declared by this text
+	//NOTE(review): presumably the vertex shader header declares those names before this is inserted - confirm against the callers
+	//OS is a std::ostream& to match insert_glsl_legacy_function; std::stringstream arguments still bind to it
+	inline void insert_vertex_input_fetch(std::ostream& OS)
+	{
+		//Actually decode a vertex attribute from a raw byte stream
+		OS << "struct attribute_desc\n";
+		OS << "{\n";
+		OS << " int type;\n";
+		OS << " int attribute_size;\n";
+		OS << " int starting_offset;\n";
+		OS << " int stride;\n";
+		OS << " int swap_bytes;\n";
+		OS << " int is_volatile;\n";
+		OS << " int frequency;\n";
+		OS << " int divisor;\n";
+		OS << " int modulo;\n";
+		OS << "};\n\n";
+
+		OS << "uint get_bits(uvec4 v, int swap)\n";
+		OS << "{\n";
+		OS << " if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);\n";
+		OS << " return (v.x | v.y << 8 | v.z << 16 | v.w << 24);\n";
+		OS << "}\n\n";
+
+		OS << "uint get_bits(uvec2 v, int swap)\n";
+		OS << "{\n";
+		OS << " if (swap != 0) return (v.y | v.x << 8);\n";
+		OS << " return (v.x | v.y << 8);\n";
+		OS << "}\n\n";
+
+		OS << "int preserve_sign_s16(uint bits)\n";
+		OS << "{\n";
+		OS << " //convert raw 16 bit value into signed 32-bit integer counterpart\n";
+		OS << " uint sign = bits & 0x8000;\n";
+		OS << " if (sign != 0) return int(bits | 0xFFFF0000);\n";
+		OS << " return int(bits);\n";
+		OS << "}\n\n";
+
+		/* TODO: For intel GPUs that seemingly cannot generate fp32 values from raw bits
+		OS << "float convert_to_f32(uint bits)\n";
+		OS << "{\n";
+		OS << " uint sign = (bits >> 31) & 1;\n";
+		OS << " uint exp = (bits >> 23) & 0xff;\n";
+		OS << " uint mantissa = bits & 0x7fffff;\n";
+		OS << " float base = (sign != 0)? -1.f: 1.f;\n";
+		OS << " base *= exp2(exp - 127);\n";
+		OS << " float scale = 0.f;\n\n";
+		OS << " for (int x = 0; x < 23; x++)\n";
+		OS << " {\n";
+		OS << " int inv = (22 - x);\n";
+		OS << " if ((mantissa & (1 << inv)) == 0) continue;\n";
+		OS << " scale += 1.f / pow(2.f, float(inv));\n";
+		OS << " }\n";
+		OS << " return base * scale;\n";
+		OS << "}\n";*/
+
+		OS << "#define get_s16(v, s) preserve_sign_s16(get_bits(v, s))\n\n";
+
+		OS << "vec4 fetch_attribute(attribute_desc desc, int vertex_id, usamplerBuffer input_stream)\n";
+		OS << "{\n";
+		OS << " vec4 result = vec4(0., 0., 0., 1.);\n";
+		OS << " vec4 scale = vec4(1.);\n";
+		OS << " uvec4 tmp;\n";
+		OS << " uint bits;\n";
+		OS << " bool reverse_order = false;\n";
+		OS << "\n";
+		OS << " int first_byte = (vertex_id * desc.stride) + desc.starting_offset;\n";
+		OS << " for (int n = 0; n < desc.attribute_size; n++)\n";
+		OS << " {\n";
+		OS << " switch (desc.type)\n";
+		OS << " {\n";
+		OS << " case 0:\n";
+		OS << " //signed normalized 16-bit\n";
+		OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n";
+		OS << " scale[n] = 32767.;\n";
+		OS << " break;\n";
+		OS << " case 1:\n";
+		OS << " //float\n";
+		OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));\n";
+		OS << " break;\n";
+		OS << " case 2:\n";
+		OS << " //half\n";
+		OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " result[n] = unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x;\n";
+		OS << " break;\n";
+		OS << " case 3:\n";
+		OS << " //unsigned byte\n";
+		OS << " result[n] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " scale[n] = 255.;\n";
+		OS << " reverse_order = (desc.swap_bytes != 0);\n";
+		OS << " break;\n";
+		OS << " case 4:\n";
+		OS << " //signed word\n";
+		OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n";
+		OS << " break;\n";
+		OS << " case 5:\n";
+		OS << " //cmp\n";
+		OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << " bits = get_bits(tmp, desc.swap_bytes);\n";
+		OS << " result.x = preserve_sign_s16((bits & 0x7FF) << 5);\n";
+		OS << " result.y = preserve_sign_s16(((bits >> 11) & 0x7FF) << 5);\n";
+		OS << " result.z = preserve_sign_s16(((bits >> 22) & 0x3FF) << 6);\n";
+		OS << " result.w = 1.;\n";
+		OS << " scale = vec4(32767., 32767., 32767., 1.);\n";
+		OS << " break;\n";
+		OS << " case 6:\n";
+		OS << " //ub256\n";
+		OS << " result[n] = float(texelFetch(input_stream, first_byte++).x);\n";
+		OS << " reverse_order = (desc.swap_bytes != 0);\n";
+		OS << " break;\n";
+		OS << " }\n";
+		OS << " }\n\n";
+		OS << " result /= scale;\n";
+		OS << " return (reverse_order)? result.wzyx: result;\n";
+		OS << "}\n\n";
+
+		OS << "attribute_desc fetch_desc(int location)\n";
+		OS << "{\n";
+		OS << " attribute_desc result;\n";
+		OS << " int attribute_flags = input_attributes[location].w;\n";
+		OS << " result.type = input_attributes[location].x;\n";
+		OS << " result.attribute_size = input_attributes[location].y;\n";
+		OS << " result.starting_offset = input_attributes[location].z;\n";
+		OS << " result.stride = attribute_flags & 0xFF;\n";
+		OS << " result.swap_bytes = (attribute_flags >> 8) & 0x1;\n";
+		OS << " result.is_volatile = (attribute_flags >> 9) & 0x1;\n";
+		OS << " result.frequency = (attribute_flags >> 10) & 0x3;\n";
+		OS << " result.modulo = (attribute_flags >> 12) & 0x1;\n";
+		OS << " result.divisor = (attribute_flags >> 13) & 0xFFFF;\n";
+		OS << " return result;\n";
+		OS << "}\n\n";
+
+		OS << "vec4 read_location(int location)\n";
+		OS << "{\n";
+		OS << " attribute_desc desc = fetch_desc(location);\n";
+		OS << "\n";
+		OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n";
+		OS << " if (desc.frequency == 0)\n";
+		OS << " vertex_id = 0;\n";
+		OS << " else if (desc.frequency > 1)\n";
+		OS << " {\n";
+		OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n";
+		OS << " if (desc.modulo != 0)\n";
+		OS << " vertex_id = gl_VertexID % desc.divisor;\n";
+		OS << " else\n";
+		OS << " vertex_id = gl_VertexID / desc.divisor;\n";
+		OS << " }\n";
+		OS << "\n";
+		OS << " if (desc.is_volatile != 0)\n";
+		OS << " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n";
+		OS << " else\n";
+		OS << " return fetch_attribute(desc, vertex_id, persistent_input_stream);\n";
+		OS << "}\n\n";
+	}
+
+	//Writes the lit_legacy GLSL function to OS
+	//Unless domain is glsl_vertex_program it also writes decodeLinearDepth and the sampler2D/sampler2DRect overloads of texture2DReconstruct
+	inline void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain)
+	{
+		OS << "vec4 lit_legacy(vec4 val)";
+		OS << "{\n";
+		OS << " vec4 clamped_val = val;\n";
+		OS << " clamped_val.x = max(val.x, 0.);\n";
+		OS << " clamped_val.y = max(val.y, 0.);\n";
+		OS << " vec4 result;\n";
+		OS << " result.x = 1.;\n";
+		OS << " result.w = 1.;\n";
+		OS << " result.y = clamped_val.x;\n";
+		OS << " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.;\n";
+		OS << " return result;\n";
+		OS << "}\n\n";
+
+		if (domain == glsl::program_domain::glsl_vertex_program)
+			return;
+
+		//NOTE: After testing with GOW, the w component is either the original depth or wraps around to the x component
+		//Since component.r == depth_value with some precision loss, just use the precise depth value for now (further testing needed)
+		OS << "vec4 decodeLinearDepth(float depth_value)\n";
+		OS << "{\n";
+		OS << " uint value = uint(depth_value * 16777215);\n";
+		OS << " uint b = (value & 0xff);\n";
+		OS << " uint g = (value >> 8) & 0xff;\n";
+		OS << " uint r = (value >> 16) & 0xff;\n";
+		OS << " return vec4(float(r)/255., float(g)/255., float(b)/255., depth_value);\n";
+		OS << "}\n\n";
+
+		OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord)\n";
+		OS << "{\n";
+		OS << " return decodeLinearDepth(texture(tex, coord.xy).r);\n";
+		OS << "}\n\n";
+
+		OS << "vec4 texture2DReconstruct(sampler2DRect tex, vec2 coord)\n";
+		OS << "{\n";
+		OS << " return decodeLinearDepth(texture(tex, coord.xy).r);\n";
+		OS << "}\n\n";
+	}
+}
\ No newline at end of file
diff --git a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp
index 770ab69c2c..18d06e2614 100644
--- a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp
+++ b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp
@@ -1,22 +1,6 @@
 #include "stdafx.h"
 #include "GLCommonDecompiler.h"
 
-std::string getFloatTypeNameImpl(size_t elementCount)
-{
-	switch (elementCount)
-	{
-	default:
-		abort();
-	case 1:
-		return "float";
-	case 2:
-		return "vec2";
-	case 3:
-		return "vec3";
-	case 4:
-		return "vec4";
-	}
-}
 
 std::string getFunctionImpl(FUNCTION f)
 {
@@ -84,55 +68,3 @@ std::string
getFunctionImpl(FUNCTION f) return "texture2DReconstruct($t, $0.xy * $t_coord_scale)"; } } - -std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1) -{ - switch (f) - { - case COMPARE::FUNCTION_SEQ: - return "equal(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGE: - return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGT: - return "greaterThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLE: - return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLT: - return "lessThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SNE: - return "notEqual(" + Op0 + ", " + Op1 + ")"; - } - fmt::throw_exception("Unknown compare function" HERE); -} - -void insert_glsl_legacy_function(std::ostream& OS, gl::glsl::program_domain domain) -{ - OS << "vec4 lit_legacy(vec4 val)"; - OS << "{\n"; - OS << " vec4 clamped_val = val;\n"; - OS << " clamped_val.x = max(val.x, 0);\n"; - OS << " clamped_val.y = max(val.y, 0);\n"; - OS << " vec4 result;\n"; - OS << " result.x = 1.0;\n"; - OS << " result.w = 1.;\n"; - OS << " result.y = clamped_val.x;\n"; - OS << " result.z = clamped_val.x > 0.0 ? 
exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.0;\n"; - OS << " return result;\n"; - OS << "}\n\n"; - - if (domain != gl::glsl::program_domain::glsl_fragment_program) - return; - - //NOTE: We lose precision if we just store depth value into 8-bit textures i.e (depth, 0, 0) - //NOTE2: After testing with GOW, the w component is either the original depth or wraps around to the x component - //Since component.r == depth_value with some precision loss, just use the precise depth value for now (further testing needed) - OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord)\n"; - OS << "{\n"; - OS << " float depth_value = texture(tex, coord.xy).r;\n"; - OS << " uint value = uint(depth_value * 16777215);\n"; - OS << " uint b = (value & 0xff);\n"; - OS << " uint g = (value >> 8) & 0xff;\n"; - OS << " uint r = (value >> 16) & 0xff;\n"; - OS << " return vec4(float(r)/255., float(g)/255., float(b)/255., depth_value);\n"; - OS << "}\n\n"; -} diff --git a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h index 2359a45fb5..69e2495c55 100644 --- a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h +++ b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h @@ -1,10 +1,7 @@ #pragma once #include "../Common/ShaderParam.h" -#include "GLHelpers.h" +#include "../Common/GLSLCommon.h" #include -std::string getFloatTypeNameImpl(size_t elementCount); std::string getFunctionImpl(FUNCTION f); -std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1); -void insert_glsl_legacy_function(std::ostream& OS, gl::glsl::program_domain domain); diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index f559f236a5..547873da96 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -9,7 +9,7 @@ std::string GLFragmentDecompilerThread::getFloatTypeName(size_t elementCount) { - return getFloatTypeNameImpl(elementCount); + return glsl::getFloatTypeNameImpl(elementCount); } 
std::string GLFragmentDecompilerThread::getFunction(FUNCTION f) @@ -24,7 +24,7 @@ std::string GLFragmentDecompilerThread::saturate(const std::string & code) std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - return compareFunctionImpl(f, Op0, Op1); + return glsl::compareFunctionImpl(f, Op0, Op1); } void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) @@ -221,7 +221,7 @@ namespace void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { - insert_glsl_legacy_function(OS, gl::glsl::glsl_fragment_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); const std::set output_values = { diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 607908bfb4..4ad99e7dd0 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -2043,12 +2043,6 @@ namespace gl namespace glsl { - enum program_domain - { - glsl_vertex_program = 0, - glsl_fragment_program = 1 - }; - class compilation_exception : public exception { public: diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index cf2cc39250..44e64ddd91 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -9,7 +9,7 @@ std::string GLVertexDecompilerThread::getFloatTypeName(size_t elementCount) { - return getFloatTypeNameImpl(elementCount); + return glsl::getFloatTypeNameImpl(elementCount); } std::string GLVertexDecompilerThread::getIntTypeName(size_t elementCount) @@ -25,7 +25,7 @@ std::string GLVertexDecompilerThread::getFunction(FUNCTION f) std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - return compareFunctionImpl(f, Op0, Op1); + return glsl::compareFunctionImpl(f, Op0, Op1); } void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) @@ -151,193 +151,13 @@ void 
GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: namespace { - std::string expand_to_vec4(std::string value, u8 vector_size) - { - switch (vector_size) - { - case 2: - return "vec4(" + value + ", " + value + ", 1., 1.)"; - case 3: - return "vec4(" + value + ", " + value + ", " + value + ", 1.)"; - default: - LOG_ERROR(RSX, "invalid vector size %d" HERE, vector_size); - case 1: - case 4: - //Expand not required - //In case its one component, read is swizzled as .xxxx (GOW1 loading screen) - return value; - } - } - void insert_vertex_input_fetch(std::stringstream& OS) - { - //Actually decode a vertex attribute from a raw byte stream - OS << "struct attribute_desc\n"; - OS << "{\n"; - OS << " int type;\n"; - OS << " int attribute_size;\n"; - OS << " int starting_offset;\n"; - OS << " int stride;\n"; - OS << " int swap_bytes;\n"; - OS << " int is_volatile;\n"; - OS << " int frequency;\n"; - OS << " int divisor;\n"; - OS << " int modulo;\n"; - OS << "};\n\n"; - - OS << "uint get_bits(uvec4 v, int swap)\n"; - OS << "{\n"; - OS << " if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);\n"; - OS << " return (v.x | v.y << 8 | v.z << 16 | v.w << 24);\n"; - OS << "}\n\n"; - - OS << "uint get_bits(uvec2 v, int swap)\n"; - OS << "{\n"; - OS << " if (swap != 0) return (v.y | v.x << 8);\n"; - OS << " return (v.x | v.y << 8);\n"; - OS << "}\n\n"; - - OS << "int preserve_sign_s16(uint bits)\n"; - OS << "{\n"; - OS << " //convert raw 16 bit value into signed 32-bit integer counterpart\n"; - OS << " uint sign = bits & 0x8000;\n"; - OS << " if (sign != 0) return int(bits | 0xFFFF0000);\n"; - OS << " return int(bits);\n"; - OS << "}\n\n"; - - OS << "float convert_to_f32(uint bits)\n"; - OS << "{\n"; - OS << " uint sign = (bits >> 31) & 1;\n"; - OS << " uint exp = (bits >> 23) & 0xff;\n"; - OS << " uint mantissa = bits & 0x7fffff;\n"; - OS << " float base = (sign != 0)? 
-1.f: 1.f;\n"; - OS << " base *= exp2(exp - 127);\n"; - OS << " float scale = 0.f;\n\n"; - OS << " for (int x = 0; x < 23; x++)\n"; - OS << " {\n"; - OS << " int inv = (22 - x);\n"; - OS << " if ((mantissa & (1 << inv)) == 0) continue;\n"; - OS << " scale += 1.f / pow(2.f, float(inv));\n"; - OS << " }\n"; - OS << " return base * scale;\n"; - OS << "}\n"; - - OS << "#define get_s16(v, s) preserve_sign_s16(get_bits(v, s))\n\n"; - - OS << "vec4 fetch_attribute(attribute_desc desc, int vertex_id, usamplerBuffer input_stream)\n"; - OS << "{\n"; - OS << " vec4 result = vec4(0., 0., 0., 1.);\n"; - OS << " vec4 scale = vec4(1.);\n"; - OS << " uvec4 tmp;\n"; - OS << " uint bits;\n"; - OS << " bool reverse_order = false;\n"; - OS << "\n"; - OS << " int first_byte = (vertex_id * desc.stride) + desc.starting_offset;\n"; - OS << " for (int n = 0; n < desc.attribute_size; n++)\n"; - OS << " {\n"; - OS << " switch (desc.type)\n"; - OS << " {\n"; - OS << " case 0:\n"; - OS << " //signed normalized 16-bit\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; - OS << " scale[n] = 32767.;\n"; - OS << " break;\n"; - OS << " case 1:\n"; - OS << " //float\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));\n"; - OS << " break;\n"; - OS << " case 2:\n"; - OS << " //half\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x;\n"; - OS << " break;\n"; - OS << " case 3:\n"; - OS << " //unsigned byte\n"; - OS << " 
result[n] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " scale[n] = 255.;\n"; - OS << " reverse_order = (desc.swap_bytes != 0);\n"; - OS << " break;\n"; - OS << " case 4:\n"; - OS << " //signed word\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; - OS << " break;\n"; - OS << " case 5:\n"; - OS << " //cmp\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " bits = get_bits(tmp, desc.swap_bytes);\n"; - OS << " result.x = preserve_sign_s16((bits & 0x7FF) << 5);\n"; - OS << " result.y = preserve_sign_s16(((bits >> 11) & 0x7FF) << 5);\n"; - OS << " result.z = preserve_sign_s16(((bits >> 22) & 0x3FF) << 6);\n"; - OS << " result.w = 1.;\n"; - OS << " scale = vec4(32767., 32767., 32767., 1.);\n"; - OS << " break;\n"; - OS << " case 6:\n"; - OS << " //ub256\n"; - OS << " result[n] = float(texelFetch(input_stream, first_byte++).x);\n"; - OS << " reverse_order = (desc.swap_bytes != 0);\n"; - OS << " break;\n"; - OS << " }\n"; - OS << " }\n\n"; - OS << " result /= scale;\n"; - OS << " return (reverse_order)? 
result.wzyx: result;\n"; - OS << "}\n\n"; - - OS << "attribute_desc fetch_desc(int location)\n"; - OS << "{\n"; - OS << " attribute_desc result;\n"; - OS << " int attribute_flags = input_attributes[location].w;\n"; - OS << " result.type = input_attributes[location].x;\n"; - OS << " result.attribute_size = input_attributes[location].y;\n"; - OS << " result.starting_offset = input_attributes[location].z;\n"; - OS << " result.stride = attribute_flags & 0xFF;\n"; - OS << " result.swap_bytes = (attribute_flags >> 8) & 0x1;\n"; - OS << " result.is_volatile = (attribute_flags >> 9) & 0x1;\n"; - OS << " result.frequency = (attribute_flags >> 10) & 0x3;\n"; - OS << " result.modulo = (attribute_flags >> 12) & 0x1;\n"; - OS << " result.divisor = (attribute_flags >> 13) & 0xFFFF;\n"; - OS << " return result;\n"; - OS << "}\n\n"; - - OS << "vec4 read_location(int location)\n"; - OS << "{\n"; - OS << " attribute_desc desc = fetch_desc(location);\n"; - OS << "\n"; - OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n"; - OS << " if (desc.frequency == 0)\n"; - OS << " vertex_id = 0;\n"; - OS << " else if (desc.frequency > 1)\n"; - OS << " {\n"; - OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"; - OS << " if (desc.modulo != 0)\n"; - OS << " vertex_id = gl_VertexID % desc.divisor;\n"; - OS << " else\n"; - OS << " vertex_id = gl_VertexID / desc.divisor;\n"; - OS << " }\n"; - OS << "\n"; - OS << " if (desc.is_volatile != 0)\n"; - OS << " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n"; - OS << " else\n"; - OS << " return fetch_attribute(desc, vertex_id, persistent_input_stream);\n"; - OS << "}\n\n"; - } } void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - insert_glsl_legacy_function(OS, gl::glsl::glsl_vertex_program); - insert_vertex_input_fetch(OS); + insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); + glsl::insert_vertex_input_fetch(OS); std::string parameters = ""; for (int i = 0; i < 
16; ++i) diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 8ae1f0ef4a..f3979b09ac 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -8,23 +8,6 @@ namespace vk { static TBuiltInResource g_default_config; - std::string getFloatTypeNameImpl(size_t elementCount) - { - switch (elementCount) - { - default: - abort(); - case 1: - return "float"; - case 2: - return "vec2"; - case 3: - return "vec3"; - case 4: - return "vec4"; - } - } - std::string getFunctionImpl(FUNCTION f) { switch (f) @@ -84,66 +67,6 @@ namespace vk } } - std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1) - { - switch (f) - { - case COMPARE::FUNCTION_SEQ: - return "equal(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGE: - return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGT: - return "greaterThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLE: - return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLT: - return "lessThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SNE: - return "notEqual(" + Op0 + ", " + Op1 + ")"; - } - fmt::throw_exception("Unknown compare function" HERE); - } - - void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain) - { - OS << "vec4 lit_legacy(vec4 val)"; - OS << "{\n"; - OS << " vec4 clamped_val = val;\n"; - OS << " clamped_val.x = max(val.x, 0.);\n"; - OS << " clamped_val.y = max(val.y, 0.);\n"; - OS << " vec4 result;\n"; - OS << " result.x = 1.;\n"; - OS << " result.w = 1.;\n"; - OS << " result.y = clamped_val.x;\n"; - OS << " result.z = clamped_val.x > 0. ? 
exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.;\n"; - OS << " return result;\n"; - OS << "}\n\n"; - - if (domain == glsl::program_domain::glsl_vertex_program) - return; - - //NOTE: After testing with GOW, the w component is either the original depth or wraps around to the x component - //Since component.r == depth_value with some precision loss, just use the precise depth value for now (further testing needed) - OS << "vec4 decodeLinearDepth(float depth_value)\n"; - OS << "{\n"; - OS << " uint value = uint(depth_value * 16777215);\n"; - OS << " uint b = (value & 0xff);\n"; - OS << " uint g = (value >> 8) & 0xff;\n"; - OS << " uint r = (value >> 16) & 0xff;\n"; - OS << " return vec4(float(r)/255., float(g)/255., float(b)/255., depth_value);\n"; - OS << "}\n\n"; - - OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord)\n"; - OS << "{\n"; - OS << " return decodeLinearDepth(texture(tex, coord.xy).r);\n"; - OS << "}\n\n"; - - OS << "vec4 texture2DReconstruct(sampler2DRect tex, vec2 coord)\n"; - OS << "{\n"; - OS << " return decodeLinearDepth(texture(tex, coord.xy).r);\n"; - OS << "}\n\n"; - } - void init_default_resources(TBuiltInResource &rsc) { rsc.maxLights = 32; @@ -274,9 +197,9 @@ namespace vk fmt::throw_exception("Unknown register name: %s" HERE, name); } - bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector& spv) + bool compile_glsl_to_spv(std::string& shader, program_domain domain, std::vector& spv) { - EShLanguage lang = (domain == glsl::glsl_fragment_program) ? EShLangFragment : EShLangVertex; + EShLanguage lang = (domain == glsl_fragment_program) ? 
EShLangFragment : EShLangVertex; glslang::TProgram program; glslang::TShader shader_object(lang); diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h index 7bf06e183d..a1e993449e 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -1,22 +1,22 @@ #pragma once #include "../Common/ShaderParam.h" -#include "VKHelpers.h" +#include "../Common/GLSLCommon.h" namespace vk { + using namespace ::glsl; + struct varying_register_t { std::string name; int reg_location; }; - std::string getFloatTypeNameImpl(size_t elementCount); + //Decompiler function lookup std::string getFunctionImpl(FUNCTION f); - std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1); - void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain); const varying_register_t& get_varying_register(const std::string& name); - bool compile_glsl_to_spv(std::string& shader, glsl::program_domain domain, std::vector &spv); + bool compile_glsl_to_spv(std::string& shader, program_domain domain, std::vector &spv); void initialize_compiler_context(); void finalize_compiler_context(); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 7ddef3c22a..f194115076 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -9,7 +9,7 @@ std::string VKFragmentDecompilerThread::getFloatTypeName(size_t elementCount) { - return vk::getFloatTypeNameImpl(elementCount); + return glsl::getFloatTypeNameImpl(elementCount); } std::string VKFragmentDecompilerThread::getFunction(FUNCTION f) @@ -24,7 +24,7 @@ std::string VKFragmentDecompilerThread::saturate(const std::string & code) std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - return vk::compareFunctionImpl(f, Op0, Op1); + return glsl::compareFunctionImpl(f, Op0, Op1); } void 
VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) @@ -135,7 +135,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) vk::glsl::program_input in; in.location = location; - in.domain = vk::glsl::glsl_fragment_program; + in.domain = glsl::glsl_fragment_program; in.name = PI.name; in.type = vk::glsl::input_type_texture; @@ -169,7 +169,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) vk::glsl::program_input in; in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; - in.domain = vk::glsl::glsl_fragment_program; + in.domain = glsl::glsl_fragment_program; in.name = "FragmentConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -229,7 +229,7 @@ namespace vk void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { - vk::insert_glsl_legacy_function(OS, vk::glsl::program_domain::glsl_fragment_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); const std::set output_values = { @@ -481,7 +481,7 @@ void VKFragmentProgram::Compile() fs::file(fs::get_config_dir() + "shaderlog/FragmentProgram.spirv", fs::rewrite).write(shader); std::vector spir_v; - if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_fragment_program, spir_v)) + if (!vk::compile_glsl_to_spv(shader, glsl::glsl_fragment_program, spir_v)) fmt::throw_exception("Failed to compile fragment shader" HERE); //Create the object and compile diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index d7bebee8a7..1400b104d8 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -17,6 +17,7 @@ #include "../GCM.h" #include "../Common/TextureUtils.h" #include "../Common/ring_buffer_helper.h" +#include "../Common/GLSLCommon.h" #include "../rsx_cache.h" #define DESCRIPTOR_MAX_DRAW_CALLS 4096 @@ -1374,12 +1375,6 @@ namespace vk namespace glsl { - enum program_domain - { - glsl_vertex_program = 0, - glsl_fragment_program = 1 - }; - enum program_input_type { 
input_type_uniform_buffer = 0, @@ -1404,7 +1399,7 @@ namespace vk struct program_input { - program_domain domain; + ::glsl::program_domain domain; program_input_type type; bound_buffer as_buffer; @@ -1428,7 +1423,7 @@ namespace vk program(program&& other) = delete; ~program(); - program& load_uniforms(program_domain domain, const std::vector& inputs); + program& load_uniforms(::glsl::program_domain domain, const std::vector& inputs); bool has_uniform(std::string uniform_name); void bind_uniform(VkDescriptorImageInfo image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 2b836f38f8..b27786847a 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -5,11 +5,13 @@ namespace vk { namespace glsl { + using namespace ::glsl; + program::program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs) : m_device(dev), pipeline(p) { - load_uniforms(glsl::program_domain::glsl_vertex_program, vertex_input); - load_uniforms(glsl::program_domain::glsl_vertex_program, fragment_inputs); + load_uniforms(program_domain::glsl_vertex_program, vertex_input); + load_uniforms(program_domain::glsl_vertex_program, fragment_inputs); attribute_location_mask = 0; vertex_attributes_mask = 0; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 7978594e2a..3e5351e472 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -7,7 +7,7 @@ std::string VKVertexDecompilerThread::getFloatTypeName(size_t elementCount) { - return vk::getFloatTypeNameImpl(elementCount); + return glsl::getFloatTypeNameImpl(elementCount); } std::string VKVertexDecompilerThread::getIntTypeName(size_t elementCount) @@ -23,7 +23,7 @@ std::string VKVertexDecompilerThread::getFunction(FUNCTION f) std::string 
VKVertexDecompilerThread::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - return vk::compareFunctionImpl(f, Op0, Op1); + return glsl::compareFunctionImpl(f, Op0, Op1); } void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) @@ -39,7 +39,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) vk::glsl::program_input in; in.location = SCALE_OFFSET_BIND_SLOT; - in.domain = vk::glsl::glsl_vertex_program; + in.domain = glsl::glsl_vertex_program; in.name = "ScaleOffsetBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -74,7 +74,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v { vk::glsl::program_input in; in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT; - in.domain = vk::glsl::glsl_vertex_program; + in.domain = glsl::glsl_vertex_program; in.name = PI.name + "_buffer"; in.type = vk::glsl::input_type_texel_buffer; @@ -108,7 +108,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std vk::glsl::program_input in; in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; - in.domain = vk::glsl::glsl_vertex_program; + in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -249,7 +249,7 @@ namespace vk void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - vk::insert_glsl_legacy_function(OS, vk::glsl::program_domain::glsl_vertex_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); std::string parameters = ""; for (int i = 0; i < 16; ++i) @@ -405,7 +405,7 @@ void VKVertexProgram::Compile() fs::file(fs::get_config_dir() + "shaderlog/VertexProgram.spirv", fs::rewrite).write(shader); std::vector spir_v; - if (!vk::compile_glsl_to_spv(shader, vk::glsl::glsl_vertex_program, spir_v)) + if (!vk::compile_glsl_to_spv(shader, glsl::glsl_vertex_program, spir_v)) fmt::throw_exception("Failed to compile vertex shader" HERE); 
VkShaderModuleCreateInfo vs_info; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 3a4bec17f2..8ec112c7c4 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -638,6 +638,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 8663755a99..abc827a06b 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1783,5 +1783,8 @@ Utilities + + Emu\GPU\RSX\Common + \ No newline at end of file