diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 723058ecc9..24559b4b2b 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -214,6 +214,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; + m_shader_props.srgb_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA; glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 1c44360be4..ef14c2bdd1 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -510,10 +510,10 @@ namespace glsl OS << " else if (srgb_convert)\n" " {\n" - " " << reg0 << ".rgb = clamp16(linear_to_srgb(" << reg0 << ")).rgb;\n" - " " << reg1 << ".rgb = clamp16(linear_to_srgb(" << reg1 << ")).rgb;\n" - " " << reg2 << ".rgb = clamp16(linear_to_srgb(" << reg2 << ")).rgb;\n" - " " << reg3 << ".rgb = clamp16(linear_to_srgb(" << reg3 << ")).rgb;\n" + " " << reg0 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" + " " << reg1 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" + " " << reg2 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" + " " << reg3 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n" " }\n"; } else @@ -521,10 +521,10 @@ namespace glsl OS << " else if (srgb_convert)\n" " {\n" - " " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n" - " " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n" - " " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n" - " " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n" + " " << reg0 << " = round_to_8bit(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n" + " " << reg1 << " = round_to_8bit(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n" + " " << reg2 << " = round_to_8bit(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n" + " " << reg3 << " = round_to_8bit(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n" " }\n"; } } @@ -561,6 +561,20 @@ namespace glsl OS << "#define _kill() discard\n\n"; } + if (!props.fp32_outputs) + { + OS << "// Workaround broken output rounding behavior\n"; + if (props.srgb_output_rounding) + { + const auto scale = (props.supports_native_fp16) ? "float16_t(255.)" : "255."; + OS << "#define round_to_8bit(v4) (round(v4 * " << scale << ") / " << scale << ")\n\n"; + } + else + { + OS << "#define round_to_8bit(v4) (v4)\n\n"; + } + } + if (props.require_texture_ops) { OS << diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index ac9a917f76..6e1461aafa 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -37,5 +37,6 @@ namespace glsl bool low_precision_tests : 1; bool disable_early_discard : 1; bool supports_native_fp16 : 1; + bool srgb_output_rounding : 1; }; }; diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 748caf8222..e3ea50afd9 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -251,6 +251,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.low_precision_tests = device_props.has_low_precision_rounding; m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; + m_shader_props.srgb_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; glsl::insert_glsl_legacy_function(OS, m_shader_props); }