gl/vk/dx12: re-implement pack/unpack operations (#1764)

dx12: implement pack/unpack operations; dx12: fix shader compilation when pack/unpack is used; dx12: pk16/up16 - relax half-float range to more realistic values
2025-04-20 03:25:16 +00:00 · 2016-06-10 14:42:49 +03:00 · 2016-06-10 14:42:49 +03:00 · 35ab3b0cd8
commit 35ab3b0cd8
parent db27ea923d
2 changed files with 72 additions and 8 deletions
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
@ -415,10 +415,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode)
 	case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); return true;
 	case RSX_FP_OPCODE_MOV: SetDst("$0"); return true;
 	case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); return true;
-	case RSX_FP_OPCODE_PK2: SetDst("round(clamp($0, -1.0, 1.0) * 32767.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
-	case RSX_FP_OPCODE_PK4: SetDst("round(clamp($0, -1.0, 1.0) * 127.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
-	case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); return true;
-	case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); return true;
+	case RSX_FP_OPCODE_PK2: SetDst("float(packSnorm2x16($0.xy))"); return true;
+	case RSX_FP_OPCODE_PK4: SetDst("float(packSnorm4x8($0))"); return true;
+	case RSX_FP_OPCODE_PK16: SetDst("float(packHalf2x16($0.xy))"); return true;
+	case RSX_FP_OPCODE_PKB: SetDst("packUnorm4x8($0 / 255.)"); return true;
 	case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); return true;
 	case RSX_FP_OPCODE_SEQ: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); return true;
 	case RSX_FP_OPCODE_SFL: SetDst(getFunction(FUNCTION::FUNCTION_SFL)); return true;
@ -512,10 +512,10 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
 			return true;
 		}
 		return false;
-	case RSX_FP_OPCODE_UP2: SetDst("clamp($0 / 32767.0, -1.0, 1.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
-	case RSX_FP_OPCODE_UP4: SetDst("clamp($0 / 127.0, -1.0, 1.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
-	case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); return true;
-	case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); return true;
+	case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16(uint($0.x))"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
+	case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8(uint($0.x))"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
+	case RSX_FP_OPCODE_UP16: SetDst("unpackHalf2x16(uint($0.x))"); return true;
+	case RSX_FP_OPCODE_UPB: SetDst("(unpackUnorm4x8(uint($0.x)) * 255.)"); return true;
 	case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); return true;
 	}
 	return false;
--- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp
@ -138,5 +138,69 @@ void insert_d3d12_legacy_function(std::ostream& OS)
 	OS << "	result.z = clamped_val.x > 0.0 ? exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.0;\n";
 	OS << "	return result;\n";
 	OS << "}\n\n";
+
+	OS << "uint packSnorm2x16(float2 val)";
+	OS << "{\n";
+	OS << "	uint high_bits = round(clamp(val.x, -1., 1.) * 32767.);\n";
+	OS << "	uint low_bits = round(clamp(val.y, -1., 1.) * 32767.);\n";
+	OS << "	return (high_bits << 16)|low_bits;\n";
+	OS << "}\n\n";
+
+	OS << "uint packSnorm4x8(float4 val)";
+	OS << "{\n";
+	OS << "	uint high_bits_a = round(clamp(val.x, -1., 1.) * 127.);\n";
+	OS << "	uint high_bits_b = round(clamp(val.y, -1., 1.) * 127.);\n";
+	OS << "	uint low_bits_a = round(clamp(val.z, -1., 1.) * 127.);\n";
+	OS << "	uint low_bits_b = round(clamp(val.z, -1., 1.) * 127.);\n";
+	OS << "	return (high_bits_a << 24)|(high_bits_b << 16)|(low_bits_a << 8)|low_bits_b;\n";
+	OS << "}\n\n";
+
+	OS << "float2 unpackSnorm2x16(uint val)";
+	OS << "{\n";
+	OS << "	float high = clamp((val >> 16) / 32767., -1., 1.);\n";
+	OS << "	float low = clamp((val & 0x0000FFFF) / 32767., -1., 1.);\n";
+	OS << "	return float2(high, low);\n";
+	OS << "}\n\n";
+
+	OS << "float4 unpackSnorm4x8(uint val)";
+	OS << "{\n";
+	OS << "	float high_a = clamp((val >> 24) / 127., -1., 1.);\n";
+	OS << "	float high_b = clamp(((val >> 16) & 0xFF) / 127., -1., 1.);\n";
+	OS << "	float low_a = clamp(((val >> 8) & 0xFF) / 127., -1., 1.);\n";
+	OS << "	float low_b = clamp((val & 0xFF) / 127., -1., 1.);\n";
+	OS << "	return float4(high_a, high_b, low_a, low_b);\n";
+	OS << "}\n\n";
+
+	OS << "uint packUnorm4x8(float4 val)";
+	OS << "{\n";
+	OS << "	uint high_bits_a = round(clamp(val.x, -1., 1.) * 255.);\n";
+	OS << "	uint high_bits_b = round(clamp(val.y, -1., 1.) * 255.);\n";
+	OS << "	uint low_bits_a = round(clamp(val.z, -1., 1.) * 255.);\n";
+	OS << "	uint low_bits_b = round(clamp(val.z, -1., 1.) * 255.);\n";
+	OS << "	return (high_bits_a << 24)|(high_bits_b << 16)|(low_bits_a << 8)|low_bits_b;\n";
+	OS << "}\n\n";
+
+	OS << "float4 unpackUnorm4x8(uint val)";
+	OS << "{\n";
+	OS << "	float high_a = clamp((val >> 24) / 255., -1., 1.);\n";
+	OS << "	float high_b = clamp(((val >> 16) & 0xFF) / 255., -1., 1.);\n";
+	OS << "	float low_a = clamp(((val >> 8) & 0xFF) / 255., -1., 1.);\n";
+	OS << "	float low_b = clamp((val & 0xFF) / 255., -1., 1.);\n";
+	OS << "	return float4(high_a, high_b, low_a, low_b);\n";
+	OS << "}\n\n";
+
+	/**
+	* There is no easy way to do this in a shader since we can't recast the f16 blocks to u16
+	* Fake it and hope the program requests the corresponding f16 unpack
+	**/
+	OS << "uint packHalf2x16(float2 val)";
+	OS << "{\n";
+	OS << "	return packSnorm2x16(val / 6.1E+5);\n";
+	OS << "}\n\n";
+
+	OS << "float2 unpackHalf2x16(uint val)";
+	OS << "{\n";
+	OS << "	return unpackSnorm2x16(val) * 6.1E+5;\n";
+	OS << "}\n\n";
 }
 #endif