gl/vk/dx12: re-implement pack/unpack operations (#1764)

dx12: implement pack/unpack operations

dx12: Fix shader compilation when pack/unpack is used

dx12: pk16/up16 - relax half-float range to more realistic values
This commit is contained in:
kd-11 2016-06-10 14:42:49 +03:00 committed by Ivan
parent db27ea923d
commit 35ab3b0cd8
2 changed files with 72 additions and 8 deletions

View file

@ -415,10 +415,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode)
case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); return true;
case RSX_FP_OPCODE_MOV: SetDst("$0"); return true;
case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); return true;
case RSX_FP_OPCODE_PK2: SetDst("round(clamp($0, -1.0, 1.0) * 32767.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_PK4: SetDst("round(clamp($0, -1.0, 1.0) * 127.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); return true;
case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); return true;
case RSX_FP_OPCODE_PK2: SetDst("float(packSnorm2x16($0.xy))"); return true;
case RSX_FP_OPCODE_PK4: SetDst("float(packSnorm4x8($0))"); return true;
case RSX_FP_OPCODE_PK16: SetDst("float(packHalf2x16($0.xy))"); return true;
case RSX_FP_OPCODE_PKB: SetDst("packUnorm4x8($0 / 255.)"); return true;
case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); return true;
case RSX_FP_OPCODE_SEQ: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); return true;
case RSX_FP_OPCODE_SFL: SetDst(getFunction(FUNCTION::FUNCTION_SFL)); return true;
@ -512,10 +512,10 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
return true;
}
return false;
case RSX_FP_OPCODE_UP2: SetDst("clamp($0 / 32767.0, -1.0, 1.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_UP4: SetDst("clamp($0 / 127.0, -1.0, 1.0)"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); return true;
case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); return true;
case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16(uint($0.x))"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8(uint($0.x))"); return true; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478))
case RSX_FP_OPCODE_UP16: SetDst("unpackHalf2x16(uint($0.x))"); return true;
case RSX_FP_OPCODE_UPB: SetDst("(unpackUnorm4x8(uint($0.x)) * 255.)"); return true;
case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); return true;
}
return false;

View file

@ -138,5 +138,69 @@ void insert_d3d12_legacy_function(std::ostream& OS)
OS << " result.z = clamped_val.x > 0.0 ? exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.0;\n";
OS << " return result;\n";
OS << "}\n\n";
// Emit the HLSL helper packSnorm2x16(float2 val): x and y are each clamped to
// [-1, 1], scaled by 32767 and rounded; x is shifted into the upper 16 bits and
// y is OR-ed into the lower bits of the returned uint.
// NOTE(review): the rounded value is assigned straight to a uint, so the encoding
// of negative components depends on HLSL float-to-uint conversion - confirm.
OS << "uint packSnorm2x16(float2 val)"
   << "{\n"
   << " uint high_bits = round(clamp(val.x, -1., 1.) * 32767.);\n"
   << " uint low_bits = round(clamp(val.y, -1., 1.) * 32767.);\n"
   << " return (high_bits << 16)|low_bits;\n"
   << "}\n\n";
// Emit the HLSL helper packSnorm4x8(float4 val): every component is clamped to
// [-1, 1], scaled by 127 and rounded, then the four results are combined as
// x << 24 | y << 16 | z << 8 | w.
// The lowest byte must come from val.w so that all four components are stored;
// reading val.z there would drop w and store z twice.
// NOTE(review): the rounded value is assigned straight to a uint, so the encoding
// of negative components depends on HLSL float-to-uint conversion - confirm.
OS << "uint packSnorm4x8(float4 val)";
OS << "{\n";
OS << " uint high_bits_a = round(clamp(val.x, -1., 1.) * 127.);\n";
OS << " uint high_bits_b = round(clamp(val.y, -1., 1.) * 127.);\n";
OS << " uint low_bits_a = round(clamp(val.z, -1., 1.) * 127.);\n";
OS << " uint low_bits_b = round(clamp(val.w, -1., 1.) * 127.);\n";
OS << " return (high_bits_a << 24)|(high_bits_b << 16)|(low_bits_a << 8)|low_bits_b;\n";
OS << "}\n\n";
// Emit the HLSL helper unpackSnorm2x16(uint val): the upper and lower 16-bit
// halves are each divided by 32767 and clamped to [-1, 1]; the result is
// float2(upper, lower).
// NOTE(review): the halves are read as unsigned with no sign extension, so a
// half with its top bit set divides to more than 1 and is clamped to 1.
OS << "float2 unpackSnorm2x16(uint val)"
   << "{\n"
   << " float high = clamp((val >> 16) / 32767., -1., 1.);\n"
   << " float low = clamp((val & 0x0000FFFF) / 32767., -1., 1.);\n"
   << " return float2(high, low);\n"
   << "}\n\n";
// Emit the HLSL helper unpackSnorm4x8(uint val): the four bytes, from most to
// least significant, are each divided by 127 and clamped to [-1, 1], and are
// returned as the x, y, z and w components of a float4.
// NOTE(review): the bytes are read as unsigned with no sign extension, so a
// byte above 127 divides to more than 1 and is clamped to 1.
OS << "float4 unpackSnorm4x8(uint val)"
   << "{\n"
   << " float high_a = clamp((val >> 24) / 127., -1., 1.);\n"
   << " float high_b = clamp(((val >> 16) & 0xFF) / 127., -1., 1.);\n"
   << " float low_a = clamp(((val >> 8) & 0xFF) / 127., -1., 1.);\n"
   << " float low_b = clamp((val & 0xFF) / 127., -1., 1.);\n"
   << " return float4(high_a, high_b, low_a, low_b);\n"
   << "}\n\n";
// Emit the HLSL helper packUnorm4x8(float4 val): every component is clamped to
// [0, 1], scaled by 255 and rounded, then the four results are combined as
// x << 24 | y << 16 | z << 8 | w.
// The clamp range is [0, 1] because this is the unsigned-normalized variant; it
// keeps negative values from ever reaching the float-to-uint conversion.
// The lowest byte must come from val.w so that all four components are stored;
// reading val.z there would drop w and store z twice.
OS << "uint packUnorm4x8(float4 val)";
OS << "{\n";
OS << " uint high_bits_a = round(clamp(val.x, 0., 1.) * 255.);\n";
OS << " uint high_bits_b = round(clamp(val.y, 0., 1.) * 255.);\n";
OS << " uint low_bits_a = round(clamp(val.z, 0., 1.) * 255.);\n";
OS << " uint low_bits_b = round(clamp(val.w, 0., 1.) * 255.);\n";
OS << " return (high_bits_a << 24)|(high_bits_b << 16)|(low_bits_a << 8)|low_bits_b;\n";
OS << "}\n\n";
// Emit the HLSL helper unpackUnorm4x8(uint val): the four bytes, from most to
// least significant, are each divided by 255 and clamped to [-1, 1], and are
// returned as the x, y, z and w components of a float4.
OS << "float4 unpackUnorm4x8(uint val)"
   << "{\n"
   << " float high_a = clamp((val >> 24) / 255., -1., 1.);\n"
   << " float high_b = clamp(((val >> 16) & 0xFF) / 255., -1., 1.);\n"
   << " float low_a = clamp(((val >> 8) & 0xFF) / 255., -1., 1.);\n"
   << " float low_b = clamp((val & 0xFF) / 255., -1., 1.);\n"
   << " return float4(high_a, high_b, low_a, low_b);\n"
   << "}\n\n";
/**
* There is no easy way to do this in a shader since we can't recast the f16 blocks to u16.
* Fake it and hope the program requests the corresponding f16 unpack.
**/
// Emit the HLSL helper packHalf2x16(float2 val): rather than producing real
// half-float bits, the input is divided by 6.1E+5 and passed to packSnorm2x16.
OS << "uint packHalf2x16(float2 val)"
   << "{\n"
   << " return packSnorm2x16(val / 6.1E+5);\n"
   << "}\n\n";
// Emit the HLSL helper unpackHalf2x16(uint val): the counterpart of the emitted
// packHalf2x16; it runs unpackSnorm2x16 on the value and multiplies the result
// by 6.1E+5.
OS << "float2 unpackHalf2x16(uint val)"
   << "{\n"
   << " return unpackSnorm2x16(val) * 6.1E+5;\n"
   << "}\n\n";
}
#endif