#!/usr/bin/env python3
#
# Meta/jbig2_to_pdf.py, as added by commit
# 7d0e752590ec3ea692c9a42bc12200cd3f61fe2f (Nico Weber, 2024-03-14):
# "Meta: Add a script that takes a JBIG2 file and writes a PDF embedding it".
#
# Most image viewers can't display JBIG2 files. All PDF viewers can display
# JBIG2 files. This is useful for checking that PDF viewers render JBIG2 files
# the same way we do.

"""
Creates a PDF that embeds a jbig2 image. Useful for viewing .jbig2 files in
PDF viewers, since all PDF viewers support .jbig2 but few image viewers do.

Usage is a bit clunky (use Build/lagom/bin/file to get the dimensions):
% Meta/jbig2_to_pdf.py -o foo.pdf path/to/bitmap.jbig2 399 400
% open foo.pdf
"""

import argparse
import sys
import textwrap

# ID string that opens every standalone JBIG2 file.
_JBIG2_FILE_ID = b'\x97JB2\r\n\x1a\n'


def dedent(b):
    """Return the bytes `b` with common leading whitespace removed.

    `textwrap.dedent` only accepts `str`, so the bytes are round-tripped
    through latin1, which maps every byte value to one code point and back.
    """
    return textwrap.dedent(b.decode('latin1')).encode('latin1')


def _strip_jbig2_file_header(data):
    """Return the segment data of a sequential JBIG2 file without its header.

    The file header is the 8-byte ID string, one flags byte and, when flag
    bit 1 is clear, a 4-byte page count. PDF's JBIG2Decode filter expects the
    data without that header.

    Raises:
        ValueError: if `data` is too short or does not start with the JBIG2
            file ID, or if the file uses the random-access organization
            (flag bit 0 clear), which cannot be embedded as-is.
    """
    flags_index = len(_JBIG2_FILE_ID)
    if len(data) <= flags_index or not data.startswith(_JBIG2_FILE_ID):
        raise ValueError('not a jbig2 file (missing or truncated file header)')

    flags = data[flags_index]
    if flags & 1 == 0:
        raise ValueError('random-access jbig2 does not work')

    header_size = flags_index + 1
    if flags & 2 == 0:
        # Page count is known, so a 4-byte page count field follows the flags.
        header_size += 4
    return data[header_size:]


def _build_pdf(image_data, width, height):
    """Return the bytes of a one-page PDF showing `image_data` full-page.

    Args:
        image_data: JBIG2 segment data with the file header already removed.
        width: image (and page) width.
        height: image (and page) height.
    """
    header = dedent(b'''\
        %PDF-1.4
        %\265\266

        ''')

    # Page content: scale the unit square the image is painted into up to
    # the page size, then paint the image.
    content = b'%d 0 0 %d 0 0 cm\n/Im Do' % (width, height)

    # The newline before `endstream` is not counted in a stream's /Length.
    stream_end = dedent(b'''
        endstream
        endobj
        ''')

    objs = [
        dedent(b'''\
            1 0 obj
            <<
              /Type /Catalog
              /Pages 2 0 R
            >>
            endobj
            '''),

        dedent(b'''\
            2 0 obj
            <<
              /Type /Pages
              /Kids [3 0 R]
              /Count 1
            >>
            endobj
            '''),

        dedent(b'''\
            3 0 obj
            <<
              /Type /Page
              /Parent 2 0 R
              /MediaBox [0 0 %d %d]
              /Contents 4 0 R
              /Resources <<
                /XObject <<
                  /Im 5 0 R
                >>
              >>
            >>
            endobj
            ''' % (width, height)),

        # Every stream needs a dictionary carrying its /Length.
        dedent(b'''\
            4 0 obj
            <<
              /Length %d
            >>
            stream
            ''' % (len(content),)) + content + stream_end,

        # The image bytes are appended outside dedent() so they stay intact.
        dedent(b'''\
            5 0 obj
            <<
              /Length %d
              /Type /XObject
              /Subtype /Image
              /Width %d
              /Height %d
              /ColorSpace /DeviceGray
              /Filter /JBIG2Decode
              /BitsPerComponent 1
            >>
            stream
            ''' % (len(image_data), width, height)) + image_data + stream_end,
    ]

    chunks = [header]
    position = len(header)
    offsets = []
    for obj in objs:
        # The xref table needs the byte offset at which each object starts.
        offsets.append(position)
        chunks.append(obj)
        chunks.append(b'\n')
        position += len(obj) + 1

    xref_offset = position
    chunks.append(b'xref\n')
    chunks.append(b'0 %d\n' % (len(objs) + 1))
    # Object 0 heads the free list; its generation number must be 65535.
    # Each xref entry is exactly 20 bytes, hence the space before the newline.
    chunks.append(b'0000000000 65535 f \n')
    chunks.extend(b'%010d 00000 n \n' % offset for offset in offsets)
    chunks.append(b'\n')

    chunks.append(dedent(b'''\
        trailer
        <<
          /Size %d
          /Root 1 0 R
        >>
        startxref
        %d
        %%%%EOF
        ''' % (len(objs) + 1, xref_offset)))

    return b''.join(chunks)


def main(argv=None):
    """Convert a sequential JBIG2 file into a PDF that embeds it.

    Args:
        argv: argument list to parse; defaults to `sys.argv[1:]` when None.

    Exits with status 1 (message on stderr) when the input is not a usable
    JBIG2 file.
    """
    parser = argparse.ArgumentParser(
        epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("image", help="Input image")
    parser.add_argument("width", type=int, help="image width")
    parser.add_argument("height", type=int, help="image height")
    # Required: there is no sensible place to write the PDF without it.
    parser.add_argument("-o", "--output", required=True,
                        help="Path to output PDF")
    args = parser.parse_args(argv)

    width, height = args.width, args.height

    with open(args.image, 'rb') as f:
        image_data = f.read()
    print(f'dims {width}x{height}')

    try:
        image_data = _strip_jbig2_file_header(image_data)
    except ValueError as e:
        print(e, file=sys.stderr)
        sys.exit(1)

    with open(args.output, 'wb') as f:
        f.write(_build_pdf(image_data, width, height))


if __name__ == '__main__':
    main()