#!/usr/bin/env python3
"""Check that LibWeb HTML test files begin with a doctype declaration.

The files to check are taken from the command line or, when no arguments are
given, from ``git ls-files``. Every relevant file whose first line does not
start with a doctype is reported and the script exits with status 1.
"""

import os
import re
import subprocess
import sys

# HTML files below the Layout/Ref/Screenshot/Text test directories. The
# lookahead is evaluated after every character consumed past the category's
# slash, so any path containing a later "/wpt-import/" component is rejected.
RE_RELEVANT_FILE = re.compile(r"^Tests/LibWeb/(Layout|Ref|Screenshot|Text)/(.(?!wpt-import/))*\.html$")

# Exclude files with encodings that would cause python to error out.
# FIXME: Ideally, these should be supported.
EXCLUDED_FILES = [
    "Tests/LibWeb/Layout/input/html-encoding-detection-crash.html",
    "Tests/LibWeb/Layout/input/utf-16-be-xhtml-file-should-decode-correctly.html",
]

# A doctype at the very start of the line, in any letter case. The name is
# deliberately unconstrained so a bogus doctype such as "<!DOCTYPE quirks>"
# is accepted as an explicit opt-in to quirks mode.
RE_DOCTYPE = re.compile(r"^<!doctype [^>]+>", re.IGNORECASE)


def should_check_file(filename: str) -> bool:
    """Return True if *filename* is a relevant HTML test file that is not excluded."""
    if filename in EXCLUDED_FILES:
        return False
    return RE_RELEVANT_FILE.match(filename) is not None


def find_files_here_or_argv():
    """Return an iterator over the files to check.

    Candidates are the command-line arguments when any are given, otherwise
    the output of ``git ls-files`` in the current directory. Candidates are
    filtered through :func:`should_check_file`.

    Raises:
        subprocess.CalledProcessError: if ``git ls-files`` exits non-zero.
    """
    if len(sys.argv) > 1:
        raw_list = sys.argv[1:]
    else:
        process = subprocess.run(["git", "ls-files"], check=True, capture_output=True)
        raw_list = process.stdout.decode().strip("\n").split("\n")

    return filter(should_check_file, raw_list)


def run() -> None:
    """Report every checked file whose first line lacks a doctype.

    Prints the offending files and exits with status 1 if there are any;
    returns normally otherwise.
    """
    files_with_missing_doctypes = []

    for filename in find_files_here_or_argv():
        with open(filename, "r") as file:
            # Only the first line is inspected: the doctype has to open the file.
            if not RE_DOCTYPE.search(file.readline()):
                files_with_missing_doctypes.append(filename)

    if files_with_missing_doctypes:
        print(
            "The following HTML files should include a doctype declaration at the start of the file but don't:\n"
            + "You should add <!DOCTYPE html> to the very beginning of these files, except if they absolutely need "
            + "to run in quirks mode. In that case, you can clearly indicate so with a bogus doctype that says "
            + '"quirks" instead of "html".\n',
            " ".join(files_with_missing_doctypes),
        )
        sys.exit(1)


if __name__ == "__main__":
    # The paths above are relative to the parent of this script's directory.
    # Resolve __file__ to an absolute path first: with a bare relative
    # __file__, dirname() is "" and appending "/.." would point at the
    # filesystem root.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(os.path.join(script_dir, ".."))
    run()