From 8673dd4e6e694631e2f07ccb6db8be2f97a08b53 Mon Sep 17 00:00:00 2001
From: Sam Atkins
Date: Wed, 6 Nov 2024 12:32:57 +0000
Subject: [PATCH] Meta: Include stylesheets and stylesheet-includes in WPT imports

Because of this we no longer have to handle ahem.css in a special way.

This should find:
- `<link rel="stylesheet" href="...">`
- CSS `@import`s
- Any resources linked from a stylesheet with `url()`

There's a good chance there are other resources we'll want to copy too,
but CSS was a big hole.
---
 Meta/import-wpt-test.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/Meta/import-wpt-test.py b/Meta/import-wpt-test.py
index 1dc81774389..6ab33c51bb5 100755
--- a/Meta/import-wpt-test.py
+++ b/Meta/import-wpt-test.py
@@ -9,6 +9,7 @@ from urllib.parse import urljoin
 from urllib.request import urlopen
 from collections import namedtuple
 from enum import Enum
+import re
 
 wpt_base_url = 'https://wpt.live/'
 
@@ -35,13 +36,37 @@ reference_path = None  # With parent directories
 src_values = []
 
 
-class ScriptSrcValueFinder(HTMLParser):
+class LinkedResourceFinder(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self._tag_stack_ = []
+        self._match_css_url_ = re.compile(r"url\(\"?(?P<url>[^\")]+)\"?\)")
+        self._match_css_import_string_ = re.compile(r"@import\s+\"(?P<url>[^\")]+)\"")
 
     def handle_starttag(self, tag, attrs):
+        self._tag_stack_.append(tag)
         if tag == "script":
             attr_dict = dict(attrs)
             if "src" in attr_dict:
                 src_values.append(attr_dict["src"])
+        if tag == "link":
+            attr_dict = dict(attrs)
+            if attr_dict["rel"] == "stylesheet":
+                src_values.append(attr_dict["href"])
+
+    def handle_endtag(self, tag):
+        self._tag_stack_.pop()
+
+    def handle_data(self, data):
+        if self._tag_stack_ and self._tag_stack_[-1] == "style":
+            # Look for uses of url()
+            url_iterator = self._match_css_url_.finditer(data)
+            for match in url_iterator:
+                src_values.append(match.group("url"))
+            # Look for @imports that use plain strings - we already found the url() ones
+            import_iterator = self._match_css_import_string_.finditer(data)
+            for match in import_iterator:
+                src_values.append(match.group("url"))
 
 
 class TestTypeIdentifier(HTMLParser):
@@ -111,8 +136,6 @@ def modify_sources(files):
         with open(file, 'r') as f:
             page_source = f.read()
 
-        page_source = page_source.replace('/fonts/ahem.css', '../' * parent_folder_count + 'fonts/ahem.css')
-
         # Iterate all scripts and overwrite the src attribute
         for i, src_value in enumerate(src_values):
             if src_value.startswith('/'):
@@ -208,7 +231,7 @@ def main():
     files_to_modify = download_files(main_paths)
     create_expectation_files(main_paths)
 
-    parser = ScriptSrcValueFinder()
+    parser = LinkedResourceFinder()
     parser.feed(page)
 
     modify_sources(files_to_modify)