Meta: Also download support files of WPT tests' reference pages

Author: https://github.com/LucasChollet Commit: 2b56a53d41 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2929 Reviewed-by: https://github.com/tcl3 ✅
2025-08-04 15:19:42 +00:00 · 2024-12-15 15:39:03 -05:00 · 2024-12-15 15:39:03 -05:00 · 2b56a53d41 · 2024-12-19 09:55:33 +00:00
commit 2b56a53d41
parent cc76225c1f
1 changed file with 42 additions and 18 deletions
--- a/Meta/import-wpt-test.py
+++ b/Meta/import-wpt-test.py
@@ -1,15 +1,15 @@
 #!/usr/bin/env python3
-import os
+from collections import namedtuple
-import sys
+from dataclasses import dataclass
-
+from enum import Enum
 from html.parser import HTMLParser
 from pathlib import Path
 from urllib.parse import urljoin
 from urllib.request import urlopen
 from collections import namedtuple
 from enum import Enum
 import re
 import os
 import sys
 wpt_base_url = 'https://wpt.live/'
@@ -30,6 +30,18 @@ class TestType(Enum):
 PathMapping = namedtuple('PathMapping', ['source', 'destination'])
 class ResourceType(Enum):
    INPUT = 1
    EXPECTED = 2
@dataclass
 class ResourceAndType:
    resource: str
    type: ResourceType
 test_type = TestType.TEXT
 raw_reference_path = None  # As specified in the test HTML
 reference_path = None  # With parent directories
@@ -96,28 +108,30 @@ class TestTypeIdentifier(HTMLParser):
                self.ref_test_link_found = True
-def map_to_path(sources: list[str], is_resource=True, resource_path=None) -> list[PathMapping]:
+def map_to_path(sources: list[ResourceAndType], is_resource=True, resource_path=None) -> list[PathMapping]:
    filepaths: list[PathMapping] = []
    for source in sources:
-        if source.startswith('/') or not is_resource:
+        base_directory = test_type.input_path if source.type == ResourceType.INPUT else test_type.expected_path
-            file_path = test_type.input_path + '/' + source
+
        if source.resource.startswith('/') or not is_resource:
            file_path = base_directory + '/' + source.resource
        else:
            # Add it as a sibling path if it's a relative resource
            sibling_location = str(Path(resource_path).parent)
-            parent_directory = test_type.input_path + '/' + sibling_location
+            parent_directory = base_directory + '/' + sibling_location
-            file_path = parent_directory + '/' + source
+            file_path = parent_directory + '/' + source.resource
        # Map to source and destination
-        output_path = wpt_base_url + file_path.replace(test_type.input_path, '')
+        output_path = wpt_base_url + file_path.replace(base_directory, '')
        filepaths.append(PathMapping(output_path, Path(file_path).absolute()))
    return filepaths
-def modify_sources(files, resources):
+def modify_sources(files, resources: list[ResourceAndType]) -> None:
    for file in files:
        # Get the distance to the wpt-imports folder
        folder_index = str(file).find(test_type.input_path)
@@ -134,7 +148,7 @@ def modify_sources(files, resources):
            page_source = f.read()
        # Iterate all scripts and overwrite the src attribute
-        for i, resource in enumerate(resources):
+        for i, resource in enumerate(map(lambda r: r.resource, resources)):
            if resource.startswith('/'):
                new_src_value = parent_folder_path + resource[1::]
                page_source = page_source.replace(resource, new_src_value)
@@ -212,7 +226,7 @@ def main():
    raw_reference_path = identifier.reference_path
    print(f"Identified {url_to_import} as type {test_type}, ref {raw_reference_path}")
-    main_file = [resource_path]
+    main_file = [ResourceAndType(resource_path, ResourceType.INPUT)]
    main_paths = map_to_path(main_file, False)
    if test_type == TestType.REF and raw_reference_path is None:
@@ -235,11 +249,21 @@ def main():
    files_to_modify = download_files(main_paths)
    create_expectation_files(main_paths)
-    parser = LinkedResourceFinder()
+    input_parser = LinkedResourceFinder()
-    parser.feed(page)
+    input_parser.feed(page)
    additional_resources = list(map(lambda s: ResourceAndType(s, ResourceType.INPUT), input_parser.resources))
-    modify_sources(files_to_modify, parser.resources)
+    expected_parser = LinkedResourceFinder()
-    script_paths = map_to_path(parser.resources, True, resource_path)
+    for path in main_paths[1:]:
        with urlopen(path.source) as response:
            page = response.read().decode("utf-8")
            expected_parser.feed(page)
    additional_resources.extend(
        list(map(lambda s: ResourceAndType(s, ResourceType.EXPECTED), expected_parser.resources))
    )
    modify_sources(files_to_modify, additional_resources)
    script_paths = map_to_path(additional_resources, True, resource_path)
    download_files(script_paths)