From d6f419af53abbee95a867e21484643a69619c8ce Mon Sep 17 00:00:00 2001
From: Tim Ledbetter <tim.ledbetter@ladybird.org>
Date: Mon, 7 Jul 2025 14:43:28 +0100
Subject: [PATCH] Meta: Add `--wpt-base-url` option to the WPT importer

This allows files to be downloaded from a locally running WPT server by
first running `./Meta/WPT.sh serve`, then importing with
`--wpt-base-url http://web-platform.test:8000`.
---
 Meta/WPT.sh             | 15 ++++++++++++---
 Meta/import-wpt-test.py | 21 +++++++++++++--------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/Meta/WPT.sh b/Meta/WPT.sh
index dd80467df7c..24457bb9a91 100755
--- a/Meta/WPT.sh
+++ b/Meta/WPT.sh
@@ -659,10 +659,19 @@ if [[ "$CMD" =~ ^(update|clean|run|serve|compare|import|list-tests)$ ]]; then
             serve_wpt
             ;;
         import)
-            if [ "$1" = "--force" ]; then
+            while [[ "$1" =~ ^--(force|wpt-base-url)$ ]]; do
+                if [ "$1" = "--wpt-base-url" ]; then
+                    if [ -z "$2" ]; then
+                        echo "Missing argument for --wpt-base-url"
+                        usage
+                    fi
+                    IMPORT_ARGS+=( "--wpt-base-url=$2" )
+                    shift
+                else
+                    IMPORT_ARGS+=( "$1" )
+                fi
                 shift
-                IMPORT_ARGS+=( "--force" )
-            fi
+            done
             if [ $# -eq 0 ]; then
                 usage
             fi
diff --git a/Meta/import-wpt-test.py b/Meta/import-wpt-test.py
index 350e6e5f580..3ee7be341c5 100755
--- a/Meta/import-wpt-test.py
+++ b/Meta/import-wpt-test.py
@@ -16,7 +16,6 @@ from urllib.parse import urlsplit
 from urllib.parse import urlunsplit
 from urllib.request import urlopen
 
-wpt_base_url = "https://wpt.live/"
 download_exclude_list = {
     # This relies on a dynamic path that cannot be rewitten by the importer.
     "/resources/idlharness.js",
@@ -131,7 +130,9 @@ class TestTypeIdentifier(HTMLParser):
                 self.ref_test_link_found = True
 
 
-def map_to_path(sources: list[ResourceAndType], is_resource=True, resource_path=None) -> list[PathMapping]:
+def map_to_path(
+    sources: list[ResourceAndType], wpt_base_url: str, is_resource=True, resource_path=None
+) -> list[PathMapping]:
     filepaths: list[PathMapping] = []
 
     for source in sources:
@@ -211,11 +212,12 @@ def remove_repeated_url_slashes(url):
     return "/" + "/".join(segment for segment in parsed.path.split("/") if segment)
 
 
-def download_files(filepaths, skip_existing):
+def download_files(filepaths, wpt_base_url, skip_existing):
     downloaded_files = []
 
     for file in filepaths:
         normalized_path = remove_repeated_url_slashes(file.source)
+        print(f"Source {normalized_path}, Destination {file.destination}")
         if normalized_path in visited_paths:
             continue
         if normalized_path in download_exclude_list:
@@ -223,7 +225,7 @@ def download_files(filepaths, skip_existing):
             visited_paths.add(normalized_path)
             continue
 
-        source = urljoin(file.source, "/".join(file.source.split("/")[3:]))
+        source = urljoin(wpt_base_url, normalized_path)
         destination = Path(os.path.normpath(file.destination))
 
         if skip_existing and destination.exists():
@@ -271,11 +273,14 @@ def create_expectation_files(files, skip_existing):
 def main():
     parser = argparse.ArgumentParser(description="Import a WPT test into LibWeb")
     parser.add_argument("--force", action="store_true", help="Force download of files even if they already exist")
+    parser.add_argument("--wpt-base-url", type=urlparse, default="https://wpt.live/")
     parser.add_argument("url", type=str, help="The URL of the WPT test to import")
     args = parser.parse_args()
 
     skip_existing = not args.force
     url_to_import = args.url
+    wpt_base_url = args.wpt_base_url.geturl()
+    print(f"Importing WPT test from: {wpt_base_url}")
     resource_path = "/".join(Path(url_to_import).parts[2::])
 
     with urlopen(url_to_import) as response:
@@ -293,7 +298,7 @@ def main():
     print(f"Identified {url_to_import} as type {test_type}, ref {raw_reference_path}")
 
     main_file = [ResourceAndType(resource_path, ResourceType.INPUT)]
-    main_paths = map_to_path(main_file, False)
+    main_paths = map_to_path(main_file, wpt_base_url, False)
 
     if test_type == TestType.REF and raw_reference_path is None:
         raise RuntimeError("Failed to file reference path in ref test")
@@ -316,7 +321,7 @@ def main():
                 )
             )
 
-    files_to_modify = download_files(main_paths, skip_existing)
+    files_to_modify = download_files(main_paths, wpt_base_url, skip_existing)
     create_expectation_files(main_paths, skip_existing)
 
     input_parser = LinkedResourceFinder()
@@ -333,8 +338,8 @@ def main():
     )
 
     modify_sources(files_to_modify, additional_resources)
-    script_paths = map_to_path(additional_resources, True, resource_path)
-    download_files(script_paths, skip_existing)
+    script_paths = map_to_path(additional_resources, wpt_base_url, True, resource_path)
+    download_files(script_paths, wpt_base_url, skip_existing)
 
 
 if __name__ == "__main__":