mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 01:29:17 +00:00
Meta: Normalize relative URLs before fetching them in the WPT importer
Previously, fetching URLs with `..` components would fail.
This commit is contained in:
parent
7724a96efa
commit
7507906c68
Notes:
github-actions[bot]
2025-06-30 23:45:04 +00:00
Author: https://github.com/tcl3
Commit: 7507906c68
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5178
1 changed file with 16 additions and 4 deletions
|
@ -9,8 +9,11 @@ from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from posixpath import normpath
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from urllib.parse import urlsplit
|
||||||
|
from urllib.parse import urlunsplit
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
|
||||||
wpt_base_url = "https://wpt.live/"
|
wpt_base_url = "https://wpt.live/"
|
||||||
|
@ -196,7 +199,14 @@ def modify_sources(files, resources: list[ResourceAndType]) -> None:
|
||||||
f.write(str(page_source))
|
f.write(str(page_source))
|
||||||
|
|
||||||
|
|
||||||
def normalized_url_path(url):
|
def normalize_url(url):
|
||||||
|
parts = urlsplit(url)
|
||||||
|
normalized_path = normpath(parts.path)
|
||||||
|
normalized_url = urlunsplit((parts.scheme, parts.netloc, normalized_path, parts.query, parts.fragment))
|
||||||
|
return normalized_url
|
||||||
|
|
||||||
|
|
||||||
|
def remove_repeated_url_slashes(url):
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
return "/" + "/".join(segment for segment in parsed.path.split("/") if segment)
|
return "/" + "/".join(segment for segment in parsed.path.split("/") if segment)
|
||||||
|
|
||||||
|
@ -205,7 +215,7 @@ def download_files(filepaths, skip_existing):
|
||||||
downloaded_files = []
|
downloaded_files = []
|
||||||
|
|
||||||
for file in filepaths:
|
for file in filepaths:
|
||||||
normalized_path = normalized_url_path(file.source)
|
normalized_path = remove_repeated_url_slashes(file.source)
|
||||||
if normalized_path in visited_paths:
|
if normalized_path in visited_paths:
|
||||||
continue
|
continue
|
||||||
if normalized_path in download_exclude_list:
|
if normalized_path in download_exclude_list:
|
||||||
|
@ -293,14 +303,16 @@ def main():
|
||||||
reference_path = raw_reference_path
|
reference_path = raw_reference_path
|
||||||
main_paths.append(
|
main_paths.append(
|
||||||
PathMapping(
|
PathMapping(
|
||||||
wpt_base_url + raw_reference_path, Path(test_type.expected_path + raw_reference_path).absolute()
|
normalize_url(wpt_base_url + raw_reference_path),
|
||||||
|
Path(test_type.expected_path + raw_reference_path).absolute(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
reference_path = Path(resource_path).parent.joinpath(raw_reference_path).__str__()
|
reference_path = Path(resource_path).parent.joinpath(raw_reference_path).__str__()
|
||||||
main_paths.append(
|
main_paths.append(
|
||||||
PathMapping(
|
PathMapping(
|
||||||
wpt_base_url + "/" + reference_path, Path(test_type.expected_path + "/" + reference_path).absolute()
|
normalize_url(wpt_base_url + "/" + reference_path),
|
||||||
|
Path(test_type.expected_path + "/" + reference_path).absolute(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue