LibURL: Also remove carriage returns from URL input

The definition of an "ASCII tab or newline" also includes U+000D CR.

This fixes 3 subtests in:

https://wpt.live/url/url-constructor.any.html
This commit is contained in:
Shannon Booth 2024-08-05 19:44:12 +12:00 committed by Andreas Kling
commit 41cf9f6fe3
Notes: github-actions[bot] 2024-08-05 12:56:58 +00:00
3 changed files with 34 additions and 6 deletions

View file

@ -98,6 +98,16 @@ port => ''
pathname => '/' pathname => '/'
search => '' search => ''
hash => '' hash => ''
new URL('h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg', undefined)
protocol => 'http:'
username => ''
password => ''
host => 'host:9000'
hostname => 'host'
port => '9000'
pathname => '/path'
search => '?query'
hash => '#frag'
========================================= =========================================
URL.parse('ftp://serenityos.org:21', undefined) URL.parse('ftp://serenityos.org:21', undefined)
protocol => 'ftp:' protocol => 'ftp:'
@ -199,3 +209,13 @@ port => ''
pathname => '/' pathname => '/'
search => '' search => ''
hash => '' hash => ''
URL.parse('h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg', undefined)
protocol => 'http:'
username => ''
password => ''
host => 'host:9000'
hostname => 'host'
port => '9000'
pathname => '/path'
search => '?query'
hash => '#frag'

View file

@ -1,6 +1,13 @@
<script src="../include.js"></script> <script src="../include.js"></script>
<script> <script>
test(() => { test(() => {
// Returns a copy of `str` in which every tab, line feed and carriage return
// character is replaced by its two-character escape sequence (\t, \n, \r).
function escapeWhitespace(str) {
    // The three passes cannot interfere: each replacement consists only of a
    // backslash and a letter, which none of the later patterns match.
    const tabsEscaped = str.replace(/\t/g, '\\t');
    const lineFeedsEscaped = tabsEscaped.replace(/\n/g, '\\n');
    return lineFeedsEscaped.replace(/\r/g, '\\r');
}
function printURL(url) { function printURL(url) {
println(`protocol => '${url.protocol}'`); println(`protocol => '${url.protocol}'`);
println(`username => '${url.username}'`); println(`username => '${url.username}'`);
@ -24,13 +31,14 @@
{ input: '//d:/..', base: 'file:///C:/a/b' }, { input: '//d:/..', base: 'file:///C:/a/b' },
{ input: 'file://a%C2%ADb/p' }, { input: 'file://a%C2%ADb/p' },
{ input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' }, { input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' },
{ input: 'h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg' },
]; ];
for (url of urls) { for (url of urls) {
if (url.base === undefined) if (url.base === undefined)
println(`new URL('${url.input}', ${url.base})`); println(`new URL('${escapeWhitespace(url.input)}', ${url.base})`);
else else
println(`new URL('${url.input}', '${url.base}')`); println(`new URL('${escapeWhitespace(url.input)}', '${escapeWhitespace(url.base)}')`);
printURL(new URL(url.input, url.base)); printURL(new URL(url.input, url.base));
} }
@ -39,9 +47,9 @@
for (url of urls) { for (url of urls) {
if (url.base === undefined) if (url.base === undefined)
println(`URL.parse('${url.input}', ${url.base})`); println(`URL.parse('${escapeWhitespace(url.input)}', ${url.base})`);
else else
println(`URL.parse('${url.input}', '${url.base}')`); println(`URL.parse('${escapeWhitespace(url.input)}', '${escapeWhitespace(url.base)}')`);
printURL(URL.parse(url.input, url.base)); printURL(URL.parse(url.input, url.base));
} }

View file

@ -845,9 +845,9 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error. // 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error.
// 3. Remove all ASCII tab or newline from input. // 3. Remove all ASCII tab or newline from input.
for (auto const ch : processed_input) { for (auto const ch : processed_input) {
if (ch == '\t' || ch == '\n') { if (ch == '\t' || ch == '\n' || ch == '\r') {
report_validation_error(); report_validation_error();
processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All); processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All).replace("\r"sv, ""sv, ReplaceMode::All);
break; break;
} }
} }