From d6af5bf5eb814fcb30959d2bf9fc666cdba716c7 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Tue, 6 Aug 2024 02:00:52 +1200 Subject: [PATCH] LibURL: Allow inputs containing only whitespace The check: ``` if (start_index >= end_index) return {}; ``` which was meant to prevent an out-of-bounds access when trimming whitespace from the start and end of the input, was also preventing valid URLs (inputs containing only whitespace) from being parsed. Instead, prevent start_index from ever getting above end_index in the first place, and don't treat empty inputs as an error. Fixes one WPT test on: https://wpt.live/url/url-constructor.any.html --- Tests/LibWeb/Text/expected/URL/url.txt | 20 ++++++++++++++++++++ Tests/LibWeb/Text/input/URL/url.html | 1 + Userland/Libraries/LibURL/Parser.cpp | 25 +++++++++---------------- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Tests/LibWeb/Text/expected/URL/url.txt b/Tests/LibWeb/Text/expected/URL/url.txt index a89772257ef..1bde4457716 100644 --- a/Tests/LibWeb/Text/expected/URL/url.txt +++ b/Tests/LibWeb/Text/expected/URL/url.txt @@ -108,6 +108,16 @@ port => '9000' pathname => '/path' search => '?query' hash => '#frag' +new URL(' \t', 'http://ladybird.org/foo/bar') +protocol => 'http:' +username => '' +password => '' +host => 'ladybird.org' +hostname => 'ladybird.org' +port => '' +pathname => '/foo/bar' +search => '' +hash => '' ========================================= URL.parse('ftp://serenityos.org:21', undefined) protocol => 'ftp:' @@ -219,3 +229,13 @@ port => '9000' pathname => '/path' search => '?query' hash => '#frag' +URL.parse(' \t', 'http://ladybird.org/foo/bar') +protocol => 'http:' +username => '' +password => '' +host => 'ladybird.org' +hostname => 'ladybird.org' +port => '' +pathname => '/foo/bar' +search => '' +hash => '' diff --git a/Tests/LibWeb/Text/input/URL/url.html b/Tests/LibWeb/Text/input/URL/url.html index c7135150a07..644590bbb2a 100644 --- a/Tests/LibWeb/Text/input/URL/url.html +++ 
b/Tests/LibWeb/Text/input/URL/url.html @@ -32,6 +32,7 @@ { input: 'file://a%C2%ADb/p' }, { input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' }, { input: 'h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg' }, + { input: ' \t', base: 'http://ladybird.org/foo/bar' }, ]; for (url of urls) { diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index c636b880987..ad593ce5069 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -808,29 +808,22 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error. // 3. Remove any leading and trailing C0 control or space from input. bool has_validation_error = false; - for (size_t i = 0; i < raw_input.length(); ++i) { - u8 ch = raw_input[i]; - if (is_ascii_c0_control_or_space(ch)) { - ++start_index; - has_validation_error = true; - } else { + + for (; start_index < raw_input.length(); ++start_index) { + if (!is_ascii_c0_control_or_space(raw_input[start_index])) break; - } + has_validation_error = true; } - for (ssize_t i = raw_input.length() - 1; i >= 0; --i) { - u8 ch = raw_input[i]; - if (is_ascii_c0_control_or_space(ch)) { - --end_index; - has_validation_error = true; - } else { + + for (; end_index > start_index; --end_index) { + if (!is_ascii_c0_control_or_space(raw_input[end_index - 1])) break; - } + has_validation_error = true; } + if (has_validation_error) report_validation_error(); } - if (start_index >= end_index) - return {}; ByteString processed_input = raw_input.substring_view(start_index, end_index - start_index);