mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-24 18:02:20 +00:00
LibURL: Convert to scalar string before URL parsing
URL parsing is expected to take place on well formed unicode strings.
This commit is contained in:
parent
ad1938086d
commit
e6235210ff
Notes:
github-actions[bot]
2025-07-07 10:51:54 +00:00
Author: https://github.com/shannonbooth
Commit: e6235210ff
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5318
1 changed files with 26 additions and 7 deletions
|
@ -699,6 +699,25 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String
|
|||
return MUST(output.to_string());
|
||||
}
|
||||
|
||||
static bool contains_ascii_tab_or_newline(StringView input)
|
||||
{
|
||||
return any_of(input, [](char character) {
|
||||
return character == '\t' || character == '\n' || character == '\r';
|
||||
});
|
||||
}
|
||||
|
||||
static String remove_ascii_tab_or_newline(StringView input)
|
||||
{
|
||||
StringBuilder output;
|
||||
|
||||
for (auto c : input) {
|
||||
if (c != '\t' && c != '\n' && c != '\r')
|
||||
output.append(c);
|
||||
}
|
||||
|
||||
return MUST(output.to_string());
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-basic-url-parser
|
||||
Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
|
||||
{
|
||||
|
@ -733,16 +752,16 @@ Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> bas
|
|||
report_validation_error();
|
||||
}
|
||||
|
||||
ByteString processed_input = raw_input.substring_view(start_index, end_index - start_index);
|
||||
raw_input = raw_input.substring_view(start_index, end_index - start_index);
|
||||
|
||||
// NOTE: URL Parsing expects a well formed UTF-8 'scalar' string but we may be passed through a String with lone surrogates.
|
||||
auto processed_input = String::from_utf8_with_replacement_character(raw_input, String::WithBOMHandling::No);
|
||||
|
||||
// 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error.
|
||||
// 3. Remove all ASCII tab or newline from input.
|
||||
for (auto const ch : processed_input) {
|
||||
if (ch == '\t' || ch == '\n' || ch == '\r') {
|
||||
report_validation_error();
|
||||
processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All).replace("\r"sv, ""sv, ReplaceMode::All);
|
||||
break;
|
||||
}
|
||||
if (contains_ascii_tab_or_newline(processed_input)) {
|
||||
report_validation_error();
|
||||
processed_input = remove_ascii_tab_or_newline(processed_input);
|
||||
}
|
||||
|
||||
// 4. Let state be state override if given, or scheme start state otherwise.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue