diff --git a/Tests/LibWeb/Text/expected/URL/invalid-urls.txt b/Tests/LibWeb/Text/expected/URL/invalid-urls.txt new file mode 100644 index 00000000000..7ffbe3d1b7b --- /dev/null +++ b/Tests/LibWeb/Text/expected/URL/invalid-urls.txt @@ -0,0 +1,2 @@ +new URL('file://xn--/p', undefined) +error creating URL: 'TypeError: Invalid URL' diff --git a/Tests/LibWeb/Text/input/URL/invalid-urls.html b/Tests/LibWeb/Text/input/URL/invalid-urls.html new file mode 100644 index 00000000000..b8f73c9fece --- /dev/null +++ b/Tests/LibWeb/Text/input/URL/invalid-urls.html @@ -0,0 +1,21 @@ + + diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index 36a67998c39..6a9aa801b86 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -596,14 +596,26 @@ static ErrorOr domain_to_ascii(StringView domain, bool be_strict) // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46] // 2. If result is a failure value, domain-to-ASCII validation error, return failure. - // OPTIMIZATION: Fast path for all-ASCII domain strings. - if (all_of(domain, is_ascii)) { + // OPTIMIZATION: If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) + // does not produce any item that starts with an ASCII case-insensitive match for "xn--", this + // step is equivalent to ASCII lowercasing domain. + if (!be_strict && all_of(domain, is_ascii)) { // 3. If result is the empty string, domain-to-ASCII validation error, return failure. if (domain.is_empty()) return Error::from_string_literal("Empty domain"); - auto lowercase_domain = domain.to_lowercase_string(); - return String::from_utf8_without_validation(lowercase_domain.bytes()); + bool slow_path = false; + for (auto part : domain.split_view('.')) { + if (part.starts_with("xn--"sv, CaseSensitivity::CaseInsensitive)) { + slow_path = true; + break; + } + } + + if (!slow_path) { + auto lowercase_domain = domain.to_lowercase_string(); + return String::from_utf8_without_validation(lowercase_domain.bytes()); + } } Unicode::IDNA::ToAsciiOptions const options {