From b49b1b35e4d19c4171004b7eddb6eeac91f00a97 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sat, 28 Jun 2025 22:19:33 +1200 Subject: [PATCH] LibURL: Correct logic for domains not matched by PSL in public_suffix For the abstract operation (AO) defined in the URL specification, when the domain is not matched by the PSL, we should return the TLD. This fixes a crash in a number of WPT tests that use the Document.domain setter when the tests are served by WPT locally. registrable_domain should apply the same logic, but doing so unfortunately runs into some other issues, so just leave a FIXME there for now. --- Libraries/LibURL/Host.cpp | 19 ++++++++++------ Tests/LibURL/TestURL.cpp | 48 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/Libraries/LibURL/Host.cpp b/Libraries/LibURL/Host.cpp index d0b8954b8d1..2f6c1b2d833 100644 --- a/Libraries/LibURL/Host.cpp +++ b/Libraries/LibURL/Host.cpp @@ -192,13 +192,19 @@ Optional<String> Host::public_suffix() const auto trailing_dot = host_string.ends_with('.') ? "."sv : ""sv; // 3. Let publicSuffix be the public suffix determined by running the Public Suffix List algorithm with host as domain. [PSL] - // NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional. - // Remove the `value_or()` if and when we update it. - auto public_suffix = PublicSuffixData::the()->get_public_suffix(host_string).value_or("*"_string); + // FIXME: Unify this logic with registrable domain. + auto public_suffix = PublicSuffixData::the()->get_public_suffix(host_string); + if (!public_suffix.has_value()) { + auto last_dot = host_string.bytes_as_string_view().find_last('.'); + if (last_dot.has_value()) + public_suffix = MUST(host_string.substring_from_byte_offset(last_dot.value() + 1)); + else + public_suffix = host_string; + } // 4. Assert: publicSuffix is an ASCII string that does not end with ".". 
- VERIFY(public_suffix.is_ascii()); - VERIFY(!public_suffix.ends_with('.')); + VERIFY(public_suffix->is_ascii()); + VERIFY(!public_suffix->ends_with('.')); // 5. Return publicSuffix and trailingDot concatenated. return MUST(String::formatted("{}{}", public_suffix, trailing_dot)); @@ -219,8 +225,7 @@ Optional<String> Host::registrable_domain() const auto trailing_dot = host_string.ends_with('.') ? "."sv : ""sv; // 3. Let registrableDomain be the registrable domain determined by running the Public Suffix List algorithm with host as domain. [PSL] - // NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional. - // Remove the `value_or()` if and when we update it. + // FIXME: This is not correct, we should be doing the same as public_suffix() above. auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string); // 4. Assert: registrableDomain is an ASCII string that does not end with ".". diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index 96583f13afa..7cf1982468f 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -641,3 +641,51 @@ TEST_CASE(get_registrable_domain) EXPECT_EQ(*domain, "ladybird.github.io"sv); } } + +TEST_CASE(public_suffix) +{ + { + auto domain = URL::Parser::parse_host("com"sv); + EXPECT_EQ(domain->public_suffix(), "com"sv); + } + { + auto domain = URL::Parser::parse_host("example.com"sv); + EXPECT_EQ(domain->public_suffix(), "com"sv); + } + { + auto domain = URL::Parser::parse_host("www.example.com"sv); + EXPECT_EQ(domain->public_suffix(), "com"sv); + } + { + auto domain = URL::Parser::parse_host("EXAMPLE.COM"sv); + EXPECT_EQ(domain->public_suffix(), "com"sv); + } + { + auto domain = URL::Parser::parse_host("www.example.com."sv); + EXPECT_EQ(domain->public_suffix(), "com."sv); + } + { + auto domain = URL::Parser::parse_host("github.io"sv); + EXPECT_EQ(domain->public_suffix(), "github.io"sv); + } + { + auto domain = 
URL::Parser::parse_host("whatwg.github.io"sv); + EXPECT_EQ(domain->public_suffix(), "github.io"sv); + } + { + auto domain = URL::Parser::parse_host("إختبار"sv); + EXPECT_EQ(domain->public_suffix(), "xn--kgbechtv"sv); + } + { + auto domain = URL::Parser::parse_host("example.إختبار"sv); + EXPECT_EQ(domain->public_suffix(), "xn--kgbechtv"sv); + } + { + auto domain = URL::Parser::parse_host("sub.example.إختبار"sv); + EXPECT_EQ(domain->public_suffix(), "xn--kgbechtv"sv); + } + { + auto domain = URL::Parser::parse_host("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"sv); + EXPECT_EQ(domain->public_suffix(), OptionalNone {}); + } +}