LibURL: Correctly acquire the registrable domain for a URL

We were using the public suffix of the URL's host as its registrable
domain. But the registrable domain is actually the public suffix plus
one additional label.
This commit is contained in:
Timothy Flynn 2025-03-09 11:11:35 -04:00 committed by Jelle Raaijmakers
parent d0f80e1f05
commit a34f7a5bd1
Notes: github-actions[bot] 2025-03-11 11:11:58 +00:00
4 changed files with 75 additions and 1 deletions

View file

@ -220,7 +220,7 @@ Optional<String> Host::registrable_domain() const
// 3. Let registrableDomain be the registrable domain determined by running the Public Suffix List algorithm with host as domain. [PSL]
// NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional.
// Remove the `value_or()` if and when we update it.
auto registrable_domain = get_public_suffix(host_string).value_or("*"_string);
auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string);
// 4. Assert: registrableDomain is an ASCII string that does not end with ".".
VERIFY(all_of(registrable_domain.code_points(), is_ascii));

View file

@ -527,4 +527,28 @@ Optional<String> get_public_suffix([[maybe_unused]] StringView host)
#endif
}
// https://github.com/publicsuffix/list/wiki/Format#algorithm
// Returns the public suffix of `host` together with the single label that precedes it.
// Returns an empty Optional when `host` has no public suffix, does not end with that suffix,
// is exactly the suffix, or has nothing but dots in front of the suffix.
Optional<String> get_registrable_domain(StringView host)
{
    // The registered or registrable domain is the public suffix plus one additional label.
    auto suffix = get_public_suffix(host);
    if (!suffix.has_value())
        return {};
    if (!host.ends_with(*suffix))
        return {};

    // A host that is nothing more than a public suffix has no registrable domain.
    if (host == *suffix)
        return {};

    // Take everything in front of the suffix and drop the dot(s) separating it from the suffix.
    auto const suffix_length = suffix->bytes_as_string_view().length();
    auto const labels_before_suffix = host.substring_view(0, host.length() - suffix_length).trim("."sv, TrimMode::Right);
    if (labels_before_suffix.is_empty())
        return {};

    // Only the last of those labels belongs to the registrable domain; if there is no dot,
    // the whole host is the registrable domain.
    auto const last_separator = labels_before_suffix.find_last('.');
    size_t const start_index = last_separator.has_value() ? *last_separator + 1 : 0;

    return MUST(String::from_utf8(host.substring_view(start_index)));
}
}

View file

@ -203,6 +203,7 @@ URL create_with_data(StringView mime_type, StringView payload, bool is_base64 =
// NOTE(review): presumably reports whether `host` is itself a public suffix; confirm against the definition.
bool is_public_suffix(StringView host);
// Returns the public suffix of `host`. Unlike the spec algorithm, which defaults to "*",
// this returns an empty Optional when there is no result.
Optional<String> get_public_suffix(StringView host);
// Returns the public suffix of `host` plus the one label preceding it, or an empty Optional
// if `host` has no public suffix or consists solely of a public suffix.
Optional<String> get_registrable_domain(StringView host);
// Shorthands for URL::about("blank"_string) and URL::about("srcdoc"_string).
inline URL about_blank() { return URL::about("blank"_string); }
inline URL about_srcdoc() { return URL::about("srcdoc"_string); }

View file

@ -558,3 +558,52 @@ TEST_CASE(invalid_domain_code_points)
EXPECT(!url.has_value());
}
}
TEST_CASE(get_registrable_domain)
{
    // Checks that no registrable domain is produced for `host`.
    auto expect_no_registrable_domain = [](StringView host) {
        auto result = URL::get_registrable_domain(host);
        EXPECT(!result.has_value());
    };

    // Checks that the registrable domain produced for `host` equals `expected`.
    auto expect_registrable_domain = [](StringView host, StringView expected) {
        auto result = URL::get_registrable_domain(host);
        VERIFY(result.has_value());
        EXPECT_EQ(*result, expected);
    };

    // Empty host, a host without a known suffix, and bare public suffixes.
    expect_no_registrable_domain({});
    expect_no_registrable_domain("foobar"sv);
    expect_no_registrable_domain("com"sv);
    expect_no_registrable_domain(".com"sv);

    // Any number of leading labels (or a leading dot) collapses to suffix + one label.
    expect_registrable_domain("example.com"sv, "example.com"sv);
    expect_registrable_domain(".example.com"sv, "example.com"sv);
    expect_registrable_domain("www.example.com"sv, "example.com"sv);
    expect_registrable_domain("sub.www.example.com"sv, "example.com"sv);

    // "github.io" is itself a public suffix, so the registrable domain needs one more label.
    expect_no_registrable_domain("github.io"sv);
    expect_registrable_domain("ladybird.github.io"sv, "ladybird.github.io"sv);
}