LibURL: Correctly acquire the registrable domain for a URL

We were using the public suffix of the URL's host as its registrable
domain. But the registrable domain is actually the public suffix plus
one additional label.
This commit is contained in:
Timothy Flynn 2025-03-09 11:11:35 -04:00 committed by Jelle Raaijmakers
commit a34f7a5bd1
Notes: github-actions[bot] 2025-03-11 11:11:58 +00:00
4 changed files with 75 additions and 1 deletions

View file

@ -527,4 +527,28 @@ Optional<String> get_public_suffix([[maybe_unused]] StringView host)
#endif
}
// https://github.com/publicsuffix/list/wiki/Format#algorithm
//
// Computes the registrable domain of `host`: its public suffix with exactly one more label in front of it.
// Returns an empty Optional when no public suffix is reported for the host, when the host does not end with
// that suffix, or when the host consists of nothing but the suffix (optionally preceded only by dots).
Optional<String> get_registrable_domain(StringView host)
{
    auto const suffix = get_public_suffix(host);
    if (!suffix.has_value())
        return {};

    // A host that does not end in its suffix, or that is a bare suffix, has no registrable domain.
    auto const suffix_view = suffix->bytes_as_string_view();
    if (!host.ends_with(suffix_view) || host == suffix_view)
        return {};

    // Isolate everything in front of the suffix and drop the dot(s) that separated it from the suffix.
    auto const prefix_length = host.length() - suffix_view.length();
    auto const labels_before_suffix = host.substring_view(0, prefix_length).trim("."sv, TrimMode::Right);
    if (labels_before_suffix.is_empty())
        return {};

    // The registrable domain begins at the last label of that prefix. If the prefix contains no dot, it is a
    // single label and the whole host is the result.
    auto const last_dot = labels_before_suffix.find_last('.');
    size_t const domain_start = last_dot.has_value() ? last_dot.value() + 1 : 0;

    return MUST(String::from_utf8(host.substring_view(domain_start)));
}
}