diff --git a/Libraries/LibURL/Host.cpp b/Libraries/LibURL/Host.cpp
index 4f342ce1509..e86446e2f27 100644
--- a/Libraries/LibURL/Host.cpp
+++ b/Libraries/LibURL/Host.cpp
@@ -220,7 +220,7 @@ Optional Host::registrable_domain() const
     // 3. Let registrableDomain be the registrable domain determined by running the Public Suffix List algorithm with host as domain. [PSL]
     // NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional.
     //       Remove the `value_or()` if and when we update it.
-    auto registrable_domain = get_public_suffix(host_string).value_or("*"_string);
+    auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string);
 
     // 4. Assert: registrableDomain is an ASCII string that does not end with ".".
     VERIFY(all_of(registrable_domain.code_points(), is_ascii));
diff --git a/Libraries/LibURL/URL.cpp b/Libraries/LibURL/URL.cpp
index e0b879b1107..c57ae3e6b14 100644
--- a/Libraries/LibURL/URL.cpp
+++ b/Libraries/LibURL/URL.cpp
@@ -527,4 +527,34 @@ Optional get_public_suffix([[maybe_unused]] StringView host)
 #endif
 }
 
+// https://github.com/publicsuffix/list/wiki/Format#algorithm
+//
+// Returns the registrable domain of `host`, e.g. "example.com" for "www.example.com". The result is empty if
+// get_public_suffix() yields no suffix for `host`, if `host` does not end with that suffix, or if `host` has
+// no label in front of the suffix (it is a bare public suffix, possibly preceded by dots).
+Optional<String> get_registrable_domain(StringView host)
+{
+    // The registered or registrable domain is the public suffix plus one additional label.
+    auto public_suffix = get_public_suffix(host);
+    if (!public_suffix.has_value())
+        return {};
+
+    auto suffix = public_suffix->bytes_as_string_view();
+    if (host == suffix || !host.ends_with(suffix))
+        return {};
+
+    // Everything in front of the suffix, minus the dot(s) separating it from the suffix.
+    auto labels = host.substring_view(0, host.length() - suffix.length()).trim("."sv, TrimMode::Right);
+    if (labels.is_empty())
+        return {};
+
+    // `labels` is a prefix of `host`, so an index into it is valid for `host` as well. The registrable domain
+    // begins right after the last dot left in `labels`, or at the start of `host` if there is none.
+    size_t start_index = 0;
+    if (auto last_dot = labels.find_last('.'); last_dot.has_value())
+        start_index = *last_dot + 1;
+
+    return MUST(String::from_utf8(host.substring_view(start_index)));
+}
+
 }
diff --git a/Libraries/LibURL/URL.h b/Libraries/LibURL/URL.h
index 4df425b4983..ffda3acccc2 100644
--- a/Libraries/LibURL/URL.h
+++ b/Libraries/LibURL/URL.h
@@ -203,6 +203,7 @@ URL create_with_data(StringView mime_type, StringView payload, bool is_base64 =
 
 bool is_public_suffix(StringView host);
 Optional<String> get_public_suffix(StringView host);
+Optional<String> get_registrable_domain(StringView host);
 
 inline URL about_blank() { return URL::about("blank"_string); }
 inline URL about_srcdoc() { return URL::about("srcdoc"_string); }
diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp
index 5aee815492f..815fb07952a 100644
--- a/Tests/LibURL/TestURL.cpp
+++ b/Tests/LibURL/TestURL.cpp
@@ -558,3 +558,55 @@ TEST_CASE(invalid_domain_code_points)
         EXPECT(!url.has_value());
     }
 }
+
+TEST_CASE(get_registrable_domain)
+{
+    // None of the following four hosts is expected to have a registrable domain.
+    {
+        auto domain = URL::get_registrable_domain({});
+        EXPECT(!domain.has_value());
+    }
+    {
+        auto domain = URL::get_registrable_domain("foobar"sv);
+        EXPECT(!domain.has_value());
+    }
+    {
+        auto domain = URL::get_registrable_domain("com"sv);
+        EXPECT(!domain.has_value());
+    }
+    {
+        auto domain = URL::get_registrable_domain(".com"sv);
+        EXPECT(!domain.has_value());
+    }
+    // A leading dot and any further leading labels are expected to be dropped from the result.
+    {
+        auto domain = URL::get_registrable_domain("example.com"sv);
+        VERIFY(domain.has_value());
+        EXPECT_EQ(*domain, "example.com"sv);
+    }
+    {
+        auto domain = URL::get_registrable_domain(".example.com"sv);
+        VERIFY(domain.has_value());
+        EXPECT_EQ(*domain, "example.com"sv);
+    }
+    {
+        auto domain = URL::get_registrable_domain("www.example.com"sv);
+        VERIFY(domain.has_value());
+        EXPECT_EQ(*domain, "example.com"sv);
+    }
+    {
+        auto domain = URL::get_registrable_domain("sub.www.example.com"sv);
+        VERIFY(domain.has_value());
+        EXPECT_EQ(*domain, "example.com"sv);
+    }
+    // "github.io" is expected to behave as a public suffix: alone it has no registrable domain, with one more label it does.
+    {
+        auto domain = URL::get_registrable_domain("github.io"sv);
+        EXPECT(!domain.has_value());
+    }
+    {
+        auto domain = URL::get_registrable_domain("ladybird.github.io"sv);
+        VERIFY(domain.has_value());
+        EXPECT_EQ(*domain, "ladybird.github.io"sv);
+    }
+}