ladybird/Userland/Libraries/LibWebView/URL.cpp
Tim Ledbetter e9f34c7bd1 LibWebView: Don't query public suffix list when sanitizing URLs
Previously, part of the procedure we used to sanitize URLs entered via
the command line would check the host against the public suffix
database. This led to some valid, but not publicly accessible URLs
being treated as invalid.
2024-06-10 20:34:57 -04:00

160 lines
4.7 KiB
C++

/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/String.h>
#include <LibCore/System.h>
#include <LibFileSystem/FileSystem.h>
#include <LibWebView/URL.h>
#if defined(ENABLE_PUBLIC_SUFFIX)
# include <LibWebView/PublicSuffixData.h>
#endif
namespace WebView {
// Reports whether `host` is itself a public suffix according to PublicSuffixData.
// Builds without ENABLE_PUBLIC_SUFFIX have no suffix data and always answer false.
bool is_public_suffix([[maybe_unused]] StringView host)
{
#if !defined(ENABLE_PUBLIC_SUFFIX)
    return false;
#else
    return PublicSuffixData::the()->is_public_suffix(host);
#endif
}
// Returns the public suffix that PublicSuffixData reports for `host`.
// Builds without ENABLE_PUBLIC_SUFFIX have no suffix data and always return an empty Optional.
// A failing lookup is not propagated to the caller; it is wrapped in MUST().
Optional<String> get_public_suffix([[maybe_unused]] StringView host)
{
#if !defined(ENABLE_PUBLIC_SUFFIX)
    return {};
#else
    return MUST(PublicSuffixData::the()->get_public_suffix(host));
#endif
}
// Turns user-entered text into a URL.
//
// Resolution order:
//   1. If `url` names an existing filesystem entry, a file:// URL for its real path is returned
//      (or an empty Optional if the real path cannot be resolved).
//   2. Otherwise, ".com" is optionally appended (AppendTLD::Yes), "https://" is prepended when the
//      text has no "about:" prefix and no "://", and the result is parsed.
//   3. If parsing fails, the text is substituted into the `search_engine` format string, when one
//      was provided; otherwise an empty Optional is returned.
Optional<URL::URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
{
    if (FileSystem::exists(url)) {
        auto resolved_path = FileSystem::real_path(url);
        if (!resolved_path.is_error())
            return URL::create_with_file_scheme(resolved_path.value());
        return {};
    }

    // Owns the characters that `url` points at once a TLD has been appended, so it must outlive `url`.
    String url_with_tld;

    if (append_tld == AppendTLD::Yes) {
        // FIXME: Expand the list of top level domains.
        bool const has_known_tld = url.ends_with(".com"sv) || url.ends_with(".net"sv) || url.ends_with(".org"sv);
        if (!has_known_tld) {
            url_with_tld = MUST(String::formatted("{}.com", url));
            url = url_with_tld;
        }
    }

    ByteString url_with_scheme = url;
    bool const already_has_scheme = url_with_scheme.starts_with("about:"sv) || url_with_scheme.contains("://"sv);
    if (!already_has_scheme)
        url_with_scheme = ByteString::formatted("https://{}"sv, url_with_scheme);

    auto parsed_url = URL::create_with_url_or_path(url_with_scheme);
    if (parsed_url.is_valid())
        return parsed_url;

    // Not a usable URL: fall back to a search query if a search engine was configured.
    if (!search_engine.has_value())
        return {};

    // Note that `url` here includes any ".com" appended above.
    // NOTE(review): the query text is percent-decoded, not encoded, before substitution - confirm this is intended.
    return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
}
// Splits a file URL string into its "<scheme>://" prefix and everything after it.
// The third URLParts member is left empty. The caller is expected to have verified that
// `url_string` begins with the scheme followed by "://".
static URLParts break_file_url_into_parts(URL::URL const& url, StringView url_string)
{
    auto const prefix_length = url.scheme().bytes_as_string_view().length() + "://"sv.length();

    auto const scheme_part = url_string.substring_view(0, prefix_length);
    auto const path_part = url_string.substring_view(prefix_length);

    return URLParts { scheme_part, path_part, {} };
}
// Splits an http(s) URL string into three views over `url_string`:
//   1. the "<scheme>://" prefix plus any leading subdomain labels,
//   2. the last label before the public suffix together with the suffix itself,
//   3. everything from the first '/', '?' or '#' after the host onwards (possibly empty).
//
// For example, if the public suffix of "www.example.com" is reported as "com", then
// "https://www.example.com/path" yields { "https://www.", "example.com", "/path" }.
//
// When no public suffix is known for the host, or the host does not end with it, the first part
// is just "<scheme>://" and the second part is the whole host.
static URLParts break_web_url_into_parts(URL::URL const& url, StringView url_string)
{
    auto const scheme_length = url.scheme().bytes_as_string_view().length() + "://"sv.length();
    auto const scheme = url_string.substring_view(0, scheme_length);
    auto const after_scheme = url_string.substring_view(scheme_length);

    // Until a delimiter is found, the whole remainder of the string is treated as the host.
    StringView domain = after_scheme;
    StringView remainder;

    if (auto delimiter = after_scheme.find_any_of("/?#"sv); delimiter.has_value()) {
        domain = after_scheme.substring_view(0, *delimiter);
        remainder = after_scheme.substring_view(*delimiter);
    }

    auto public_suffix = get_public_suffix(domain);
    if (!public_suffix.has_value() || !domain.ends_with(*public_suffix))
        return { scheme, domain, remainder };

    // Everything in the host that precedes the public suffix, without the separating dot(s).
    auto const suffix_length = public_suffix->bytes_as_string_view().length();
    auto const labels_before_suffix = domain.substring_view(0, domain.length() - suffix_length).trim("."sv, TrimMode::Right);

    // The subdomain runs up to and including the last dot in those labels; without a dot there is none.
    size_t subdomain_length = 0;
    if (auto last_dot = labels_before_suffix.find_last('.'); last_dot.has_value()) {
        subdomain_length = *last_dot + 1;
        domain = domain.substring_view(subdomain_length);
    }

    return { url_string.substring_view(0, scheme_length + subdomain_length), domain, remainder };
}
// Breaks `url_string` into URLParts for display purposes.
//
// Returns an empty Optional when the string does not parse as a valid URL, when it does not
// literally begin with the parsed scheme followed by "://", or when the scheme is anything other
// than file, http or https.
Optional<URLParts> break_url_into_parts(StringView url_string)
{
    auto url = URL::create_with_url_or_path(url_string);
    if (!url.is_valid())
        return {};

    auto const& scheme = url.scheme();
    auto const scheme_length = scheme.bytes_as_string_view().length();

    // The helpers slice `url_string` by scheme length, so the text must really start with "<scheme>://".
    // The second check only runs once the first has established that the string is long enough.
    bool const has_scheme_prefix = url_string.starts_with(scheme)
        && url_string.substring_view(scheme_length).starts_with("://"sv);
    if (!has_scheme_prefix)
        return {};

    if (scheme == "file"sv)
        return break_file_url_into_parts(url, url_string);

    if (scheme.is_one_of("http"sv, "https"sv))
        return break_web_url_into_parts(url, url_string);

    return {};
}
// Classifies a URL by its scheme: "mailto" is Email, "tel" is Telephone, anything else is Other.
URLType url_type(URL::URL const& url)
{
    auto const& scheme = url.scheme();

    if (scheme == "mailto"sv)
        return URLType::Email;

    if (scheme == "tel"sv)
        return URLType::Telephone;

    return URLType::Other;
}
// Produces the text to copy for a URL. For mailto: and tel: URLs the leading "mailto:" / "tel:"
// is dropped from the serialized URL; every other URL is returned in its serialized form.
String url_text_to_copy(URL::URL const& url)
{
    auto serialized_url = MUST(url.to_string());
    auto const& scheme = url.scheme();

    // Drops the first `prefix.length()` bytes of the serialized URL.
    auto without_prefix = [&](StringView prefix) {
        return MUST(serialized_url.substring_from_byte_offset(prefix.length()));
    };

    if (scheme == "mailto"sv)
        return without_prefix("mailto:"sv);

    if (scheme == "tel"sv)
        return without_prefix("tel:"sv);

    return serialized_url;
}
}