LibWebView: Do not use AK::format to format search engine URLs

This is to prepare for custom search engines. If we use AK::format, it
would be trivial for a user (or bad actor) to come up with a template
search engine URL that ultimately crashes the browser due to internal
assertions in AK::format. For example:

    https://example.com/crash={1}

Rather than coming up with a complicated pre-format validator, let's
just not use AK::format. Custom URLs will signify their template query
parameters with "%s". So we can do the same with our built-in engines.
When it comes time to format the URL, we will do a simple string
replacement.
This commit is contained in:
Timothy Flynn 2025-04-04 17:32:04 -04:00 committed by Andreas Kling
commit dbf4b189a4
Notes: github-actions[bot] 2025-04-06 11:46:09 +00:00
9 changed files with 44 additions and 56 deletions

View file

@ -5,21 +5,22 @@
*/
#include <AK/Find.h>
#include <LibURL/URL.h>
#include <LibWebView/SearchEngine.h>
namespace WebView {
// Table of built-in search engines. Each entry is a display name followed by
// a query URL template.
// NOTE: this view shows both sides of the change, so every engine is listed
// twice: first with the "{}" placeholder, then with the "%s" placeholder.
static auto builtin_search_engines = to_array<SearchEngine>({
// Entries using the "{}" placeholder.
{ "Bing"_string, "https://www.bing.com/search?q={}"_string },
{ "Brave"_string, "https://search.brave.com/search?q={}"_string },
{ "DuckDuckGo"_string, "https://duckduckgo.com/?q={}"_string },
{ "Ecosia"_string, "https://ecosia.org/search?q={}"_string },
{ "Google"_string, "https://www.google.com/search?q={}"_string },
{ "Kagi"_string, "https://kagi.com/search?q={}"_string },
{ "Mojeek"_string, "https://www.mojeek.com/search?q={}"_string },
{ "Startpage"_string, "https://startpage.com/search?q={}"_string },
{ "Yahoo"_string, "https://search.yahoo.com/search?p={}"_string },
{ "Yandex"_string, "https://yandex.com/search/?text={}"_string },
// Entries using the "%s" placeholder, which
// SearchEngine::format_search_query_for_navigation() substitutes by plain
// string replacement.
{ "Bing"_string, "https://www.bing.com/search?q=%s"_string },
{ "Brave"_string, "https://search.brave.com/search?q=%s"_string },
{ "DuckDuckGo"_string, "https://duckduckgo.com/?q=%s"_string },
{ "Ecosia"_string, "https://ecosia.org/search?q=%s"_string },
{ "Google"_string, "https://www.google.com/search?q=%s"_string },
{ "Kagi"_string, "https://kagi.com/search?q=%s"_string },
{ "Mojeek"_string, "https://www.mojeek.com/search?q=%s"_string },
{ "Startpage"_string, "https://startpage.com/search?q=%s"_string },
{ "Yahoo"_string, "https://search.yahoo.com/search?p=%s"_string },
{ "Yandex"_string, "https://yandex.com/search/?text=%s"_string },
});
ReadonlySpan<SearchEngine> search_engines()
@ -34,29 +35,20 @@ Optional<SearchEngine> find_search_engine_by_name(StringView name)
});
}
// Looks up a built-in search engine whose query URL template is exactly equal
// to `query_url`. Yields an empty Optional when no built-in engine matches.
Optional<SearchEngine const&> find_search_engine_by_query_url(StringView query_url)
{
    auto has_matching_query_url = [&](auto const& candidate) {
        return candidate.query_url == query_url;
    };

    return find_value(builtin_search_engines, has_matching_query_url);
}
// Builds the human-readable label shown for a search, of the form
// `Search <engine name> for "<query>"`.
// The query is formatted with a precision of MAX_SEARCH_STRING_LENGTH, and
// "..." is appended whenever query.length() exceeds that limit.
// NOTE: this view shows both sides of the change. The free-function form looks
// the engine up by its query URL and falls back to `Search for "<query>"` when
// none is found; the member form uses this engine's own `name` directly.
String format_search_query_for_display(StringView query_url, StringView query)
String SearchEngine::format_search_query_for_display(StringView query) const
{
static constexpr auto MAX_SEARCH_STRING_LENGTH = 32;
// Free-function form: resolve the engine name from the query URL first.
if (auto search_engine = find_search_engine_by_query_url(query_url); search_engine.has_value()) {
return MUST(String::formatted("Search {} for \"{:.{}}{}\"",
search_engine->name,
query,
MAX_SEARCH_STRING_LENGTH,
query.length() > MAX_SEARCH_STRING_LENGTH ? "..."sv : ""sv));
}
return MUST(String::formatted("Search for \"{:.{}}{}\"",
// Member form: the engine name is always available as `name`.
return MUST(String::formatted("Search {} for \"{:.{}}{}\"",
name,
query,
MAX_SEARCH_STRING_LENGTH,
query.length() > MAX_SEARCH_STRING_LENGTH ? "..."sv : ""sv));
}
// Produces the URL string to navigate to for `query`: every "%s" occurrence in
// this engine's query URL template is replaced with the percent-encoded query.
// Plain string replacement is used instead of AK::format so that a template
// containing format specifiers (e.g. "{1}") cannot trip formatter assertions.
// NOTE(review): confirm that URL::percent_encode's default encode set escapes
// query delimiters such as '&' and '#'; this block does not show which set is used.
String SearchEngine::format_search_query_for_navigation(StringView query) const
{
    auto encoded_query = URL::percent_encode(query);
    return MUST(query_url.replace("%s"sv, encoded_query, ReplaceMode::All));
}
}

View file

@ -13,13 +13,14 @@
namespace WebView {
// A search engine entry: a display name plus a query URL template.
struct SearchEngine {
    // Name shown to the user, e.g. "Google".
    String name;

    // URL template in which "%s" marks where the search query is inserted.
    String query_url;

    // Returns the user-facing label describing a search for `query` with this engine.
    String format_search_query_for_display(StringView query) const;

    // Returns `query_url` with each "%s" replaced by the percent-encoded `query`.
    String format_search_query_for_navigation(StringView query) const;
};
// Returns a read-only span of search engines (presumably the built-in table;
// the definition is not visible in this view).
ReadonlySpan<SearchEngine> search_engines();
// Looks up a search engine by name; presumably empty when no engine matches
// (definition only partially visible here — confirm).
Optional<SearchEngine> find_search_engine_by_name(StringView name);
// Returns the built-in engine whose query URL template equals `query_url`,
// or an empty Optional if there is none.
Optional<SearchEngine const&> find_search_engine_by_query_url(StringView query_url);
// Builds the display label for a search, resolving the engine name from
// `query_url` when it belongs to a built-in engine.
String format_search_query_for_display(StringView query_url, StringView query);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
* Copyright (c) 2025, Manuel Zahariev <manuel@duck.com>
*
@ -13,13 +13,13 @@
namespace WebView {
Optional<URL::URL> sanitize_url(StringView location, Optional<StringView> search_engine, AppendTLD append_tld)
Optional<URL::URL> sanitize_url(StringView location, Optional<SearchEngine> const& search_engine, AppendTLD append_tld)
{
auto search_url_or_error = [&]() -> Optional<URL::URL> {
if (!search_engine.has_value())
return {};
return URL::Parser::basic_parse(MUST(String::formatted(*search_engine, URL::percent_encode(location))));
return URL::Parser::basic_parse(search_engine->format_search_query_for_navigation(location));
};
location = location.trim_whitespace();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -9,6 +9,7 @@
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <LibURL/URL.h>
#include <LibWebView/SearchEngine.h>
namespace WebView {
@ -16,7 +17,7 @@ enum class AppendTLD {
No,
Yes,
};
Optional<URL::URL> sanitize_url(StringView, Optional<StringView> search_engine = {}, AppendTLD = AppendTLD::No);
Optional<URL::URL> sanitize_url(StringView, Optional<SearchEngine> const& search_engine = {}, AppendTLD = AppendTLD::No);
Vector<URL::URL> sanitize_urls(ReadonlySpan<ByteString> raw_urls, URL::URL const& new_tab_page_url);
struct URLParts {