From 1edf7a8aa2a9a78bab95711573df05d9cd7b8aab Mon Sep 17 00:00:00 2001
From: Luke Wilde
Date: Tue, 26 Nov 2024 11:24:11 +0000
Subject: [PATCH] LibWeb/CSP: Implement URL matching algorithms

These are used by all the *-src attributes, to check if a given URL,
origin and redirect count matches a source list entry specified in the
*-src attribute's values, if it's allowed to.
---
 .../Directives/DirectiveOperations.cpp        | 355 ++++++++++++++++++
 .../Directives/DirectiveOperations.h          |   9 +
 2 files changed, 364 insertions(+)

diff --git a/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.cpp b/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.cpp
index fe018311ce1..2632a5188aa 100644
--- a/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.cpp
+++ b/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.cpp
@@ -8,8 +8,13 @@
 #include
 #include
 #include
+#include
 #include
+#include
+#include
 #include
+#include
+#include
 
 namespace Web::ContentSecurityPolicy::Directives {
 
@@ -224,4 +229,354 @@ FlyString get_the_effective_directive_for_inline_checks(Directive::InlineType ty
     VERIFY_NOT_REACHED();
 }
 
+// https://w3c.github.io/webappsec-csp/#scheme-part-match
+// An ASCII string scheme-part matches another ASCII string if a CSP source expression that contained the first as a
+// scheme-part could potentially match a URL containing the latter as a scheme. For example, we say that "http"
+// scheme-part matches "https".
+// More formally, two ASCII strings A and B are said to scheme-part match if the following algorithm returns "Matches":
+// Spec Note: The matching relation is asymmetric. For example, the source expressions https: and https://example.com/
+//            do not match the URL http://example.com/. We always allow a secure upgrade from an explicitly insecure
+//            expression. script-src http: is treated as equivalent to script-src http: https:,
+//            script-src http://example.com to script-src http://example.com https://example.com,
+//            and connect-src ws: to connect-src ws: wss:.
+[[nodiscard]] static MatchResult scheme_part_matches(StringView a, StringView b)
+{
+    // 1. If one of the following is true, return "Matches":
+    //    1. A is an ASCII case-insensitive match for B.
+    if (a.equals_ignoring_ascii_case(b))
+        return MatchResult::Matches;
+
+    //    2. A is an ASCII case-insensitive match for "http", and B is an ASCII case-insensitive match for "https".
+    if (a.equals_ignoring_ascii_case("http"sv) && b.equals_ignoring_ascii_case("https"sv))
+        return MatchResult::Matches;
+
+    //    3. A is an ASCII case-insensitive match for "ws", and B is an ASCII case-insensitive match for "wss", "http", or "https".
+    if (a.equals_ignoring_ascii_case("ws"sv)
+        && (b.equals_ignoring_ascii_case("wss"sv)
+            || b.equals_ignoring_ascii_case("http"sv)
+            || b.equals_ignoring_ascii_case("https"sv))) {
+        return MatchResult::Matches;
+    }
+
+    //    4. A is an ASCII case-insensitive match for "wss", and B is an ASCII case-insensitive match for "https".
+    if (a.equals_ignoring_ascii_case("wss"sv) && b.equals_ignoring_ascii_case("https"sv))
+        return MatchResult::Matches;
+
+    // 2. Return "Does Not Match".
+    return MatchResult::DoesNotMatch;
+}
+
+// https://w3c.github.io/webappsec-csp/#host-part-match
+// An ASCII string host-part matches a host if a CSP source expression that contained the first as a host-part could
+// potentially match the latter. For example, we say that "www.example.com" host-part matches "www.example.com".
+// More formally, ASCII string pattern and host host are said to host-part match if the following algorithm returns "Matches":
+// Spec Note: The matching relation is asymmetric. That is, pattern matching host does not mean that host will match pattern.
+//            For example, *.example.com host-part matches www.example.com, but www.example.com does not host-part match *.example.com.
+[[nodiscard]] static MatchResult host_part_matches(StringView pattern, Optional const& maybe_host)
+{
+    // 1. If host is not a domain, return "Does Not Match".
+    // Spec Note: A future version of this specification may allow literal IPv6 and IPv4 addresses, depending on usage and demand.
+    //            Given the weak security properties of IP addresses in relation to named hosts, however, authors are encouraged
+    //            to prefer the latter whenever possible.
+    if (!maybe_host.has_value())
+        return MatchResult::DoesNotMatch;
+
+    auto const& host = maybe_host.value();
+
+    if (!host.is_domain())
+        return MatchResult::DoesNotMatch;
+
+    // 2. If pattern is "*", return "Matches".
+    if (pattern == "*"sv)
+        return MatchResult::Matches;
+
+    VERIFY(host.has());
+    auto host_string = host.get();
+
+    // 3. If pattern starts with "*.":
+    if (pattern.starts_with("*."sv)) {
+        // 1. Let remaining be pattern with the leading U+002A (*) removed and ASCII lowercased.
+        auto remaining_without_asterisk = pattern.substring_view(1);
+        auto remaining = remaining_without_asterisk.to_ascii_lowercase_string();
+
+        // 2. If host to ASCII lowercase ends with remaining, then return "Matches".
+        auto lowercase_host = host_string.to_ascii_lowercase();
+        if (lowercase_host.ends_with_bytes(remaining))
+            return MatchResult::Matches;
+
+        // 3. Return "Does Not Match".
+        return MatchResult::DoesNotMatch;
+    }
+
+    // 4. If pattern is not an ASCII case-insensitive match for host, return "Does Not Match".
+    if (!pattern.equals_ignoring_ascii_case(host_string))
+        return MatchResult::DoesNotMatch;
+
+    // 5. Return "Matches".
+    return MatchResult::Matches;
+}
+
+// https://w3c.github.io/webappsec-csp/#port-part-matches
+// An ASCII string input port-part matches URL url if a CSP source expression that contained the first as a port-part
+// could potentially match a URL containing the latter’s port and scheme. For example, "80" port-part matches
+// http://example.com.
+[[nodiscard]] static MatchResult port_part_matches(Optional input, URL::URL const& url)
+{
+    // FIXME: 1. Assert: input is the empty string, "*", or a sequence of ASCII digits.
+
+    // 2. If input is equal to "*", return "Matches".
+    if (input == "*"sv)
+        return MatchResult::Matches;
+
+    // 3. Let normalizedInput be null if input is the empty string; otherwise input interpreted as decimal number.
+    Optional normalized_input;
+    if (input.has_value()) {
+        VERIFY(!input.value().is_empty());
+        auto maybe_port = input.value().to_number(TrimWhitespace::No);
+
+        // If the port is empty here, then it's because the input overflowed the u16. Since this means it's bigger than
+        // a u16, it can never match the URL's port, which is only within the u16 range.
+        if (!maybe_port.has_value())
+            return MatchResult::DoesNotMatch;
+
+        normalized_input = maybe_port.value();
+    }
+
+    // 4. If normalizedInput equals url’s port, return "Matches".
+    if (normalized_input == url.port())
+        return MatchResult::Matches;
+
+    // 5. If url’s port is null:
+    if (!url.port().has_value()) {
+        // 1. Let defaultPort be the default port for url’s scheme.
+        auto default_port = URL::default_port_for_scheme(url.scheme());
+
+        // 2. If normalizedInput equals defaultPort, return "Matches".
+        if (normalized_input == default_port)
+            return MatchResult::Matches;
+    }
+
+    // 6. Return "Does Not Match".
+    return MatchResult::DoesNotMatch;
+}
+
+// https://w3c.github.io/webappsec-csp/#path-part-match
+// An ASCII string path A path-part matches another ASCII string path B if a CSP source expression that contained the
+// first as a path-part could potentially match a URL containing the latter as a path. For example, we say that
+// "/subdirectory/" path-part matches "/subdirectory/file".
+// Spec Note: The matching relation is asymmetric. That is, path A matching path B does not mean that path B will
+//            match path A.
+[[nodiscard]] static MatchResult path_part_matches(StringView a, StringView b)
+{
+    // 1. If path A is the empty string, return "Matches".
+    if (a.is_empty())
+        return MatchResult::Matches;
+
+    // 2. If path A consists of one character that is equal to the U+002F SOLIDUS character (/) and path B is the empty
+    //    string, return "Matches".
+    if (a == "/"sv && b.is_empty())
+        return MatchResult::Matches;
+
+    // 3. Let exact match be false if the final character of path A is the U+002F SOLIDUS character (/), and true
+    //    otherwise.
+    auto exact_match = !a.ends_with('/');
+
+    // 4. Let path list A and path list B be the result of strictly splitting path A and path B respectively on the
+    //    U+002F SOLIDUS character (/).
+    auto path_list_a = a.split_view('/', SplitBehavior::KeepEmpty);
+    auto path_list_b = b.split_view('/', SplitBehavior::KeepEmpty);
+
+    // 5. If path list A has more items than path list B, return "Does Not Match".
+    if (path_list_a.size() > path_list_b.size())
+        return MatchResult::DoesNotMatch;
+
+    // 6. If exact match is true, and path list A does not have the same number of items as path list B,
+    //    return "Does Not Match".
+    if (exact_match && path_list_a.size() != path_list_b.size())
+        return MatchResult::DoesNotMatch;
+
+    // 7. If exact match is false:
+    if (!exact_match) {
+        // 1. Assert: the final item in path list A is the empty string.
+        VERIFY(path_list_a.last().is_empty());
+
+        // 2. Remove the final item from path list A.
+        (void)path_list_a.take_last();
+    }
+
+    // 8. For each piece A of path list A:
+    for (size_t path_set_a_index = 0; path_set_a_index < path_list_a.size(); ++path_set_a_index) {
+        auto piece_a = path_list_a[path_set_a_index];
+
+        // 1. Let piece B be the next item in path list B.
+        auto piece_b = path_list_b[path_set_a_index];
+
+        // 2. Let decoded piece A be the percent-decoding of piece A.
+        auto decoded_piece_a = URL::percent_decode(piece_a);
+
+        // 3. Let decoded piece B be the percent-decoding of piece B.
+        auto decoded_piece_b = URL::percent_decode(piece_b);
+
+        // 4. If decoded piece A is not decoded piece B, return "Does Not Match".
+        if (decoded_piece_a != decoded_piece_b)
+            return MatchResult::DoesNotMatch;
+    }
+
+    // 9. Return "Matches".
+    return MatchResult::Matches;
+}
+
+// https://w3c.github.io/webappsec-csp/#match-url-to-source-expression
+MatchResult does_url_match_expression_in_origin_with_redirect_count(URL::URL const& url, String const& expression, URL::Origin const& origin, u8 redirect_count)
+{
+    // Spec Note: origin is the origin of the resource relative to which the expression should be resolved.
+    //            "'self'", for instance, will have distinct meaning depending on that bit of context.
+
+    // 1. If expression is the string "*", return "Matches" if one or more of the following conditions is met:
+    //    1. url’s scheme is an HTTP(S) scheme.
+    //    2. url’s scheme is the same as origin’s scheme.
+    // Spec Note: This logic means that in order to allow a resource from a non-HTTP(S) scheme, it has to be either
+    //            explicitly specified (e.g. default-src * data: custom-scheme-1: custom-scheme-2:), or the protected
+    //            resource must be loaded from the same scheme.
+    StringView origin_scheme {};
+    if (!origin.is_opaque() && origin.scheme().has_value())
+        origin_scheme = origin.scheme()->bytes_as_string_view();
+
+    if (expression == "*"sv && (Fetch::Infrastructure::is_http_or_https_scheme(url.scheme()) || url.scheme() == origin_scheme))
+        return MatchResult::Matches;
+
+    // 2. If expression matches the scheme-source or host-source grammar:
+    auto scheme_source_parse_result = parse_source_expression(Production::SchemeSource, expression);
+    auto host_source_parse_result = parse_source_expression(Production::HostSource, expression);
+    if (scheme_source_parse_result.has_value() || host_source_parse_result.has_value()) {
+        // 1. If expression has a scheme-part, and it does not scheme-part match url’s scheme, return "Does Not Match".
+        auto maybe_scheme_part = scheme_source_parse_result.has_value()
+            ? scheme_source_parse_result->scheme_part
+            : host_source_parse_result->scheme_part;
+
+        if (maybe_scheme_part.has_value()) {
+            if (scheme_part_matches(maybe_scheme_part.value(), url.scheme()) == MatchResult::DoesNotMatch)
+                return MatchResult::DoesNotMatch;
+        }
+
+        // 2. If expression matches the scheme-source grammar, return "Matches".
+        if (scheme_source_parse_result.has_value())
+            return MatchResult::Matches;
+    }
+
+    // 3. If expression matches the host-source grammar:
+    if (host_source_parse_result.has_value()) {
+        // 1. If url’s host is null, return "Does Not Match".
+        if (!url.host().has_value())
+            return MatchResult::DoesNotMatch;
+
+        // 2. If expression does not have a scheme-part, and origin’s scheme does not scheme-part match url’s scheme,
+        //    return "Does Not Match".
+        // Spec Note: As with scheme-part above, we allow schemeless host-source expressions to be upgraded from
+        //            insecure schemes to secure schemes.
+        if (!host_source_parse_result->scheme_part.has_value() && scheme_part_matches(origin_scheme, url.scheme()) == MatchResult::DoesNotMatch)
+            return MatchResult::DoesNotMatch;
+
+        // 3. If expression’s host-part does not host-part match url’s host, return "Does Not Match".
+        VERIFY(host_source_parse_result->host_part.has_value());
+        if (host_part_matches(host_source_parse_result->host_part.value(), url.host()) == MatchResult::DoesNotMatch)
+            return MatchResult::DoesNotMatch;
+
+        // 4. Let port-part be expression’s port-part if present, and null otherwise.
+        auto port_part = host_source_parse_result->port_part;
+
+        // 5. If port-part does not port-part match url, return "Does Not Match".
+        if (port_part_matches(port_part, url) == MatchResult::DoesNotMatch)
+            return MatchResult::DoesNotMatch;
+
+        // 6. If expression contains a non-empty path-part, and redirect count is 0, then:
+        if (host_source_parse_result->path_part.has_value() && !host_source_parse_result->path_part->is_empty() && redirect_count == 0) {
+            // 1. Let path be the resulting of joining url’s path on the U+002F SOLIDUS character (/).
+            // FIXME: File spec issue that if path_part is only '/', then plainly joining will always fail to match.
+            //        It should likely use the URL path serializer instead.
+            StringBuilder builder;
+            builder.append('/');
+            builder.join('/', url.paths());
+            auto path = MUST(builder.to_string());
+
+            // 2. If expression’s path-part does not path-part match path, return "Does Not Match".
+            if (path_part_matches(host_source_parse_result->path_part.value(), path) == MatchResult::DoesNotMatch)
+                return MatchResult::DoesNotMatch;
+        }
+
+        // 7. Return "Matches".
+        return MatchResult::Matches;
+    }
+
+    // 4. If expression is an ASCII case-insensitive match for "'self'", return "Matches" if one or more of the
+    //    following conditions is met:
+    // Spec Note: Like the scheme-part logic above, the "'self'" matching algorithm allows upgrades to secure schemes
+    //            when it is safe to do so. We limit these upgrades to endpoints running on the default port for a
+    //            particular scheme or a port that matches the origin of the protected resource, as this seems
+    //            sufficient to deal with upgrades that can be reasonably expected to succeed.
+    if (expression.equals_ignoring_ascii_case(KeywordSources::Self)) {
+        // 1. origin is the same as url’s origin
+        if (origin.is_same_origin(url.origin()))
+            return MatchResult::Matches;
+
+        // 2. origin’s host is the same as url’s host, origin’s port and url’s port are either the same or the default
+        //    ports for their respective schemes, and one or more of the following conditions is met:
+        auto origin_default_port = URL::default_port_for_scheme(origin_scheme);
+        auto url_default_port = URL::default_port_for_scheme(url.scheme());
+
+        Optional origin_host;
+        Optional origin_port;
+
+        if (!origin.is_opaque()) { // NOTE: Host and port are only read from non-opaque origins; the check below must use these locals.
+            origin_host = origin.host();
+            origin_port = origin.port();
+        }
+
+        if (origin_host == url.host() && (origin_port == url.port() || (origin_port == origin_default_port && url.port() == url_default_port))) {
+            // 1. url’s scheme is "https" or "wss"
+            if (url.scheme() == "https"sv || url.scheme() == "wss"sv)
+                return MatchResult::Matches;
+
+            // 2. origin’s scheme is "http" and url’s scheme is "http" or "ws"
+            if (origin_scheme == "http"sv && (url.scheme() == "http"sv || url.scheme() == "ws"sv))
+                return MatchResult::Matches;
+        }
+    }
+
+    // 5. Return "Does Not Match".
+    return MatchResult::DoesNotMatch;
+}
+
+// https://w3c.github.io/webappsec-csp/#match-url-to-source-list
+MatchResult does_url_match_source_list_in_origin_with_redirect_count(URL::URL const& url, Vector const& source_list, URL::Origin const& origin, u8 redirect_count)
+{
+    // 1. Assert: source list is not null.
+    // NOTE: Already done by source_list being passed by reference.
+
+    // 2. If source list is empty, return "Does Not Match".
+    // Spec Note: An empty source list (that is, a directive without a value: script-src, as opposed to script-src host1)
+    //            is equivalent to a source list containing 'none', and will not match any URL.
+    if (source_list.is_empty())
+        return MatchResult::DoesNotMatch;
+
+    // 3. If source list’s size is 1, and source list[0] is an ASCII case-insensitive match for the string "'none'",
+    //    return "Does Not Match".
+    // Spec Note: The 'none' keyword has no effect when other source expressions are present. That is, the list « 'none' »
+    //            does not match any URL. A list consisting of « 'none', https://example.com », on the other hand, would
+    //            match https://example.com/.
+    if (source_list.size() == 1 && source_list.first().equals_ignoring_ascii_case("'none'"sv))
+        return MatchResult::DoesNotMatch;
+
+    // 4. For each expression of source list:
+    for (auto const& expression : source_list) {
+        // 1. If § 6.7.2.8 Does url match expression in origin with redirect count? returns "Matches" when executed
+        //    upon url, expression, origin, and redirect count, return "Matches".
+        if (does_url_match_expression_in_origin_with_redirect_count(url, expression, origin, redirect_count) == MatchResult::Matches)
+            return MatchResult::Matches;
+    }
+
+    // 5. Return "Does Not Match".
+    return MatchResult::DoesNotMatch;
+}
+
 }
diff --git a/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.h b/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.h
index 2631e58accc..b8d859a5c87 100644
--- a/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.h
+++ b/Libraries/LibWeb/ContentSecurityPolicy/Directives/DirectiveOperations.h
@@ -8,6 +8,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -18,10 +19,18 @@ enum class ShouldExecute {
     Yes,
 };
 
+enum class MatchResult {
+    DoesNotMatch,
+    Matches,
+};
+
 [[nodiscard]] Optional get_the_effective_directive_for_request(GC::Ref request);
 [[nodiscard]] Vector get_fetch_directive_fallback_list(Optional directive_name);
 [[nodiscard]] ShouldExecute should_fetch_directive_execute(Optional effective_directive_name, FlyString const& directive_name, GC::Ref policy);
 [[nodiscard]] FlyString get_the_effective_directive_for_inline_checks(Directive::InlineType type);
 
+[[nodiscard]] MatchResult does_url_match_expression_in_origin_with_redirect_count(URL::URL const& url, String const& expression, URL::Origin const& origin, u8 redirect_count);
+[[nodiscard]] MatchResult does_url_match_source_list_in_origin_with_redirect_count(URL::URL const& url, Vector const& source_list, URL::Origin const& origin, u8 redirect_count);
+
 }