diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index 827d64151bd..7f84ec453a0 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -42,6 +42,7 @@ set(SOURCES ContentSecurityPolicy/Directives/KeywordSources.cpp ContentSecurityPolicy/Directives/Names.cpp ContentSecurityPolicy/Directives/SerializedDirective.cpp + ContentSecurityPolicy/Directives/SourceExpression.cpp ContentSecurityPolicy/Policy.cpp ContentSecurityPolicy/PolicyList.cpp ContentSecurityPolicy/SecurityPolicyViolationEvent.cpp diff --git a/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.cpp b/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.cpp new file mode 100644 index 00000000000..8d4d2b7a381 --- /dev/null +++ b/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.cpp @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2025, Luke Wilde + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace Web::ContentSecurityPolicy::Directives { + +// https://w3c.github.io/webappsec-csp/#source-expression +class SourceExpressionParser { +public: + explicit SourceExpressionParser(StringView input) + : m_input(input) + , m_state({ + .lexer = GenericLexer { input }, + .parse_result = {}, + }) + { + } + + [[nodiscard]] GenericLexer const& lexer() const { return m_state.lexer; } + [[nodiscard]] SourceExpressionParseResult const& parse_result() const { return m_state.parse_result; } + + // https://w3c.github.io/webappsec-csp/#grammardef-scheme-source + [[nodiscard]] bool parse_scheme_source() + { + // ; Schemes: "https:" / "custom-scheme:" / "another.custom-scheme:" + // scheme-source = scheme-part ":" + if (!parse_scheme_part()) + return false; + + return m_state.lexer.consume_specific(':'); + } + + // https://w3c.github.io/webappsec-csp/#grammardef-scheme-part + [[nodiscard]] bool parse_scheme_part() + { + // scheme-part = scheme + // ; scheme is defined in section 3.1 of RFC 3986. + StateTransaction transaction { *this }; + + if (!parse_scheme()) + return false; + + m_state.parse_result.scheme_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 + [[nodiscard]] bool parse_scheme() + { + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + if (!m_state.lexer.consume_specific_with_predicate(is_ascii_alpha)) + return false; + + (void)m_state.lexer.consume_while([](char ch) { + return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '-' || ch == '.'; + }); + return true; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-host-source + [[nodiscard]] bool parse_host_source() + { + // ; Hosts: "example.com" / "*.example.com" / "https://*.example.com:12/path/to/file.js" + // host-source = [ scheme-part "://" ] host-part [ ":" port-part ] [ path-part ] + auto parse_scheme = [&] { + StateTransaction transaction { *this }; + + if (!parse_scheme_part()) + return; + + if (!m_state.lexer.consume_specific("://"sv)) { + m_state.parse_result.scheme_part = OptionalNone {}; + return; + } + + transaction.commit(); + }; + + parse_scheme(); + + if (!parse_host_part()) + return false; + + if (m_state.lexer.consume_specific(':')) { + if (!parse_port_part()) + return false; + } + + (void)parse_path_part(); + + return true; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-host-part + [[nodiscard]] bool parse_host_part() + { + // host-part = "*" / [ "*." ] 1*host-char *( "." 1*host-char ) [ "." ] + StateTransaction transaction { *this }; + + if (m_state.lexer.consume_specific('*') && !m_state.lexer.consume_specific('.')) { + m_state.parse_result.host_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + if (!parse_host_char()) + return false; + + while (parse_host_char()) + ; + + while (m_state.lexer.consume_specific('.')) { + if (parse_host_char()) { + while (parse_host_char()) + ; + } else { + break; + } + } + + m_state.parse_result.host_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-host-char + [[nodiscard]] bool parse_host_char() + { + // host-char = ALPHA / DIGIT / "-" + return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha) + || m_state.lexer.consume_specific_with_predicate(is_ascii_digit) + || m_state.lexer.consume_specific('-'); + } + + // https://w3c.github.io/webappsec-csp/#grammardef-port-part + [[nodiscard]] bool parse_port_part() + { + // port-part = 1*DIGIT / "*" + StateTransaction transaction { *this }; + + if (m_state.lexer.consume_specific('*')) { + m_state.parse_result.port_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + if (!m_state.lexer.consume_specific_with_predicate(is_ascii_digit)) + return false; + + (void)m_state.lexer.consume_while(is_ascii_digit); + + m_state.parse_result.port_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-path-part + [[nodiscard]] bool parse_path_part() + { + // path-part = path-absolute (but not including ";" or ",") + // ; path-absolute is defined in section 3.3 of RFC 3986. + StateTransaction transaction { *this }; + + if (!parse_path_absolute()) + return false; + + m_state.parse_result.path_part = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + [[nodiscard]] bool parse_path_absolute() + { + // path-absolute = "/" [ segment-nz *( "/" segment ) ] + if (!m_state.lexer.consume_specific('/')) + return false; + + if (parse_segment_non_zero()) { + while (m_state.lexer.consume_specific('/')) { + parse_segment(); + } + } + + return true; + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + void parse_segment() + { + // segment = *pchar + while (parse_path_character()) + ; + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + [[nodiscard]] bool parse_segment_non_zero() + { + // segment-nz = 1*pchar + if (!parse_path_character()) + return false; + + while (parse_path_character()) + ; + + return true; + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + [[nodiscard]] bool parse_path_character() + { + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + return parse_unreserved() + || parse_percent_encoded() + || parse_sub_delims() + || m_state.lexer.consume_specific_with_predicate(is_any_of(":@"sv)); + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 + [[nodiscard]] bool parse_unreserved() + { + // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha) + || m_state.lexer.consume_specific_with_predicate(is_ascii_digit) + || m_state.lexer.consume_specific_with_predicate(is_any_of("-._~"sv)); + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 + [[nodiscard]] bool parse_percent_encoded() + { + // pct-encoded = "%" HEXDIG HEXDIG + // "The uppercase hexadecimal digits 'A' through 'F' are equivalent to + // the lowercase digits 'a' through 'f', respectively. If two URIs + // differ only in the case of hexadecimal digits used in percent-encoded + // octets, they are equivalent. For consistency, URI producers and + // normalizers should use uppercase hexadecimal digits for all percent- + // encodings." + return m_state.lexer.consume_specific('%') + && m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit) + && m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit); + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 + [[nodiscard]] bool parse_sub_delims() + { + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + // / "*" / "+" / "," / ";" / "=" + // NOTE: This does not contain ';' and ',' as per the requirement specified in parse_path_part. + return m_state.lexer.consume_specific_with_predicate(is_any_of("!$&'()*+="sv)); + } + + // https://w3c.github.io/webappsec-csp/#grammardef-keyword-source + [[nodiscard]] bool parse_keyword_source() + { + // ; Keywords: + // keyword-source = "'self'" / "'unsafe-inline'" / "'unsafe-eval'" + // / "'strict-dynamic'" / "'unsafe-hashes'" / + // / "'report-sample'" / "'unsafe-allow-redirects'" + // / "'wasm-unsafe-eval'" + StateTransaction transaction { *this }; + +#define __ENUMERATE_KEYWORD_SOURCE(_, value) \ + if (m_state.lexer.consume_specific(value##sv)) { \ + m_state.parse_result.keyword_source = transaction.parsed_string_view(); \ + transaction.commit(); \ + return true; \ + } + ENUMERATE_KEYWORD_SOURCES +#undef __ENUMERATE_KEYWORD_SOURCE + + return false; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-nonce-source + [[nodiscard]] bool parse_nonce_source() + { + // ; Nonces: 'nonce-[nonce goes here]' + // nonce-source = "'nonce-" base64-value "'" + auto prefix = m_state.lexer.consume(7); + if (prefix.length() != 7) + return false; + + if (!prefix.equals_ignoring_ascii_case("'nonce-"sv)) + return false; + + if (!parse_base64_value()) + return false; + + return m_state.lexer.consume_specific('\''); + } + + // https://w3c.github.io/webappsec-csp/#grammardef-base64-value + [[nodiscard]] bool parse_base64_value() + { + // base64-value = 1*( ALPHA / DIGIT / "+" / "/" / "-" / "_" )*2( "=" ) + StateTransaction transaction { *this }; + + auto is_main_part = [](char ch) { + return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '/' || ch == '-' || ch == '_'; + }; + + if (!m_state.lexer.consume_specific_with_predicate(is_main_part)) + return false; + + (void)m_state.lexer.consume_while(is_main_part); + (void)m_state.lexer.consume_specific('='); + (void)m_state.lexer.consume_specific('='); + + m_state.parse_result.base64_value = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + // https://w3c.github.io/webappsec-csp/#grammardef-hash-source + [[nodiscard]] bool parse_hash_source() + { + // ; Digests: 'sha256-[digest goes here]' + // hash-source = "'" hash-algorithm "-" base64-value "'" + if (!m_state.lexer.consume_specific('\'')) + return false; + + if (!parse_hash_algorithm()) + return false; + + if (!m_state.lexer.consume_specific('-')) + return false; + + if (!parse_base64_value()) + return false; + + return m_state.lexer.consume_specific('\''); + } + + // https://w3c.github.io/webappsec-csp/#grammardef-hash-algorithm + [[nodiscard]] bool parse_hash_algorithm() + { + // hash-algorithm = "sha256" / "sha384" / "sha512" + StateTransaction transaction { *this }; + + auto hash_algorithm = m_state.lexer.consume(6); + if (hash_algorithm.length() != 6) + return false; + + if (hash_algorithm.equals_ignoring_ascii_case("sha256"sv)) { + m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + if (hash_algorithm.equals_ignoring_ascii_case("sha384"sv)) { + m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + if (hash_algorithm.equals_ignoring_ascii_case("sha512"sv)) { + m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); + transaction.commit(); + return true; + } + + return false; + } + +private: + struct State { + GenericLexer lexer; + SourceExpressionParseResult parse_result; + }; + + struct StateTransaction { + explicit StateTransaction(SourceExpressionParser& parser) + : m_parser(parser) + , m_saved_state(parser.m_state) + , m_start_index(parser.m_state.lexer.tell()) + { + } + + ~StateTransaction() + { + if (!m_commit) + m_parser.m_state = move(m_saved_state); + } + + void commit() { m_commit = true; } + StringView parsed_string_view() const + { + return m_parser.m_input.substring_view(m_start_index, m_parser.m_state.lexer.tell() - m_start_index); + } + + private: + SourceExpressionParser& m_parser; + State m_saved_state; + size_t m_start_index { 0 }; + bool m_commit { false }; + }; + + StringView m_input; + State m_state; +}; + +#define ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS \ + __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(SchemeSource, parse_scheme_source) \ + __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HostSource, parse_host_source) \ + __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(KeywordSource, parse_keyword_source) \ + __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(NonceSource, parse_nonce_source) \ + __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HashSource, parse_hash_source) + +Optional parse_source_expression(Production production, StringView input) +{ + SourceExpressionParser parser { input }; + + switch (production) { +#define __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(ProductionName, parse_production) \ + case Production::ProductionName: \ + if (!parser.parse_production()) \ + return {}; \ + break; + ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS +#undef __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER + default: + VERIFY_NOT_REACHED(); + } + + // If we parsed successfully but didn't reach the end, the string doesn't match the given production. + if (!parser.lexer().is_eof()) + return {}; + + return parser.parse_result(); +} + +} diff --git a/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.h b/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.h new file mode 100644 index 00000000000..d65d1663ef0 --- /dev/null +++ b/Libraries/LibWeb/ContentSecurityPolicy/Directives/SourceExpression.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, Luke Wilde + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Web::ContentSecurityPolicy::Directives { + +struct SourceExpressionParseResult { + Optional scheme_part; + Optional host_part; + Optional port_part; + Optional path_part; + Optional keyword_source; + Optional base64_value; + Optional hash_algorithm; +}; + +enum class Production { + SchemeSource, + HostSource, + KeywordSource, + NonceSource, + HashSource, +}; + +Optional parse_source_expression(Production, StringView); + +}