/* * Copyright (c) 2025, Luke Wilde * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include namespace Web::ContentSecurityPolicy::Directives { // https://w3c.github.io/webappsec-csp/#source-expression class SourceExpressionParser { public: explicit SourceExpressionParser(StringView input) : m_input(input) , m_state({ .lexer = GenericLexer { input }, .parse_result = {}, }) { } [[nodiscard]] GenericLexer const& lexer() const { return m_state.lexer; } [[nodiscard]] SourceExpressionParseResult const& parse_result() const { return m_state.parse_result; } // https://w3c.github.io/webappsec-csp/#grammardef-scheme-source [[nodiscard]] bool parse_scheme_source() { // ; Schemes: "https:" / "custom-scheme:" / "another.custom-scheme:" // scheme-source = scheme-part ":" if (!parse_scheme_part()) return false; return m_state.lexer.consume_specific(':'); } // https://w3c.github.io/webappsec-csp/#grammardef-scheme-part [[nodiscard]] bool parse_scheme_part() { // scheme-part = scheme // ; scheme is defined in section 3.1 of RFC 3986. StateTransaction transaction { *this }; if (!parse_scheme()) return false; m_state.parse_result.scheme_part = transaction.parsed_string_view(); transaction.commit(); return true; } // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 [[nodiscard]] bool parse_scheme() { // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) if (!m_state.lexer.consume_specific_with_predicate(is_ascii_alpha)) return false; (void)m_state.lexer.consume_while([](char ch) { return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '-' || ch == '.'; }); return true; } // https://w3c.github.io/webappsec-csp/#grammardef-host-source [[nodiscard]] bool parse_host_source() { // ; Hosts: "example.com" / "*.example.com" / "https://*.example.com:12/path/to/file.js" // host-source = [ scheme-part "://" ] host-part [ ":" port-part ] [ path-part ] auto parse_scheme = [&] { StateTransaction transaction { *this }; if (!parse_scheme_part()) return; if (!m_state.lexer.consume_specific("://"sv)) { m_state.parse_result.scheme_part = OptionalNone {}; return; } transaction.commit(); }; parse_scheme(); if (!parse_host_part()) return false; if (m_state.lexer.consume_specific(':')) { if (!parse_port_part()) return false; } (void)parse_path_part(); return true; } // https://w3c.github.io/webappsec-csp/#grammardef-host-part [[nodiscard]] bool parse_host_part() { // host-part = "*" / [ "*." ] 1*host-char *( "." 1*host-char ) [ "." ] StateTransaction transaction { *this }; if (m_state.lexer.consume_specific('*') && !m_state.lexer.consume_specific('.')) { m_state.parse_result.host_part = transaction.parsed_string_view(); transaction.commit(); return true; } if (!parse_host_char()) return false; while (parse_host_char()) ; while (m_state.lexer.consume_specific('.')) { if (parse_host_char()) { while (parse_host_char()) ; } else { break; } } m_state.parse_result.host_part = transaction.parsed_string_view(); transaction.commit(); return true; } // https://w3c.github.io/webappsec-csp/#grammardef-host-char [[nodiscard]] bool parse_host_char() { // host-char = ALPHA / DIGIT / "-" return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha) || m_state.lexer.consume_specific_with_predicate(is_ascii_digit) || m_state.lexer.consume_specific('-'); } // https://w3c.github.io/webappsec-csp/#grammardef-port-part [[nodiscard]] bool parse_port_part() { // port-part = 1*DIGIT / "*" StateTransaction transaction { *this }; if (m_state.lexer.consume_specific('*')) { m_state.parse_result.port_part = transaction.parsed_string_view(); transaction.commit(); return true; } if (!m_state.lexer.consume_specific_with_predicate(is_ascii_digit)) return false; (void)m_state.lexer.consume_while(is_ascii_digit); m_state.parse_result.port_part = transaction.parsed_string_view(); transaction.commit(); return true; } // https://w3c.github.io/webappsec-csp/#grammardef-path-part [[nodiscard]] bool parse_path_part() { // path-part = path-absolute (but not including ";" or ",") // ; path-absolute is defined in section 3.3 of RFC 3986. StateTransaction transaction { *this }; if (!parse_path_absolute()) return false; m_state.parse_result.path_part = transaction.parsed_string_view(); transaction.commit(); return true; } // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 [[nodiscard]] bool parse_path_absolute() { // path-absolute = "/" [ segment-nz *( "/" segment ) ] if (!m_state.lexer.consume_specific('/')) return false; if (parse_segment_non_zero()) { while (m_state.lexer.consume_specific('/')) { parse_segment(); } } return true; } // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 void parse_segment() { // segment = *pchar while (parse_path_character()) ; } // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 [[nodiscard]] bool parse_segment_non_zero() { // segment-nz = 1*pchar if (!parse_path_character()) return false; while (parse_path_character()) ; return true; } // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 [[nodiscard]] bool parse_path_character() { // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" return parse_unreserved() || parse_percent_encoded() || parse_sub_delims() || m_state.lexer.consume_specific_with_predicate(is_any_of(":@"sv)); } // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 [[nodiscard]] bool parse_unreserved() { // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha) || m_state.lexer.consume_specific_with_predicate(is_ascii_digit) || m_state.lexer.consume_specific_with_predicate(is_any_of("-._~"sv)); } // https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 [[nodiscard]] bool parse_percent_encoded() { // pct-encoded = "%" HEXDIG HEXDIG // "The uppercase hexadecimal digits 'A' through 'F' are equivalent to // the lowercase digits 'a' through 'f', respectively. If two URIs // differ only in the case of hexadecimal digits used in percent-encoded // octets, they are equivalent. For consistency, URI producers and // normalizers should use uppercase hexadecimal digits for all percent- // encodings." return m_state.lexer.consume_specific('%') && m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit) && m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit); } // https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 [[nodiscard]] bool parse_sub_delims() { // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" // / "*" / "+" / "," / ";" / "=" // NOTE: This does not contain ';' and ',' as per the requirement specified in parse_path_part. return m_state.lexer.consume_specific_with_predicate(is_any_of("!$&'()*+="sv)); } // https://w3c.github.io/webappsec-csp/#grammardef-keyword-source [[nodiscard]] bool parse_keyword_source() { // ; Keywords: // keyword-source = "'self'" / "'unsafe-inline'" / "'unsafe-eval'" // / "'strict-dynamic'" / "'unsafe-hashes'" / // / "'report-sample'" / "'unsafe-allow-redirects'" // / "'wasm-unsafe-eval'" StateTransaction transaction { *this }; #define __ENUMERATE_KEYWORD_SOURCE(_, value) \ if (m_state.lexer.consume_specific(value##sv)) { \ m_state.parse_result.keyword_source = transaction.parsed_string_view(); \ transaction.commit(); \ return true; \ } ENUMERATE_KEYWORD_SOURCES #undef __ENUMERATE_KEYWORD_SOURCE return false; } // https://w3c.github.io/webappsec-csp/#grammardef-nonce-source [[nodiscard]] bool parse_nonce_source() { // ; Nonces: 'nonce-[nonce goes here]' // nonce-source = "'nonce-" base64-value "'" auto prefix = m_state.lexer.consume(7); if (prefix.length() != 7) return false; if (!prefix.equals_ignoring_ascii_case("'nonce-"sv)) return false; if (!parse_base64_value()) return false; return m_state.lexer.consume_specific('\''); } // https://w3c.github.io/webappsec-csp/#grammardef-base64-value [[nodiscard]] bool parse_base64_value() { // base64-value = 1*( ALPHA / DIGIT / "+" / "/" / "-" / "_" )*2( "=" ) StateTransaction transaction { *this }; auto is_main_part = [](char ch) { return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '/' || ch == '-' || ch == '_'; }; if (!m_state.lexer.consume_specific_with_predicate(is_main_part)) return false; (void)m_state.lexer.consume_while(is_main_part); (void)m_state.lexer.consume_specific('='); (void)m_state.lexer.consume_specific('='); m_state.parse_result.base64_value = transaction.parsed_string_view(); transaction.commit(); return true; } // https://w3c.github.io/webappsec-csp/#grammardef-hash-source [[nodiscard]] bool parse_hash_source() { // ; Digests: 'sha256-[digest goes here]' // hash-source = "'" hash-algorithm "-" base64-value "'" if (!m_state.lexer.consume_specific('\'')) return false; if (!parse_hash_algorithm()) return false; if (!m_state.lexer.consume_specific('-')) return false; if (!parse_base64_value()) return false; return m_state.lexer.consume_specific('\''); } // https://w3c.github.io/webappsec-csp/#grammardef-hash-algorithm [[nodiscard]] bool parse_hash_algorithm() { // hash-algorithm = "sha256" / "sha384" / "sha512" StateTransaction transaction { *this }; auto hash_algorithm = m_state.lexer.consume(6); if (hash_algorithm.length() != 6) return false; if (hash_algorithm.equals_ignoring_ascii_case("sha256"sv)) { m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); transaction.commit(); return true; } if (hash_algorithm.equals_ignoring_ascii_case("sha384"sv)) { m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); transaction.commit(); return true; } if (hash_algorithm.equals_ignoring_ascii_case("sha512"sv)) { m_state.parse_result.hash_algorithm = transaction.parsed_string_view(); transaction.commit(); return true; } return false; } private: struct State { GenericLexer lexer; SourceExpressionParseResult parse_result; }; struct StateTransaction { explicit StateTransaction(SourceExpressionParser& parser) : m_parser(parser) , m_saved_state(parser.m_state) , m_start_index(parser.m_state.lexer.tell()) { } ~StateTransaction() { if (!m_commit) m_parser.m_state = move(m_saved_state); } void commit() { m_commit = true; } StringView parsed_string_view() const { return m_parser.m_input.substring_view(m_start_index, m_parser.m_state.lexer.tell() - m_start_index); } private: SourceExpressionParser& m_parser; State m_saved_state; size_t m_start_index { 0 }; bool m_commit { false }; }; StringView m_input; State m_state; }; #define ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS \ __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(SchemeSource, parse_scheme_source) \ __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HostSource, parse_host_source) \ __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(KeywordSource, parse_keyword_source) \ __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(NonceSource, parse_nonce_source) \ __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HashSource, parse_hash_source) Optional parse_source_expression(Production production, StringView input) { SourceExpressionParser parser { input }; switch (production) { #define __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(ProductionName, parse_production) \ case Production::ProductionName: \ if (!parser.parse_production()) \ return {}; \ break; ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS #undef __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER default: VERIFY_NOT_REACHED(); } // If we parsed successfully but didn't reach the end, the string doesn't match the given production. if (!parser.lexer().is_eof()) return {}; return parser.parse_result(); } }