LibWeb: Introduce Content Security Policy policies and directives

These form the basis of Content Security Policy. A policy is a
collection of directives that are parsed from either the
Content-Security-Policy(-Report-Only) HTTP header, or the `<meta>`
element.

The directives are what restrict the operations that can be performed in
the current global execution context. For example, "frame-ancestors 'none'"
tells us to prevent the page from being loaded in an embedded context,
such as `<iframe>`.

You can see it a bit like OpenBSD's pledge() functionality, but for the
web platform: https://man.openbsd.org/pledge.2
This commit is contained in:
Luke Wilde 2024-11-25 16:17:17 +00:00 committed by Andreas Kling
commit e34a6c86b9
Notes: github-actions[bot] 2025-03-04 13:28:21 +00:00
20 changed files with 846 additions and 3 deletions

View file

@ -0,0 +1,199 @@
/*
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/GenericLexer.h>
#include <AK/String.h>
#include <LibWeb/ContentSecurityPolicy/Directives/DirectiveFactory.h>
#include <LibWeb/ContentSecurityPolicy/Directives/SerializedDirective.h>
#include <LibWeb/ContentSecurityPolicy/Policy.h>
#include <LibWeb/ContentSecurityPolicy/PolicyList.h>
#include <LibWeb/ContentSecurityPolicy/SerializedPolicy.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
#include <LibWeb/Infra/CharacterTypes.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::ContentSecurityPolicy {
// Expands the GC_DEFINE_ALLOCATOR macro for Policy. The macro body is not visible in this file; presumably it
// provides the definition matching an allocator declaration in Policy.h — confirm against the header.
GC_DEFINE_ALLOCATOR(Policy);
// https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-serialized-csp
// Parses `serialized` (raw header bytes or an already-decoded string) into a freshly created Policy that carries
// the given `source` and `disposition`. Tokens that are empty, non-ASCII, or repeat an already-seen directive
// name are skipped, so the returned policy's directive set is empty when nothing usable was found.
GC::Ref<Policy> Policy::parse_a_serialized_csp(JS::Realm& realm, Variant<ByteBuffer, String> serialized, Source source, Disposition disposition)
{
    // 1. If serialized is a byte sequence, then set serialized to be the result of isomorphic decoding serialized.
    auto policy_text = serialized.visit(
        [](String const& string) { return string; },
        [](ByteBuffer const& bytes) { return Infra::isomorphic_decode(bytes); });

    // 2. Let policy be a new policy with an empty directive set, a source of source, and a disposition of
    //    disposition.
    auto policy = realm.create<Policy>();
    policy->m_source = source;
    policy->m_disposition = disposition;

    // 3. For each token returned by strictly splitting serialized on the U+003B SEMICOLON character (;):
    auto const raw_tokens = MUST(policy_text.split(';', SplitBehavior::KeepEmpty));
    for (auto const& raw_token : raw_tokens) {
        // 1. Strip leading and trailing ASCII whitespace from token.
        auto const token = MUST(raw_token.trim(Infra::ASCII_WHITESPACE));
        auto const token_view = token.bytes_as_string_view();

        // 2. If token is an empty string, or if token is not an ASCII string, continue.
        if (token.is_empty())
            continue;
        if (!all_of(token_view, is_ascii))
            continue;

        // 3. Let directive name be the result of collecting a sequence of code points from token which are not
        //    ASCII whitespace.
        GenericLexer lexer(token_view);
        auto const name_view = lexer.consume_until(Infra::is_ascii_whitespace);

        // 4. Set directive name to be the result of running ASCII lowercase on directive name.
        // Spec Note: Directive names are case-insensitive, that is: script-SRC 'none' and ScRiPt-sRc 'none' are
        //            equivalent.
        auto directive_name = MUST(Infra::to_ascii_lowercase(name_view));

        // 5. If policy's directive set contains a directive whose name is directive name, continue.
        if (policy->contains_directive_with_name(directive_name)) {
            // Spec Note: In this case, the user agent SHOULD notify developers that a duplicate directive was
            //            ignored. A console warning might be appropriate, for example.
            dbgln("Ignoring duplicate Content Security Policy directive: {}", directive_name);
            continue;
        }

        // 6. Let directive value be the result of splitting token on ASCII whitespace.
        // The lexer already sits just past the directive name, so only the remainder of the token is split.
        auto const value_views = lexer.consume_all().split_view_if(Infra::is_ascii_whitespace);
        Vector<String> directive_value;
        for (auto value_view : value_views)
            directive_value.append(MUST(String::from_utf8(value_view)));

        // 7. Let directive be a new directive whose name is directive name, and value is directive value.
        // 8. Append directive to policy's directive set.
        policy->m_directives.append(Directives::create_directive(realm, move(directive_name), move(directive_value)));
    }

    // 4. Return policy.
    return policy;
}
// https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-responses-content-security-policies
// Collects every policy delivered through the Content-Security-Policy and Content-Security-Policy-Report-Only
// headers of `response` (enforced policies first, then report-only ones) and returns them as a PolicyList.
// Policies whose directive set ends up empty are dropped, so the list is empty if nothing could be parsed.
GC::Ref<PolicyList> Policy::parse_a_responses_content_security_policies(JS::Realm& realm, GC::Ref<Fetch::Infrastructure::Response const> response)
{
    // 1. Let policies be an empty list.
    GC::RootVector<GC::Ref<Policy>> policies(realm.heap());

    // Steps 2 and 3 only differ in the header name and the resulting disposition, so both run through this helper.
    auto append_policies_from_header = [&](StringView header_name, Policy::Disposition disposition) {
        auto tokens_or_failure = Fetch::Infrastructure::extract_header_list_values(header_name.bytes(), response->header_list());

        // Anything other than a list of values (header absent or unparsable) contributes no policies.
        if (!tokens_or_failure.has<Vector<ByteBuffer>>())
            return;

        // The extracted values are owned by this scope and not needed afterwards, so each buffer is moved into
        // the parser instead of being deep-copied.
        for (auto& token : tokens_or_failure.get<Vector<ByteBuffer>>()) {
            // 1. Let policy be the result of parsing token, with a source of "header", and the given disposition.
            auto policy = parse_a_serialized_csp(realm, move(token), Policy::Source::Header, disposition);

            // 2. If policy's directive set is not empty, append policy to policies.
            if (!policy->m_directives.is_empty())
                policies.append(policy);
        }
    };

    // 2. For each token returned by extracting header list values given Content-Security-Policy and response's
    //    header list: parse it with a disposition of "enforce".
    append_policies_from_header("Content-Security-Policy"sv, Policy::Disposition::Enforce);

    // 3. For each token returned by extracting header list values given Content-Security-Policy-Report-Only and
    //    response's header list: parse it with a disposition of "report".
    append_policies_from_header("Content-Security-Policy-Report-Only"sv, Policy::Disposition::Report);

    // 4. For each policy of policies:
    for (auto& policy : policies) {
        // 1. Set policy's self-origin to response's url's origin.
        // NOTE(review): response->url() is dereferenced unconditionally; presumably every response that carries
        //               CSP headers has a URL — confirm. It is evaluated only when at least one policy exists.
        policy->m_self_origin = response->url()->origin();
    }

    // 5. Return policies.
    return PolicyList::create(realm, policies);
}
// Rebuilds a Policy in `realm` from `serialized_policy`: the disposition, source and self-origin are copied over
// and one directive is created per serialized directive, in the same order.
GC::Ref<Policy> Policy::create_from_serialized_policy(JS::Realm& realm, SerializedPolicy const& serialized_policy)
{
    auto restored_policy = realm.create<Policy>();

    // Plain data members first; they do not depend on the directives.
    restored_policy->m_disposition = serialized_policy.disposition;
    restored_policy->m_source = serialized_policy.source;
    restored_policy->m_self_origin = serialized_policy.self_origin;

    for (auto const& entry : serialized_policy.directives)
        restored_policy->m_directives.append(Directives::create_directive(realm, entry.name, entry.value));

    return restored_policy;
}
// Returns true if any directive in this policy's directive set has a name equal to `name`.
bool Policy::contains_directive_with_name(StringView name) const
{
    for (auto const& directive : m_directives) {
        if (directive->name() == name)
            return true;
    }
    return false;
}
// Creates a new Policy in `realm` with the same disposition, source and self-origin as this one, and with each
// directive cloned individually through its own clone().
GC::Ref<Policy> Policy::clone(JS::Realm& realm) const
{
    auto copy = realm.create<Policy>();

    copy->m_disposition = m_disposition;
    copy->m_source = m_source;
    copy->m_self_origin = m_self_origin;

    for (auto const& directive : m_directives)
        copy->m_directives.append(directive->clone(realm));

    return copy;
}
// Produces a SerializedPolicy holding this policy's disposition, source and self-origin, plus the serialized
// form of every directive in directive-set order.
SerializedPolicy Policy::serialize() const
{
    SerializedPolicy serialized_policy {
        .directives = {},
        .disposition = m_disposition,
        .source = m_source,
        .self_origin = m_self_origin,
    };

    for (auto const& directive : m_directives)
        serialized_policy.directives.append(directive->serialize());

    return serialized_policy;
}
// Visitor hook: forwards to the base class implementation first, then hands this policy's directive set to
// `visitor`.
void Policy::visit_edges(Cell::Visitor& visitor)
{
Base::visit_edges(visitor);
visitor.visit(m_directives);
}
}