From 7b3465ab55142e7e8b5a8261531ce87960bbceac Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Mon, 15 Sep 2025 09:23:21 -0400
Subject: [PATCH] LibWeb: Do not require multipart form data to end with CRLF

According to RFC 2046, the BNF of the form data body is:

    multipart-body := [preamble CRLF]
                      dash-boundary transport-padding CRLF
                      body-part *encapsulation
                      close-delimiter transport-padding
                      [CRLF epilogue]

Where "epilogue" is any text that "may be ignored or discarded". So we
should stop parsing the body once we encounter the terminating delimiter
("--").

Note that our parsing function is from an attempt to standardize the
grammar in the spec: https://andreubotella.github.io/multipart-form-data

This proposal hasn't been updated in ~4 years, and the fetch spec still
does not have a formal definition of the body string.
---
 Libraries/LibWeb/Fetch/Body.cpp | 4 ++-
 .../Fetch/multipart-form-data-crlf.txt | 6 ++++
 .../input/Fetch/multipart-form-data-crlf.html | 36 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 Tests/LibWeb/Text/expected/Fetch/multipart-form-data-crlf.txt
 create mode 100644 Tests/LibWeb/Text/input/Fetch/multipart-form-data-crlf.html

diff --git a/Libraries/LibWeb/Fetch/Body.cpp b/Libraries/LibWeb/Fetch/Body.cpp
index 9408a5e4386..bd87072911f 100644
--- a/Libraries/LibWeb/Fetch/Body.cpp
+++ b/Libraries/LibWeb/Fetch/Body.cpp
@@ -394,7 +394,9 @@ MultipartParsingErrorOr> parse_multipart_form_data(JS
 return MultipartParsingError { MUST(String::formatted("Expected `--` followed by boundary at position {}", lexer.tell())) };

 // 2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A (`--` followed by CR LF) followed by the end of input, return entry list.
- if (lexer.next_is("--\r\n"sv))
+ // NOTE: We do not require the input to end with CRLF to match the behavior of other browsers. According to RFC 2046, we are to discard any
+ // text after the terminating `--`.
See: https://datatracker.ietf.org/doc/html/rfc2046#page-22
+ if (lexer.next_is("--"sv))
 return entry_list;

 // 3. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure.
diff --git a/Tests/LibWeb/Text/expected/Fetch/multipart-form-data-crlf.txt b/Tests/LibWeb/Text/expected/Fetch/multipart-form-data-crlf.txt
new file mode 100644
index 00000000000..73cc57085a7
--- /dev/null
+++ b/Tests/LibWeb/Text/expected/Fetch/multipart-form-data-crlf.txt
@@ -0,0 +1,6 @@
+Data: value0
+Data: value1
+Data: value2
+Data: value3
+Data: value4
+Data: value5
diff --git a/Tests/LibWeb/Text/input/Fetch/multipart-form-data-crlf.html b/Tests/LibWeb/Text/input/Fetch/multipart-form-data-crlf.html
new file mode 100644
index 00000000000..157edadf58d
--- /dev/null
+++ b/Tests/LibWeb/Text/input/Fetch/multipart-form-data-crlf.html
@@ -0,0 +1,36 @@
+
+
+