mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-07 18:17:23 +00:00
LibWeb/MimeSniff: Add WebM signature sniffing and VINT parsing
Adds `matches_webm_signature()` and `parse_a_vint()` helpers per the WHATWG MIME Sniffing spec. Uses these helpers to resolve the WebM FIXME that was in `match_an_audio_or_video_type_pattern()`.
This commit is contained in:
parent
20662f0dc9
commit
e0e513e9fc
Notes:
github-actions[bot]
2025-05-15 15:40:19 +00:00
Author: https://github.com/Prospero23 🔰
Commit: e0e513e9fc
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/4739
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/paaspaas00
Reviewed-by: https://github.com/stelar7
2 changed files with 164 additions and 1 deletions
|
@ -183,6 +183,138 @@ bool matches_mp4_signature(ReadonlyBytes sequence)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct VintResult {
|
||||||
|
u64 value;
|
||||||
|
size_t length;
|
||||||
|
};
|
||||||
|
|
||||||
|
// https://mimesniff.spec.whatwg.org/#parse-a-vint
|
||||||
|
VintResult parse_a_vint(ReadonlyBytes sequence, size_t iter)
|
||||||
|
{
|
||||||
|
// To parse a vint on a byte sequence sequence of size length, starting at index iter use the following steps:
|
||||||
|
sequence = sequence.slice(iter);
|
||||||
|
auto length = sequence.size();
|
||||||
|
|
||||||
|
// 1. let mask be 128.
|
||||||
|
u8 mask = 128;
|
||||||
|
|
||||||
|
// 2. Let max vint length be 8.
|
||||||
|
size_t const max_vint_length = 8;
|
||||||
|
|
||||||
|
// 3. Let number size be 1.
|
||||||
|
size_t number_size = 1;
|
||||||
|
|
||||||
|
// 5. Let index be 0.
|
||||||
|
// NOTE: spec declares this after step four, but it is needed before to use in the loop.
|
||||||
|
size_t index = 0;
|
||||||
|
|
||||||
|
// 4. While number size is less than max vint length, and less than length, continuously loop through these steps:
|
||||||
|
while (number_size < max_vint_length && number_size < length) {
|
||||||
|
// 1. If the sequence[index] & mask is not zero, abort these steps.
|
||||||
|
if (sequence[index] & mask)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// 2. Let mask be the value of mask >> 1.
|
||||||
|
mask >>= 1;
|
||||||
|
|
||||||
|
// 3. Increment number size by one.
|
||||||
|
number_size++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Let parsed number be sequence[index] & ~mask.
|
||||||
|
u64 parsed_number = sequence[index] & ~mask;
|
||||||
|
|
||||||
|
// 7. Increment index by one.
|
||||||
|
index++;
|
||||||
|
|
||||||
|
// 8. Let bytes remaining be the value of number size - 1.
|
||||||
|
auto bytes_remaining = number_size - 1;
|
||||||
|
|
||||||
|
// 9. While bytes remaining is not zero, execute there steps:
|
||||||
|
while (bytes_remaining != 0) {
|
||||||
|
// 1. Let parsed number be parsed number << 8.
|
||||||
|
parsed_number <<= 8;
|
||||||
|
|
||||||
|
// 2. Let parsed number be parsed number | sequence[index].
|
||||||
|
parsed_number |= sequence[index];
|
||||||
|
|
||||||
|
// 3. Increment index by one.
|
||||||
|
index++;
|
||||||
|
|
||||||
|
// 4. If index is greater or equal than length, abort these steps.
|
||||||
|
if (index >= length)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// 5. Decrement bytes remaining by one.
|
||||||
|
bytes_remaining--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 10. Return parsed number and number size
|
||||||
|
return { parsed_number, number_size };
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://mimesniff.spec.whatwg.org/#signature-for-webm
|
||||||
|
bool matches_webm_signature(ReadonlyBytes sequence)
|
||||||
|
{
|
||||||
|
// 1. Let sequence be the byte sequence to be matched, where sequence[s] is byte s in sequence and sequence[0] is the first byte in sequence.
|
||||||
|
|
||||||
|
// 2. Let length be the number of bytes in sequence.
|
||||||
|
auto length = sequence.size();
|
||||||
|
|
||||||
|
// 3. If length is less than 4, return false.
|
||||||
|
if (length < 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// 4. If the four bytes from sequence[0] to sequence[3], are not equal to 0x1A 0x45 0xDF 0xA3, return false.
|
||||||
|
static auto constexpr webm_signature = "\x1A\x45\xDF\xA3"sv;
|
||||||
|
if (!sequence.starts_with(webm_signature.bytes()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// 5. let iter be 4.
|
||||||
|
size_t iter = 4;
|
||||||
|
|
||||||
|
// 6. While iter is less than length and iter is less than 38, continuously loop through these steps:
|
||||||
|
while (iter < length && iter < 38) {
|
||||||
|
// before slicing 2 bytes, ensure there are at least 2 bytes
|
||||||
|
if (iter + 1 >= length)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// 1. If the two bytes from sequence[iter] to sequence[iter + 1] are equal to 0x42 0x82,
|
||||||
|
if (sequence.slice(iter, 2) == "\x42\x82"sv.bytes()) {
|
||||||
|
// 1. increment iter by 2.
|
||||||
|
iter += 2;
|
||||||
|
|
||||||
|
// 2. If iter is greater or equal than length, abort these steps.
|
||||||
|
if (iter >= length)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// 3. Let number size be the result of parsing a vint starting at sequence[iter].
|
||||||
|
|
||||||
|
auto const number_size = parse_a_vint(sequence, iter).length;
|
||||||
|
|
||||||
|
// 4. Increment iter by number size.
|
||||||
|
iter += number_size;
|
||||||
|
|
||||||
|
// 5. If iter is greater than or equal to length - 4, abort these steps.
|
||||||
|
if (iter >= length - 4)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// 6. Let matched be the result of matching a padded sequence 0x77 0x65 0x62 0x6D ("webm") on sequence at offset iter.
|
||||||
|
bool matched = sequence.slice(iter, 4) == "\x77\x65\x62\x6D"sv.bytes();
|
||||||
|
|
||||||
|
// 7. If matched is true, abort these steps and return true.
|
||||||
|
if (matched)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Increment iter by 1.
|
||||||
|
iter += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7. Return false.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
|
// https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
|
||||||
Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
|
Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
|
||||||
{
|
{
|
||||||
|
@ -225,7 +357,10 @@ Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
|
||||||
if (matches_mp4_signature(input))
|
if (matches_mp4_signature(input))
|
||||||
return MimeType::create("video"_string, "mp4"_string);
|
return MimeType::create("video"_string, "mp4"_string);
|
||||||
|
|
||||||
// FIXME: 3. If input matches the signature for WebM, return "video/webm".
|
// 3. If input matches the signature for WebM, return "video/webm".
|
||||||
|
if (matches_webm_signature(input))
|
||||||
|
return MimeType::create("video"_string, "webm"_string);
|
||||||
|
|
||||||
// FIXME: 4. If input matches the signature for MP3 without ID3, return "audio/mpeg".
|
// FIXME: 4. If input matches the signature for MP3 without ID3, return "audio/mpeg".
|
||||||
|
|
||||||
// 5. Return undefined.
|
// 5. Return undefined.
|
||||||
|
|
|
@ -295,6 +295,34 @@ TEST_CASE(determine_computed_mime_type_when_trying_to_match_mp4_signature)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(determine_computed_mime_type_when_trying_to_match_webm_signature)
{
    // Every payload below is sniffed in an audio-or-video context.
    auto sniff_as_audio_or_video = [](StringView payload) {
        return Web::MimeSniff::Resource::sniff(payload.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo });
    };

    // Maps each expected serialized MIME type to the payloads that must produce it.
    HashMap<StringView, Vector<StringView>> payloads_by_expected_type;

    // Payloads that must not be recognized as WebM.
    payloads_by_expected_type.set("application/octet-stream"sv, {
                                                                    // Payload length < 4.
                                                                    "<4"sv,
                                                                    // First four bytes are not 0x1A 0x45 0xDF 0xA3.
                                                                    "\x00\x00\x00\x00"sv,
                                                                    // Correct first four bytes, but no following WebM element.
                                                                    "\x1A\x45\xDF\xA3\x00\x00\x00\x00"sv,
                                                                });

    // Payloads that must be recognized as WebM.
    payloads_by_expected_type.set("video/webm"sv, {
                                                      // Signature, then 0x42 0x82, a one-byte vint and "webm".
                                                      "\x1A\x45\xDF\xA3\x42\x82\x84\x77\x65\x62\x6D\x00"sv,
                                                  });

    for (auto const& expectation : payloads_by_expected_type) {
        auto expected_type = expectation.key;

        for (auto const& payload : expectation.value) {
            auto sniffed_type = sniff_as_audio_or_video(payload);
            EXPECT_EQ(expected_type, sniffed_type.serialized());
        }
    }
}
|
||||||
|
|
||||||
TEST_CASE(determine_computed_mime_type_in_a_font_context)
|
TEST_CASE(determine_computed_mime_type_in_a_font_context)
|
||||||
{
|
{
|
||||||
// Cover case where supplied type is an XML MIME type.
|
// Cover case where supplied type is an XML MIME type.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue