LibWeb/MimeSniff: Add MP3 without ID3 sniffing

Removes the associated FIXME in match_an_audio_or_video_type_pattern().
The sniffing process is a simplified version of the full spec algorithm,
as it only checks the header of the first frame of the MP3. To fully align
with the spec, it would also have to check a second frame, located by
calculating the frame size as described in the spec.
This commit is contained in:
Ben Eidson 2025-05-18 17:00:03 -04:00 committed by Andrew Kaster
commit bd68a99f14
Notes: github-actions[bot] 2025-06-09 13:51:34 +00:00
2 changed files with 169 additions and 2 deletions

View file

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>. * Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>.
* Copyright (c) 2025, Ben Eidson <b.e.eidson@gmail.com>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -315,6 +316,79 @@ bool matches_webm_signature(ReadonlyBytes sequence)
return false; return false;
} }
// https://mimesniff.spec.whatwg.org/#match-an-mp3-header
// Returns true when the four bytes of `sequence` starting at offset `s` pass the implemented
// header checks (sync bits, layer, bit-rate index and sample-rate index); false otherwise,
// including when fewer than four bytes are available at `s`.
bool match_mp3_header(ReadonlyBytes sequence, size_t s)
{
    // To match an mp3 header, using a byte sequence sequence of length length at offset s execute these steps:
    size_t length = sequence.size();

    // 1. If length is less than 4, return false.
    // NOTE: `length - s` is an unsigned subtraction, so an offset past the end of the sequence
    //       has to be rejected first; otherwise the difference wraps around to a huge value,
    //       the check passes and the reads below go out of bounds.
    if (s > length || length - s < 4)
        return false;

    // 2. If sequence[s] is not equal to 0xff and sequence[s + 1] & 0xe0 is not equal to 0xe0, return false.
    // NOTE: spec is wrong here, should be or.
    if (sequence[s] != 0xff || (sequence[s + 1] & 0xe0) != 0xe0)
        return false;

    // 3. Let layer be the result of sequence[s + 1] & 0x06 >> 1.
    u8 layer = (sequence[s + 1] & 0x06) >> 1;

    // 4. If layer is 0, return false.
    if (layer == 0)
        return false;

    // 5. Let bit-rate be sequence[s + 2] & 0xf0 >> 4.
    u8 bit_rate = (sequence[s + 2] & 0xf0) >> 4;

    // 6. If bit-rate is 15, return false.
    if (bit_rate == 15)
        return false;

    // 7. Let sample-rate be sequence[s + 2] & 0x0c >> 2.
    u8 sample_rate = (sequence[s + 2] & 0x0c) >> 2;

    // 8. If sample-rate is 3, return false.
    if (sample_rate == 3)
        return false;

    // FIXME: 9. Let freq be the value given by sample-rate in the table sample-rate.
    // FIXME: 10. Let final-layer be the result of 4 - (sequence[s + 1]).
    // FIXME: 11. If final-layer & 0x06 >> 1 is not 3, return false.

    // 12. Return true.
    return true;
}
// https://mimesniff.spec.whatwg.org/#matches-the-signature-for-mp3-without-id3
// FIXME: Instead of implementing full spec, only matches single-frame mp3 header due to the issues stated at (https://github.com/whatwg/mimesniff/issues/70)
bool matches_mp3_no_id3_signature(ReadonlyBytes sequence)
{
    // 1. Let sequence be the byte sequence to be matched, where sequence[s] is byte s in sequence and sequence[0] is the first byte in sequence.
    // FIXME: 2. Let length be the number of bytes in sequence.
    // FIXME: 3. Initialize s to 0.
    // NOTE: Since s is not tracked yet, the header is always looked for at the very start of the sequence.
    constexpr size_t first_frame_offset = 0;

    // 4. If the result of the operation match mp3 header is false, return false.
    // FIXME: 5. Parse an mp3 frame on sequence at offset s
    // FIXME: 6. Let skipped-bytes the return value of the execution of mp3 framesize computation
    // FIXME: 7. If skipped-bytes is less than 4, or skipped-bytes is greater than s - length, return false.
    // FIXME: 8. Increment s by skipped-bytes.
    // FIXME: 9. If the result of the operation match mp3 header operation is false, return false, else, return true.
    // NOTE: With steps 5-9 unimplemented, the outcome of step 4 alone decides the match.
    return match_mp3_header(sequence, first_frame_offset);
}
// https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern // https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input) Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
{ {
@ -361,7 +435,9 @@ Optional<MimeType> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
if (matches_webm_signature(input)) if (matches_webm_signature(input))
return MimeType::create("video"_string, "webm"_string); return MimeType::create("video"_string, "webm"_string);
// FIXME: 4. If input matches the signature for MP3 without ID3, return "audio/mpeg". // 4. If input matches the signature for MP3 without ID3, return "audio/mpeg".
if (matches_mp3_no_id3_signature(input))
return MimeType::create("audio"_string, "mpeg"_string);
// 5. Return undefined. // 5. Return undefined.
return OptionalNone {}; return OptionalNone {};

View file

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>. * Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>
* Copyright (c) 2024, Jamie Mansfield <jmansfield@cadixdev.org> * Copyright (c) 2024, Jamie Mansfield <jmansfield@cadixdev.org>
* Copyright (c) 2025, Ben Eidson <b.e.eidson@gmail.com>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -323,6 +324,96 @@ TEST_CASE(determine_computed_mime_type_when_trying_to_match_webm_signature)
} }
} }
// http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
// Options consumed by make_mp3_frame() to build a synthetic frame for the sniffing tests.
// Each field can be overridden individually to break one header property at a time.
struct MP3FrameOptions {
    // When false, make_mp3_frame() returns a 2-byte buffer instead of a full frame.
    bool validLength { true };

    // Whether the first header byte is set to 0xFF (the leading eight sync bits).
    bool sync { true };

    // MPEG version field: 3=MPEG-1, 2=MPEG-2, 0=MPEG-2.5
    u8 version { 3 };

    // Layer field: 1=III, 2=II, 3=I
    u8 layer { 1 };

    // Protection bit: true=no CRC, false=CRC follows
    bool protect { true };

    // Bitrate index: 1-14 valid
    u8 bitrate_index { 9 };

    // Sample-rate index: 0=44.1k, 1=48k, 2=32k
    u8 samplerate_index { 0 };

    // Padding bit
    bool padded { false };

    // Number of zeroed filler bytes that follow the 4-byte header.
    size_t payload_bytes { 100 };
};
// Builds a zero-filled buffer holding a 4-byte frame header described by `opts`,
// followed by `opts.payload_bytes` zero bytes.
static ByteBuffer make_mp3_frame(MP3FrameOptions opts)
{
    // A frame of invalid length is modelled as just two zero bytes, too short to hold a header.
    if (!opts.validLength)
        return MUST(ByteBuffer::create_zeroed(2));

    constexpr size_t header_size = 4;
    auto frame = MUST(ByteBuffer::create_zeroed(header_size + opts.payload_bytes));
    auto* bytes = frame.data();

    // Byte 0: first 8 bits of the sync word; stays zero when sync is disabled.
    bytes[0] = opts.sync ? 0xFF : 0x00;

    // Byte 1: 1110 0000 = last three sync bits (always set), then the fields below.
    u8 second_byte = 0xE0;
    // bits 4-3: version
    second_byte |= (opts.version & 0x3) << 3;
    // bits 2-1: layer
    second_byte |= (opts.layer & 0x3) << 1;
    // bit 0: protection
    second_byte |= opts.protect ? 0x1 : 0x0;
    bytes[1] = second_byte;

    // Byte 2, bits 7-4: bitrate index
    u8 third_byte = (opts.bitrate_index & 0xF) << 4;
    // bits 3-2: samplerate index
    third_byte |= (opts.samplerate_index & 0x3) << 2;
    // bit 1: pad
    third_byte |= opts.padded ? 0x2 : 0x0;
    // bit 0: private (keep zero)
    bytes[2] = third_byte;

    // Byte 3: rest of the header (channel flags, etc.) is not needed for sniffing.
    bytes[3] = 0x00;

    // Payload bytes are already zeroed by create_zeroed().
    return frame;
}
// Sniffs synthetic MP3 frames in an audio-or-video context and checks the computed MIME type.
TEST_CASE(determine_computed_mime_type_when_trying_to_match_mp3_no_id3_signature)
{
    // Maps each expected serialized MIME type to the frames that should produce it.
    HashMap<StringView, Vector<ByteBuffer>> expected_type_to_frames;

    // Frames with one broken property each; none of them may be reported as MP3.
    expected_type_to_frames.set("application/octet-stream"sv, {
                                                                  // Buffer shorter than a 4-byte header.
                                                                  make_mp3_frame({ .validLength = false }),
                                                                  // Invalid sync.
                                                                  make_mp3_frame({ .sync = false }),
                                                                  // Invalid layer (reserved).
                                                                  make_mp3_frame({ .layer = 0 }),
                                                                  // Invalid bitrate.
                                                                  make_mp3_frame({ .bitrate_index = 15 }),
                                                                  // Invalid sample rate.
                                                                  make_mp3_frame({ .samplerate_index = 3 }),
                                                              });

    // Well-formed frames, with and without the padding bit.
    expected_type_to_frames.set("audio/mpeg"sv, {
                                                    make_mp3_frame({ .padded = true }),
                                                    make_mp3_frame({ .padded = false }),
                                                });

    for (auto const& entry : expected_type_to_frames) {
        auto const& expected_type = entry.key;
        for (auto const& frame : entry.value) {
            auto sniffed_type = Web::MimeSniff::Resource::sniff(frame.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo });
            EXPECT_EQ(expected_type, sniffed_type.serialized());
        }
    }
}
TEST_CASE(determine_computed_mime_type_in_a_font_context) TEST_CASE(determine_computed_mime_type_in_a_font_context)
{ {
// Cover case where supplied type is an XML MIME type. // Cover case where supplied type is an XML MIME type.