ladybird/Userland/Libraries/LibMedia/Audio/FFmpegLoader.cpp
Jelle Raaijmakers 57783eff24 LibMedia: Convert OggLoader into generic FFmpegLoader
This loader supports whatever format libavformat and libavcodec can
handle. Currently only seekable streams are supported, and we still have
some limitations as to the number of channels and sample format.

Plays all non-streaming audio files at:

  https://tools.woolyss.com/html5-audio-video-tester/
2024-09-30 18:48:12 +02:00

345 lines
13 KiB
C++

/*
* Copyright (c) 2024, Jelle Raaijmakers <jelle@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "FFmpegLoader.h"
#include <AK/BitStream.h>
#include <AK/NumericLimits.h>
#include <LibCore/System.h>
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100)
# define USE_FFMPEG_CH_LAYOUT
#endif
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 0, 100)
# define USE_CONSTIFIED_POINTERS
#endif
namespace Audio {
// Value passed as the maximum probe size to av_probe_input_buffer2() when sniffing a stream.
static constexpr int BUFFER_MAX_PROBE_SIZE = 64 * KiB;
// Stores the given AVIO context; it is released again by the destructor.
FFmpegIOContext::FFmpegIOContext(AVIOContext* avio_context)
    : m_avio_context(avio_context)
{
}
// Releases the I/O buffer and then the AVIO context itself.
FFmpegIOContext::~FFmpegIOContext()
{
    // NOTE: The buffer is looked up through the context rather than remembered from create(),
    //       because it might have been replaced since its initial allocation.
    auto* current_buffer = m_avio_context->buffer;
    av_free(current_buffer);

    avio_context_free(&m_avio_context);
}
// Creates an AVIOContext that lets libavformat read from and seek within `stream`.
//
// The stream is passed to the callbacks as the opaque pointer and is not owned by the returned
// object, so it has to outlive it. No write callback is installed (read-only I/O).
//
// Returns a LoaderError of category IO if either the I/O buffer or the AVIO context could not be
// allocated.
ErrorOr<NonnullOwnPtr<FFmpegIOContext>, LoaderError> FFmpegIOContext::create(AK::SeekableStream& stream)
{
    auto* avio_buffer = av_malloc(PAGE_SIZE);
    if (avio_buffer == nullptr)
        return LoaderError { LoaderError::Category::IO, "Failed to allocate AVIO buffer" };

    // This AVIOContext explains to avformat how to interact with our stream
    auto* avio_context = avio_alloc_context(
        static_cast<unsigned char*>(avio_buffer),
        PAGE_SIZE,
        0,
        &stream,
        // Read callback: fill `buffer` with at most `size` bytes (capped at PAGE_SIZE per call).
        [](void* opaque, u8* buffer, int size) -> int {
            auto& stream = *static_cast<SeekableStream*>(opaque);
            AK::Bytes buffer_bytes { buffer, AK::min<size_t>(size, PAGE_SIZE) };
            auto read_bytes_or_error = stream.read_some(buffer_bytes);
            if (read_bytes_or_error.is_error()) {
                if (read_bytes_or_error.error().code() == EOF)
                    return AVERROR_EOF;
                return AVERROR_UNKNOWN;
            }
            int number_of_bytes_read = read_bytes_or_error.value().size();
            // A read of zero bytes is reported to FFmpeg as end of file.
            if (number_of_bytes_read == 0)
                return AVERROR_EOF;
            return number_of_bytes_read;
        },
        nullptr,
        // Seek callback: besides the regular origins, FFmpeg may pass AVSEEK_SIZE to ask for the
        // total stream size, and may OR in AVSEEK_FORCE, which we ignore.
        [](void* opaque, int64_t offset, int whence) -> int64_t {
            whence &= ~AVSEEK_FORCE;

            auto& stream = *static_cast<SeekableStream*>(opaque);
            if (whence == AVSEEK_SIZE) {
                // Report a failure instead of crashing on value() if the size is unavailable.
                auto size_or_error = stream.size();
                if (size_or_error.is_error())
                    return AVERROR(EIO);
                return static_cast<int64_t>(size_or_error.value());
            }

            auto seek_mode_from_whence = [](int origin) -> SeekMode {
                if (origin == SEEK_CUR)
                    return SeekMode::FromCurrentPosition;
                if (origin == SEEK_END)
                    return SeekMode::FromEndPosition;
                return SeekMode::SetPosition;
            };
            auto offset_or_error = stream.seek(offset, seek_mode_from_whence(whence));
            if (offset_or_error.is_error())
                return AVERROR(EIO);

            // Like lseek(), the callback reports the resulting absolute position; libavformat
            // uses it, e.g. when it derives the stream size from a SEEK_END seek.
            return static_cast<int64_t>(offset_or_error.value());
        });
    if (avio_context == nullptr) {
        // The context never took ownership of the buffer, so release it ourselves.
        av_free(avio_buffer);
        return LoaderError { LoaderError::Category::IO, "Failed to allocate AVIO context" };
    }

    return make<FFmpegIOContext>(avio_context);
}
// Takes ownership of both the stream (handed to the LoaderPlugin base) and the AVIO wrapper.
// The FFmpeg state itself is set up separately by initialize().
FFmpegLoaderPlugin::FFmpegLoaderPlugin(NonnullOwnPtr<SeekableStream> stream, NonnullOwnPtr<FFmpegIOContext> io_context)
    : LoaderPlugin(move(stream))
    , m_io_context(move(io_context))
{
}
// Frees whichever FFmpeg objects initialize() managed to create: frame, packet, codec context
// and finally the format context. Members that were never allocated are skipped.
FFmpegLoaderPlugin::~FFmpegLoaderPlugin()
{
    if (m_frame)
        av_frame_free(&m_frame);

    if (m_packet)
        av_packet_free(&m_packet);

    if (m_codec_context)
        avcodec_free_context(&m_codec_context);

    if (m_format_context)
        avformat_close_input(&m_format_context);
}
// Builds a fully initialized plugin for `stream`.
// Fails with the LoaderError of whichever step failed: creating the AVIO wrapper or initialize().
ErrorOr<NonnullOwnPtr<LoaderPlugin>, LoaderError> FFmpegLoaderPlugin::create(NonnullOwnPtr<SeekableStream> stream)
{
    // The AVIO wrapper only keeps a reference to the stream object, which is taken here before
    // ownership of the stream is moved into the plugin.
    auto avio_wrapper = TRY(FFmpegIOContext::create(*stream));

    auto plugin = make<FFmpegLoaderPlugin>(move(stream), move(avio_wrapper));
    TRY(plugin->initialize());

    return plugin;
}
// Opens the container through our AVIO context, picks the best audio stream, opens a decoder
// for it, estimates the total sample count and allocates the packet and frame buffers.
//
// Returns a LoaderError describing the first step that failed. Objects allocated before the
// failure stay in their members and are released by the destructor.
MaybeLoaderError FFmpegLoaderPlugin::initialize()
{
    // Open the container
    m_format_context = avformat_alloc_context();
    if (m_format_context == nullptr)
        return LoaderError { LoaderError::Category::IO, "Failed to allocate format context" };
    m_format_context->pb = m_io_context->avio_context();
    if (avformat_open_input(&m_format_context, nullptr, nullptr, nullptr) < 0)
        return LoaderError { LoaderError::Category::IO, "Failed to open input for format parsing" };

    // Read stream info; doing this is required for headerless formats like MPEG
    if (avformat_find_stream_info(m_format_context, nullptr) < 0)
        return LoaderError { LoaderError::Category::IO, "Failed to find stream info" };

#ifdef USE_CONSTIFIED_POINTERS
    AVCodec const* codec {};
#else
    AVCodec* codec {};
#endif
    // Find the best stream to play within the container
    int best_stream_index = av_find_best_stream(m_format_context, AVMediaType::AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (best_stream_index == AVERROR_STREAM_NOT_FOUND)
        return LoaderError { LoaderError::Category::Format, "No audio stream found in container" };
    if (best_stream_index == AVERROR_DECODER_NOT_FOUND)
        return LoaderError { LoaderError::Category::Format, "No suitable decoder found for stream" };
    if (best_stream_index < 0)
        return LoaderError { LoaderError::Category::Format, "Failed to find an audio stream" };
    m_audio_stream = m_format_context->streams[best_stream_index];

    // Set up the context to decode the audio stream
    m_codec_context = avcodec_alloc_context3(codec);
    if (m_codec_context == nullptr)
        return LoaderError { LoaderError::Category::IO, "Failed to allocate the codec context" };
    if (avcodec_parameters_to_context(m_codec_context, m_audio_stream->codecpar) < 0)
        return LoaderError { LoaderError::Category::IO, "Failed to copy codec parameters" };
    m_codec_context->pkt_timebase = m_audio_stream->time_base;
    // Use at most four decoding threads.
    m_codec_context->thread_count = AK::min(static_cast<int>(Core::System::hardware_concurrency()), 4);
    if (avcodec_open2(m_codec_context, codec, nullptr) < 0)
        return LoaderError { LoaderError::Category::IO, "Failed to open input for decoding" };

    // This is an initial estimate of the total number of samples in the stream.
    // During decoding, we might need to increase the number as more frames come in.
    double duration_in_seconds = 0;
    if (m_audio_stream->duration >= 0) {
        // The stream duration is expressed in units of the stream's time base.
        duration_in_seconds = static_cast<double>(m_audio_stream->duration) * time_base();
    } else if (m_format_context->duration >= 0) {
        // The stream does not know its duration (a negative value such as AV_NOPTS_VALUE), so
        // fall back to the container's duration, which is expressed in AV_TIME_BASE units.
        duration_in_seconds = static_cast<double>(m_format_context->duration) / AV_TIME_BASE;
    } else {
        // Neither the stream nor the container report a usable duration.
        return LoaderError { LoaderError::Category::Format, "Negative stream duration" };
    }
    m_total_samples = AK::round_to<decltype(m_total_samples)>(sample_rate() * duration_in_seconds);

    // Allocate packet (logical chunk of data) and frame (video / audio frame) buffers
    m_packet = av_packet_alloc();
    if (m_packet == nullptr)
        return LoaderError { LoaderError::Category::IO, "Failed to allocate packet" };
    m_frame = av_frame_alloc();
    if (m_frame == nullptr)
        return LoaderError { LoaderError::Category::IO, "Failed to allocate frame" };

    return {};
}
// Returns the selected audio stream's time base (an AVRational) converted to a double.
double FFmpegLoaderPlugin::time_base() const
{
    auto const& stream_time_base = m_audio_stream->time_base;
    return av_q2d(stream_time_base);
}
// Returns true if libavformat recognizes the contents of `stream` as one of its input formats.
//
// Probing reads from the stream (BUFFER_MAX_PROBE_SIZE is passed as the maximum probe size) and
// this function does not seek back afterwards.
bool FFmpegLoaderPlugin::sniff(SeekableStream& stream)
{
    // Failing to set up the probing context (e.g. out of memory) must not take the process
    // down; a stream we cannot probe is simply reported as not recognized.
    auto io_context_or_error = FFmpegIOContext::create(stream);
    if (io_context_or_error.is_error())
        return false;
    auto io_context = io_context_or_error.release_value();

#ifdef USE_CONSTIFIED_POINTERS
    AVInputFormat const* detected_format {};
#else
    AVInputFormat* detected_format {};
#endif
    auto score = av_probe_input_buffer2(io_context->avio_context(), &detected_format, nullptr, nullptr, 0, BUFFER_MAX_PROBE_SIZE);
    // A negative value is an error code; only a positive score counts as a detected format.
    return score > 0;
}
// Converts one decoded audio frame into an array of Samples (one Sample per time step).
//
// Supported input: mono or stereo frames whose samples are float, signed 16-bit or signed
// 32-bit, in either packed (interleaved) or planar layout. Integer samples are scaled to float
// by dividing by the maximum value of their type.
//
// Returns an error for any other channel count or sample format, or if the output array cannot
// be allocated. The frame must contain at least one sample.
static ErrorOr<FixedArray<Sample>> extract_samples_from_frame(AVFrame& frame)
{
    size_t number_of_samples = frame.nb_samples;
    VERIFY(number_of_samples > 0);

#ifdef USE_FFMPEG_CH_LAYOUT
    size_t number_of_channels = frame.ch_layout.nb_channels;
#else
    size_t number_of_channels = frame.channels;
#endif
    auto format = static_cast<AVSampleFormat>(frame.format);
    // The packed variant identifies the sample data type independently of the memory layout.
    auto packed_format = av_get_packed_sample_fmt(format);
    auto is_planar = av_sample_fmt_is_planar(format) == 1;

    // FIXME: handle number_of_channels > 2
    if (number_of_channels != 1 && number_of_channels != 2)
        return Error::from_string_view("Unsupported number of channels"sv);

    // Validate against the packed format: the reading code below handles the packed/planar
    // distinction on its own, so both layouts of each supported data type are accepted.
    switch (packed_format) {
    case AV_SAMPLE_FMT_FLT:
    case AV_SAMPLE_FMT_S16:
    case AV_SAMPLE_FMT_S32:
        break;
    default:
        // FIXME: handle other formats
        return Error::from_string_view("Unsupported sample format"sv);
    }

    // Planar frames keep every channel in its own plane; packed frames interleave all channels
    // in plane 0.
    auto get_plane_pointer = [&](size_t channel_index) -> uint8_t* {
        return is_planar ? frame.extended_data[channel_index] : frame.extended_data[0];
    };
    auto index_in_plane = [&](size_t sample_index, size_t channel_index) {
        if (is_planar)
            return sample_index;
        return sample_index * number_of_channels + channel_index;
    };
    // Reads the sample at `index` from `data` and converts it to float.
    auto read_sample = [&](uint8_t* data, size_t index) -> float {
        switch (packed_format) {
        case AV_SAMPLE_FMT_FLT:
            return reinterpret_cast<float*>(data)[index];
        case AV_SAMPLE_FMT_S16:
            return reinterpret_cast<i16*>(data)[index] / static_cast<float>(NumericLimits<i16>::max());
        case AV_SAMPLE_FMT_S32:
            return reinterpret_cast<i32*>(data)[index] / static_cast<float>(NumericLimits<i32>::max());
        default:
            // Every other format was rejected above.
            VERIFY_NOT_REACHED();
        }
    };

    auto samples = TRY(FixedArray<Sample>::create(number_of_samples));
    for (size_t sample = 0; sample < number_of_samples; ++sample) {
        if (number_of_channels == 1) {
            samples.unchecked_at(sample) = Sample { read_sample(get_plane_pointer(0), index_in_plane(sample, 0)) };
        } else {
            samples.unchecked_at(sample) = Sample {
                read_sample(get_plane_pointer(0), index_in_plane(sample, 0)),
                read_sample(get_plane_pointer(1), index_in_plane(sample, 1)),
            };
        }
    }
    return samples;
}
// Decodes packets until at least `samples_to_read_from_input` samples have been produced or the
// input ends, and returns one chunk per decoded frame. Because whole frames are returned, the
// result may contain more samples than requested.
//
// Reaching the end of the input is not an error: the chunks decoded so far are returned, which
// is an empty vector if nothing was left to decode.
// NOTE(review): presumably the caller treats an empty result as end of stream - confirm.
//
// Returns a LoaderError if reading, decoding or sample extraction fails.
ErrorOr<Vector<FixedArray<Sample>>, LoaderError> FFmpegLoaderPlugin::load_chunks(size_t samples_to_read_from_input)
{
    Vector<FixedArray<Sample>> chunks {};

    do {
        // Obtain a packet
        auto read_frame_error = av_read_frame(m_format_context, m_packet);
        if (read_frame_error < 0) {
            // End of file is the normal way for a stream to finish; hand back what we have
            // instead of discarding it and reporting an I/O failure.
            if (read_frame_error == AVERROR_EOF)
                break;
            return LoaderError { LoaderError::Category::IO, "Failed to read frame" };
        }
        // Packets that belong to other streams in the container are skipped.
        if (m_packet->stream_index != m_audio_stream->index) {
            av_packet_unref(m_packet);
            continue;
        }

        // Send the packet to the decoder. We are done with the packet afterwards regardless of
        // the outcome, so release it before looking at the result.
        auto send_packet_error = avcodec_send_packet(m_codec_context, m_packet);
        av_packet_unref(m_packet);
        if (send_packet_error < 0)
            return LoaderError { LoaderError::Category::IO, "Failed to send packet" };

        // Ask the decoder for a new frame. We might not have sent enough data yet
        auto receive_frame_error = avcodec_receive_frame(m_codec_context, m_frame);
        if (receive_frame_error != 0) {
            if (receive_frame_error == AVERROR(EAGAIN))
                continue;
            // The decoder has no more output; treat it like the end of the input above.
            if (receive_frame_error == AVERROR_EOF)
                break;
            return LoaderError { LoaderError::Category::IO, "Failed to receive frame" };
        }

        chunks.append(TRY(extract_samples_from_frame(*m_frame)));

        // Use the frame's presentation timestamp to set the number of loaded samples
        // NOTE(review): this assumes the frame carries a valid pts; a frame without one
        //               (AV_NOPTS_VALUE) would produce a bogus position - confirm whether that
        //               can happen for the formats we accept.
        m_loaded_samples = static_cast<int>(m_frame->pts * sample_rate() * time_base());
        // The total was only an estimate; grow it if decoding runs past it.
        if (m_loaded_samples > m_total_samples) [[unlikely]]
            m_total_samples = m_loaded_samples;

        samples_to_read_from_input -= AK::min(samples_to_read_from_input, m_frame->nb_samples);
    } while (samples_to_read_from_input > 0);

    return chunks;
}
// Rewinds playback to the very first sample; equivalent to seek(0).
MaybeLoaderError FFmpegLoaderPlugin::reset()
{
    constexpr int first_sample_index = 0;
    return seek(first_sample_index);
}
// Repositions the stream so that decoding continues at `sample_index`.
// Returns a LoaderError of category IO if libavformat cannot perform the seek.
MaybeLoaderError FFmpegLoaderPlugin::seek(int sample_index)
{
    // Convert the sample index to seconds, and from there to the stream's time base units.
    auto const target_in_seconds = static_cast<double>(sample_index) / sample_rate();
    auto const target_timestamp = AK::round_to<int64_t>(target_in_seconds / time_base());

    auto const seek_result = av_seek_frame(m_format_context, m_audio_stream->index, target_timestamp, AVSEEK_FLAG_ANY);
    if (seek_result < 0)
        return LoaderError { LoaderError::Category::IO, "Failed to seek" };

    // Reset the decoder's buffered state after the jump.
    avcodec_flush_buffers(m_codec_context);

    m_loaded_samples = sample_index;
    return {};
}
// Returns the sample rate stored in the codec context.
// The codec context must exist, i.e. initialize() has to have created it.
u32 FFmpegLoaderPlugin::sample_rate()
{
    VERIFY(m_codec_context != nullptr);

    auto const& codec_context = *m_codec_context;
    return codec_context.sample_rate;
}
// Returns the channel count stored in the codec context.
// The codec context must exist, i.e. initialize() has to have created it.
u16 FFmpegLoaderPlugin::num_channels()
{
    VERIFY(m_codec_context != nullptr);

    // Which field holds the channel count depends on the libavcodec version (see the
    // USE_FFMPEG_CH_LAYOUT definition at the top of the file).
#ifdef USE_FFMPEG_CH_LAYOUT
    auto const channel_count = m_codec_context->ch_layout.nb_channels;
#else
    auto const channel_count = m_codec_context->channels;
#endif
    return channel_count;
}
// Reports the PCM sample format of this loader; currently a fixed value.
PcmSampleFormat FFmpegLoaderPlugin::pcm_format()
{
    // FIXME: pcm_format() is unused, always return Float for now
    constexpr auto fixed_format = PcmSampleFormat::Float32;
    return fixed_format;
}
// Returns the name of the container's input format, or "unknown" if there is no format context.
ByteString FFmpegLoaderPlugin::format_name()
{
    if (m_format_context != nullptr)
        return m_format_context->iformat->name;

    return "unknown";
}
}