/*
 * Copyright (c) 2024, Jelle Raaijmakers
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "FFmpegLoader.h"
#include <AK/NumericLimits.h>
#include <LibCore/System.h>

#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100)
#    define USE_FFMPEG_CH_LAYOUT
#endif
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 0, 100)
#    define USE_CONSTIFIED_POINTERS
#endif

namespace Audio {

// Upper bound (in bytes) handed to av_probe_input_buffer2() when sniffing a stream.
static constexpr int BUFFER_MAX_PROBE_SIZE = 64 * KiB;

// Wraps an already allocated AVIOContext; the wrapper releases it on destruction.
FFmpegIOContext::FFmpegIOContext(AVIOContext* avio_context)
    : m_avio_context(avio_context)
{
}

FFmpegIOContext::~FFmpegIOContext()
{
    // NOTE: free the buffer inside the AVIO context, since it might be changed since its initial allocation
    av_free(m_avio_context->buffer);
    avio_context_free(&m_avio_context);
}

// Creates an AVIOContext that reads from and seeks within `stream`.
// Only a pointer to `stream` is stored in the context, so the caller has to keep the stream alive for as long as
// the returned FFmpegIOContext is in use.
// Returns an error if either the I/O buffer or the AVIOContext cannot be allocated.
ErrorOr<NonnullOwnPtr<FFmpegIOContext>> FFmpegIOContext::create(AK::SeekableStream& stream)
{
    auto* avio_buffer = av_malloc(PAGE_SIZE);
    if (avio_buffer == nullptr)
        return Error::from_string_literal("Failed to allocate AVIO buffer");

    // This AVIOContext explains to avformat how to interact with our stream
    auto* avio_context = avio_alloc_context(
        static_cast<unsigned char*>(avio_buffer),
        PAGE_SIZE,
        0,
        &stream,
        // Read callback: fill `buffer` with at most `size` bytes from the stream.
        [](void* opaque, u8* buffer, int size) -> int {
            auto& stream = *static_cast<SeekableStream*>(opaque);
            AK::Bytes buffer_bytes { buffer, AK::min<size_t>(size, PAGE_SIZE) };
            auto read_bytes_or_error = stream.read_some(buffer_bytes);
            if (read_bytes_or_error.is_error()) {
                if (read_bytes_or_error.error().code() == EOF)
                    return AVERROR_EOF;
                return AVERROR_UNKNOWN;
            }
            int number_of_bytes_read = read_bytes_or_error.value().size();
            // An empty read is reported to avformat as end of file.
            if (number_of_bytes_read == 0)
                return AVERROR_EOF;
            return number_of_bytes_read;
        },
        // No write callback: the context is read-only.
        nullptr,
        // Seek callback: reposition the stream, or report its size for AVSEEK_SIZE.
        [](void* opaque, int64_t offset, int whence) -> int64_t {
            whence &= ~AVSEEK_FORCE;

            auto& stream = *static_cast<SeekableStream*>(opaque);
            if (whence == AVSEEK_SIZE) {
                // Report failure instead of unwrapping an error value when the size is unavailable.
                auto size_or_error = stream.size();
                if (size_or_error.is_error())
                    return -EIO;
                return static_cast<int64_t>(size_or_error.value());
            }

            auto seek_mode_from_whence = [](int origin) -> SeekMode {
                if (origin == SEEK_CUR)
                    return SeekMode::FromCurrentPosition;
                if (origin == SEEK_END)
                    return SeekMode::FromEndPosition;
                return SeekMode::SetPosition;
            };
            auto offset_or_error = stream.seek(offset, seek_mode_from_whence(whence));
            if (offset_or_error.is_error())
                return -EIO;
            // Hand the resulting offset back to avformat rather than a constant zero.
            return static_cast<int64_t>(offset_or_error.value());
        });
    if (avio_context == nullptr) {
        av_free(avio_buffer);
        return Error::from_string_literal("Failed to allocate AVIO context");
    }

    return make<FFmpegIOContext>(avio_context);
}

// Takes ownership of both the stream and the I/O context that reads from it.
// The FFmpeg state itself is set up separately by initialize().
FFmpegLoaderPlugin::FFmpegLoaderPlugin(NonnullOwnPtr<SeekableStream> stream, NonnullOwnPtr<FFmpegIOContext> io_context)
    : LoaderPlugin(move(stream))
    , m_io_context(move(io_context))
{
}

// Releases whichever FFmpeg objects initialize() managed to allocate.
FFmpegLoaderPlugin::~FFmpegLoaderPlugin()
{
    if (m_frame != nullptr)
        av_frame_free(&m_frame);
    if (m_packet != nullptr)
        av_packet_free(&m_packet);
    if (m_codec_context != nullptr)
        avcodec_free_context(&m_codec_context);
    if (m_format_context != nullptr)
        avformat_close_input(&m_format_context);
}

// Builds a fully initialized loader for `stream`.
// Fails if the I/O context cannot be created or if initialize() reports an error.
ErrorOr<NonnullOwnPtr<LoaderPlugin>> FFmpegLoaderPlugin::create(NonnullOwnPtr<SeekableStream> stream)
{
    auto io_context = TRY(FFmpegIOContext::create(*stream));
    auto loader = make<FFmpegLoaderPlugin>(move(stream), move(io_context));
    TRY(loader->initialize());
    return loader;
}

// Opens the container, selects the best audio stream, opens a decoder for it and allocates the packet and frame
// buffers used by load_chunks(). Each failing step is reported with its own error message.
ErrorOr<void> FFmpegLoaderPlugin::initialize()
{
    // Open the container
    m_format_context = avformat_alloc_context();
    if (m_format_context == nullptr)
        return Error::from_string_literal("Failed to allocate format context");
    m_format_context->pb = m_io_context->avio_context();
    if (avformat_open_input(&m_format_context, nullptr, nullptr, nullptr) < 0)
        return Error::from_string_literal("Failed to open input for format parsing");

    // Read stream info; doing this is required for headerless formats like MPEG
    if (avformat_find_stream_info(m_format_context, nullptr) < 0)
        return Error::from_string_literal("Failed to find stream info");

#ifdef USE_CONSTIFIED_POINTERS
    AVCodec const* codec {};
#else
    AVCodec* codec {};
#endif

    // Find the best stream to play within the container
    int best_stream_index = av_find_best_stream(m_format_context, AVMediaType::AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (best_stream_index == AVERROR_STREAM_NOT_FOUND)
        return Error::from_string_literal("No audio stream found in container");
    if (best_stream_index == AVERROR_DECODER_NOT_FOUND)
        return Error::from_string_literal("No suitable decoder found for stream");
    if (best_stream_index < 0)
        return Error::from_string_literal("Failed to find an audio stream");
    m_audio_stream = m_format_context->streams[best_stream_index];

    // Set up the context to decode the audio stream
    m_codec_context = avcodec_alloc_context3(codec);
    if (m_codec_context == nullptr)
        return Error::from_string_literal("Failed to allocate the codec context");

    if (avcodec_parameters_to_context(m_codec_context, m_audio_stream->codecpar) < 0)
        return Error::from_string_literal("Failed to copy codec parameters");

    m_codec_context->pkt_timebase = m_audio_stream->time_base;
    // Use at most four decoding threads.
    m_codec_context->thread_count = AK::min(static_cast<int>(Core::System::hardware_concurrency()), 4);

    if (avcodec_open2(m_codec_context, codec, nullptr) < 0)
        return Error::from_string_literal("Failed to open input for decoding");

    // This is an initial estimate of the total number of samples in the stream.
    // During decoding, we might need to increase the number as more frames come in.
    double duration_in_seconds = static_cast<double>(m_audio_stream->duration) * time_base();
    if (duration_in_seconds < 0)
        return Error::from_string_literal("Negative stream duration");
    m_total_samples = AK::round_to<decltype(m_total_samples)>(sample_rate() * duration_in_seconds);

    // Allocate packet (logical chunk of data) and frame (video / audio frame) buffers
    m_packet = av_packet_alloc();
    if (m_packet == nullptr)
        return Error::from_string_literal("Failed to allocate packet");

    m_frame = av_frame_alloc();
    if (m_frame == nullptr)
        return Error::from_string_literal("Failed to allocate frame");

    return {};
}

// Returns the selected audio stream's time base as a floating point number.
double FFmpegLoaderPlugin::time_base() const
{
    return av_q2d(m_audio_stream->time_base);
}

// Returns true if avformat recognizes the contents of `stream` with a positive probe score.
bool FFmpegLoaderPlugin::sniff(SeekableStream& stream)
{
    // Probing must never abort the process: if the I/O context cannot be set up, report "not recognized".
    auto io_context_or_error = FFmpegIOContext::create(stream);
    if (io_context_or_error.is_error())
        return false;
    auto io_context = io_context_or_error.release_value();

#ifdef USE_CONSTIFIED_POINTERS
    AVInputFormat const* detected_format {};
#else
    AVInputFormat* detected_format {};
#endif
    auto score = av_probe_input_buffer2(io_context->avio_context(), &detected_format, nullptr, nullptr, 0, BUFFER_MAX_PROBE_SIZE);
    return score > 0;
}

// Converts one decoded frame into an array of Samples.
// Only mono and stereo frames in the FLTP, S16 or S32 sample formats are accepted; anything else yields an error.
// Integer samples are divided by the maximum value of their type to obtain floats.
static ErrorOr<FixedArray<Sample>> extract_samples_from_frame(AVFrame& frame)
{
    size_t number_of_samples = frame.nb_samples;
    VERIFY(number_of_samples > 0);

#ifdef USE_FFMPEG_CH_LAYOUT
    size_t number_of_channels = frame.ch_layout.nb_channels;
#else
    size_t number_of_channels = frame.channels;
#endif
    auto format = static_cast<AVSampleFormat>(frame.format);
    auto packed_format = av_get_packed_sample_fmt(format);
    auto is_planar = av_sample_fmt_is_planar(format) == 1;

    // FIXME: handle number_of_channels > 2
    if (number_of_channels != 1 && number_of_channels != 2)
        return Error::from_string_view("Unsupported number of channels"sv);

    switch (format) {
    case AV_SAMPLE_FMT_FLTP:
    case AV_SAMPLE_FMT_S16:
    case AV_SAMPLE_FMT_S32:
        break;
    default:
        // FIXME: handle other formats
        return Error::from_string_view("Unsupported sample format"sv);
    }

    // Planar data has one plane per channel; packed data keeps all channels interleaved in the first plane.
    auto get_plane_pointer = [&](size_t channel_index) -> uint8_t* {
        return is_planar ? frame.extended_data[channel_index] : frame.extended_data[0];
    };
    auto index_in_plane = [&](size_t sample_index, size_t channel_index) {
        if (is_planar)
            return sample_index;
        return sample_index * number_of_channels + channel_index;
    };
    auto read_sample = [&](uint8_t* data, size_t index) -> float {
        switch (packed_format) {
        case AV_SAMPLE_FMT_FLT:
            return reinterpret_cast<float*>(data)[index];
        case AV_SAMPLE_FMT_S16:
            return reinterpret_cast<i16*>(data)[index] / static_cast<float>(NumericLimits<i16>::max());
        case AV_SAMPLE_FMT_S32:
            return reinterpret_cast<i32*>(data)[index] / static_cast<float>(NumericLimits<i32>::max());
        default:
            VERIFY_NOT_REACHED();
        }
    };

    auto samples = TRY(FixedArray<Sample>::create(number_of_samples));
    for (size_t sample = 0; sample < number_of_samples; ++sample) {
        if (number_of_channels == 1) {
            samples.unchecked_at(sample) = Sample { read_sample(get_plane_pointer(0), index_in_plane(sample, 0)) };
        } else {
            samples.unchecked_at(sample) = Sample {
                read_sample(get_plane_pointer(0), index_in_plane(sample, 0)),
                read_sample(get_plane_pointer(1), index_in_plane(sample, 1)),
            };
        }
    }
    return samples;
}

// Reads packets and decodes frames until at least `samples_to_read_from_input` samples were produced or the
// input is exhausted. Each decoded frame becomes one chunk in the returned vector.
// Also updates m_loaded_samples from the frame timestamps and grows m_total_samples when the estimate was too low.
ErrorOr<Vector<FixedArray<Sample>>> FFmpegLoaderPlugin::load_chunks(size_t samples_to_read_from_input)
{
    Vector<FixedArray<Sample>> chunks {};

    do {
        // Obtain a packet
        auto read_frame_error = av_read_frame(m_format_context, m_packet);
        if (read_frame_error < 0) {
            if (read_frame_error == AVERROR_EOF)
                break;
            return Error::from_string_literal("Failed to read frame");
        }
        // Skip packets that belong to other streams in the container.
        if (m_packet->stream_index != m_audio_stream->index) {
            av_packet_unref(m_packet);
            continue;
        }

        // Send the packet to the decoder. Release the packet before looking at the result, so that its buffers
        // are not left referenced when we bail out with an error.
        auto send_packet_error = avcodec_send_packet(m_codec_context, m_packet);
        av_packet_unref(m_packet);
        if (send_packet_error < 0)
            return Error::from_string_literal("Failed to send packet");

        // Ask the decoder for a new frame. We might not have sent enough data yet
        auto receive_frame_error = avcodec_receive_frame(m_codec_context, m_frame);
        if (receive_frame_error != 0) {
            if (receive_frame_error == AVERROR(EAGAIN))
                continue;
            if (receive_frame_error == AVERROR_EOF)
                break;
            return Error::from_string_literal("Failed to receive frame");
        }

        chunks.append(TRY(extract_samples_from_frame(*m_frame)));

        // Use the frame's presentation timestamp to set the number of loaded samples
        m_loaded_samples = static_cast<int>(m_frame->pts * sample_rate() * time_base());
        if (m_loaded_samples > m_total_samples) [[unlikely]]
            m_total_samples = m_loaded_samples;

        // Clamp the subtraction so the unsigned counter cannot wrap around.
        samples_to_read_from_input -= AK::min<size_t>(samples_to_read_from_input, m_frame->nb_samples);
    } while (samples_to_read_from_input > 0);

    return chunks;
}

// Rewinds the loader to the first sample.
ErrorOr<void> FFmpegLoaderPlugin::reset()
{
    return seek(0);
}

// Seeks the container to the timestamp corresponding to `sample_index`, flushes the decoder and records
// `sample_index` as the current position.
ErrorOr<void> FFmpegLoaderPlugin::seek(int sample_index)
{
    auto sample_position_in_seconds = static_cast<double>(sample_index) / sample_rate();
    auto sample_timestamp = AK::round_to<i64>(sample_position_in_seconds / time_base());

    if (av_seek_frame(m_format_context, m_audio_stream->index, sample_timestamp, AVSEEK_FLAG_ANY) < 0)
        return Error::from_string_literal("Failed to seek");
    // Drop any frames the decoder still holds from before the seek.
    avcodec_flush_buffers(m_codec_context);

    m_loaded_samples = sample_index;
    return {};
}

// Sample rate as reported by the codec context; requires a successfully allocated codec context.
u32 FFmpegLoaderPlugin::sample_rate()
{
    VERIFY(m_codec_context != nullptr);
    return m_codec_context->sample_rate;
}

// Channel count as reported by the codec context; requires a successfully allocated codec context.
u16 FFmpegLoaderPlugin::num_channels()
{
    VERIFY(m_codec_context != nullptr);
#ifdef USE_FFMPEG_CH_LAYOUT
    return m_codec_context->ch_layout.nb_channels;
#else
    return m_codec_context->channels;
#endif
}

PcmSampleFormat FFmpegLoaderPlugin::pcm_format()
{
    // FIXME: pcm_format() is unused, always return Float for now
    return PcmSampleFormat::Float32;
}

// Name of the detected container format, or "unknown" when no format context exists.
ByteString FFmpegLoaderPlugin::format_name()
{
    if (!m_format_context)
        return "unknown";
    return m_format_context->iformat->name;
}

}