This commit is contained in:
Romain Vimont 2023-02-25 21:21:15 +01:00
parent 4162dd89bc
commit 455a802a16
5 changed files with 115 additions and 32 deletions

View file

@ -101,6 +101,7 @@ if not crossbuild_windows
dependency('libavformat', version: '>= 57.33'),
dependency('libavcodec', version: '>= 57.37'),
dependency('libavutil'),
dependency('libswresample'),
dependency('sdl2', version: '>= 2.0.5'),
]

View file

@ -1,10 +1,15 @@
#include "audio_player.h"
#include <libavutil/opt.h>
#include "util/log.h"
/** Downcast frame_sink to sc_audio_player */
#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
// Output sample format configured on the resampler ("out_sample_fmt").
// The frame sink asserts that this format is not planar.
#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_S16
// Sample format requested from SDL when opening the audio device.
// NOTE(review): presumably must describe the same sample layout as
// SC_AV_SAMPLE_FMT, since the resampled data is what gets queued for
// playback -- keep both in sync.
#define SC_SDL_SAMPLE_FMT AUDIO_S16
void
sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
struct sc_audio_player *ap = userdata;
@ -28,20 +33,29 @@ sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
}
}
static SDL_AudioFormat
sc_audio_player_ffmpeg_to_sdl_format(enum AVSampleFormat format) {
switch (format) {
case AV_SAMPLE_FMT_S16:
return AUDIO_S16;
case AV_SAMPLE_FMT_S32:
return AUDIO_S32;
case AV_SAMPLE_FMT_FLT:
return AUDIO_F32;
default:
LOGE("Unsupported FFmpeg sample format: %s",
av_get_sample_fmt_name(format));
return 0;
/**
 * Convert a number of samples into the number of bytes they occupy in the
 * resampler output buffer.
 *
 * The result is samples * channels * bytes-per-output-sample, using the
 * channel count and output sample width stored in the audio player. Both
 * values must already be set (non-zero), which is checked by assertions.
 */
static size_t
sc_audio_player_get_swr_buf_size(struct sc_audio_player *ap, size_t samples) {
    assert(ap->nb_channels);
    assert(ap->out_bytes_per_sample);

    // Accumulate in size_t so that every multiplication is done in size_t
    size_t bytes = samples;
    bytes *= ap->nb_channels;
    bytes *= ap->out_bytes_per_sample;
    return bytes;
}
/**
 * Return the resampling target buffer, growing it first if it is too small
 * to hold `min_samples` samples.
 *
 * The minimum size in bytes is computed by
 * sc_audio_player_get_swr_buf_size(). When the currently recorded allocation
 * size (ap->swr_buf_alloc_size) is smaller than that, the buffer is
 * reallocated with 4096 extra bytes of headroom, so that slightly larger
 * subsequent requests do not each trigger a new realloc().
 *
 * On allocation failure, the existing buffer and its recorded size are left
 * untouched, the error is logged, and NULL is returned.
 *
 * The returned buffer remains owned by the audio player.
 */
static uint8_t *
sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) {
    size_t min_buf_size = sc_audio_player_get_swr_buf_size(ap, min_samples);

    // Grow only when the current allocation is too small. (The comparison
    // used to be inverted, so the buffer was never grown when it had to be.)
    if (min_buf_size > ap->swr_buf_alloc_size) {
        size_t new_size = min_buf_size + 4096;
        // Keep the old pointer until realloc() succeeds, to avoid leaking it
        uint8_t *buf = realloc(ap->swr_buf, new_size);
        if (!buf) {
            LOG_OOM();
            // Could not realloc to the requested size
            return NULL;
        }
        ap->swr_buf = buf;
        ap->swr_buf_alloc_size = new_size;
    }

    return ap->swr_buf;
}
static bool
@ -49,20 +63,45 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
const AVCodecContext *ctx) {
struct sc_audio_player *ap = DOWNCAST(sink);
SDL_AudioFormat format =
sc_audio_player_ffmpeg_to_sdl_format(ctx->sample_fmt);
if (!format) {
// error already logged
//return false;
format = AUDIO_F32; // it's planar, but for now there is only 1 channel
SwrContext *swr_ctx = ap->swr_ctx;
assert(swr_ctx);
assert(ctx->sample_rate > 0);
assert(ctx->ch_layout.nb_channels > 0);
assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
assert(out_bytes_per_sample > 0);
av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
int ret = swr_init(swr_ctx);
if (ret) {
LOGE("Failed to initialize the resampling context");
return false;
}
ap->sample_rate = ctx->sample_rate;
ap->nb_channels = ctx->ch_layout.nb_channels;
ap->out_bytes_per_sample = out_bytes_per_sample;
size_t initial_swr_buf_size = sc_audio_player_get_swr_buf_size(ap, 4096);
ap->swr_buf = malloc(initial_swr_buf_size);
if (!ap->swr_buf) {
LOG_OOM();
return false;
}
LOGI("%d\n", ctx->sample_rate);
SDL_AudioSpec desired = {
.freq = ctx->sample_rate,
.format = format,
.format = SC_SDL_SAMPLE_FMT,
.channels = ctx->ch_layout.nb_channels,
.samples = 2048,
.samples = 512, // ~10ms at 48000Hz
.callback = sc_audio_player_sdl_callback,
.userdata = ap,
};
@ -92,24 +131,41 @@ static bool
sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) {
struct sc_audio_player *ap = DOWNCAST(sink);
const uint8_t *data = frame->data[0];
size_t size = frame->linesize[0];
SwrContext *swr_ctx = ap->swr_ctx;
int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate);
// No need to av_rescale_rnd(), input and output sample rates are the same
int dst_nb_samples = delay + frame->nb_samples;
uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, frame->nb_samples);
if (!swr_buf) {
return false;
}
int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
(const uint8_t **) frame->data, frame->nb_samples);
if (ret < 0) {
LOGE("Resampling failed: %d", ret);
return false;
}
LOGI("ret=%d dst_nb_samples=%d\n", ret, dst_nb_samples);
size_t swr_buf_size = sc_audio_player_get_swr_buf_size(ap, ret);
LOGI("== swr_buf_size %lu", swr_buf_size);
// TODO convert to non planar format
// TODO then re-enable stereo
// TODO clock drift compensation
// It should almost always be possible to write without lock
bool can_write_without_lock = size <= ap->safe_empty_buffer;
bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer;
if (can_write_without_lock) {
sc_bytebuf_prepare_write(&ap->buf, data, size);
sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
}
SDL_LockAudioDevice(ap->device);
if (can_write_without_lock) {
sc_bytebuf_commit_write(&ap->buf, size);
sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
} else {
sc_bytebuf_write(&ap->buf, data, size);
sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
}
// The next time, it will remain at least the current empty space
@ -128,8 +184,18 @@ sc_audio_player_init(struct sc_audio_player *ap,
return false;
}
ap->swr_ctx = swr_alloc();
if (!ap->swr_ctx) {
sc_bytebuf_destroy(&ap->buf);
LOG_OOM();
return false;
}
ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf);
ap->swr_buf = NULL;
ap->swr_buf_alloc_size = 0;
assert(cbs && cbs->on_ended);
ap->cbs = cbs;
ap->cbs_userdata = cbs_userdata;
@ -147,4 +213,6 @@ sc_audio_player_init(struct sc_audio_player *ap,
/**
 * Release the resources held by the audio player: the resampling target
 * buffer, the resampling context and the byte buffer.
 *
 * The three releases are independent of each other.
 */
void
sc_audio_player_destroy(struct sc_audio_player *ap) {
    // Resampling resources
    free(ap->swr_buf);
    swr_free(&ap->swr_ctx);

    // Queue of samples to play
    sc_bytebuf_destroy(&ap->buf);
}

View file

@ -9,6 +9,7 @@
#include <util/thread.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#include <SDL2/SDL.h>
struct sc_audio_player {
@ -21,6 +22,19 @@ struct sc_audio_player {
// Number of bytes which could be written without locking
size_t safe_empty_buffer;
struct SwrContext *swr_ctx;
// The sample rate is the same for input and output
unsigned sample_rate;
// The number of channels is the same for input and output
unsigned nb_channels;
unsigned out_bytes_per_sample;
// Target buffer for resampling
uint8_t *swr_buf;
size_t swr_buf_alloc_size;
const struct sc_audio_player_callbacks *cbs;
void *cbs_userdata;
};

View file

@ -54,7 +54,7 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
} else {
// Hardcoded audio properties
decoder->codec_ctx->ch_layout =
(AVChannelLayout) AV_CHANNEL_LAYOUT_MONO;
(AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
decoder->codec_ctx->sample_rate = 48000;
}

View file

@ -40,7 +40,7 @@ public final class AudioEncoder {
}
private static final int SAMPLE_RATE = 48000;
private static final int CHANNELS = 1;
private static final int CHANNELS = 2;
private static final int BUFFER_MS = 10; // milliseconds
private static final int BUFFER_SIZE = SAMPLE_RATE * CHANNELS * BUFFER_MS / 1000;