From 455a802a16692ddaf070822aad260d75d443baef Mon Sep 17 00:00:00 2001
From: Romain Vimont
Date: Sat, 25 Feb 2023 21:21:15 +0100
Subject: [PATCH] swr

Convert the decoded audio frames to packed signed 16-bit samples using
libswresample before writing them to the audio player buffer. The channel
layout and the sample rate are left unchanged by the conversion.

The conversion buffer is owned by the audio player and grown on demand.

Since the player does not read frame->data[0] directly anymore, planar
input formats are supported, so switch the hardcoded audio configuration
from mono to stereo on both the client and the server.

---
 app/meson.build                               |   1 +
 app/src/audio_player.c                        | 140 ++++++++++++++----
 app/src/audio_player.h                        |  14 ++
 app/src/decoder.c                             |   2 +-
 .../com/genymobile/scrcpy/AudioEncoder.java   |   2 +-
 5 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/app/meson.build b/app/meson.build
index 562a5358..9e0aad74 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -101,6 +101,7 @@ if not crossbuild_windows
         dependency('libavformat', version: '>= 57.33'),
         dependency('libavcodec', version: '>= 57.37'),
         dependency('libavutil'),
+        dependency('libswresample'),
         dependency('sdl2', version: '>= 2.0.5'),
     ]
 
diff --git a/app/src/audio_player.c b/app/src/audio_player.c
index f1649c60..1ff08107 100644
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@@ -1,10 +1,15 @@
 #include "audio_player.h"
 
+#include <libavutil/opt.h>
+
 #include "util/log.h"
 
 /** Downcast frame_sink to sc_v4l2_sink */
 #define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
 
+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_S16
+#define SC_SDL_SAMPLE_FMT AUDIO_S16
+
 void
 sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
     struct sc_audio_player *ap = userdata;
@@ -28,20 +33,40 @@ sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
     }
 }
 
-static SDL_AudioFormat
-sc_audio_player_ffmpeg_to_sdl_format(enum AVSampleFormat format) {
-    switch (format) {
-        case AV_SAMPLE_FMT_S16:
-            return AUDIO_S16;
-        case AV_SAMPLE_FMT_S32:
-            return AUDIO_S32;
-        case AV_SAMPLE_FMT_FLT:
-            return AUDIO_F32;
-        default:
-            LOGE("Unsupported FFmpeg sample format: %s",
-                 av_get_sample_fmt_name(format));
-            return 0;
+/**
+ * Return the size in bytes of `samples` samples in the output format
+ * (packed, all channels included).
+ */
+static size_t
+sc_audio_player_get_swr_buf_size(struct sc_audio_player *ap, size_t samples) {
+    assert(ap->nb_channels);
+    assert(ap->out_bytes_per_sample);
+    return samples * ap->nb_channels * ap->out_bytes_per_sample;
+}
+
+/**
+ * Return the conversion buffer, grown if necessary so that it can hold at
+ * least `min_samples` output samples.
+ *
+ * Return NULL on allocation failure (the previous buffer is left untouched).
+ */
+static uint8_t *
+sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) {
+    size_t min_buf_size = sc_audio_player_get_swr_buf_size(ap, min_samples);
+    if (min_buf_size > ap->swr_buf_alloc_size) {
+        // Allocate some extra room to avoid reallocating on every small growth
+        size_t new_size = min_buf_size + 4096;
+        uint8_t *buf = realloc(ap->swr_buf, new_size);
+        if (!buf) {
+            LOG_OOM();
+            // Could not realloc to the requested size
+            return NULL;
+        }
+        ap->swr_buf = buf;
+        ap->swr_buf_alloc_size = new_size;
     }
+
+    return ap->swr_buf;
 }
 
 static bool
@@ -49,20 +74,47 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
                                 const AVCodecContext *ctx) {
     struct sc_audio_player *ap = DOWNCAST(sink);
 
-    SDL_AudioFormat format =
-        sc_audio_player_ffmpeg_to_sdl_format(ctx->sample_fmt);
-    if (!format) {
-        // error already logged
-        //return false;
-        format = AUDIO_F32; // it's planar, but for now there is only 1 channel
+    SwrContext *swr_ctx = ap->swr_ctx;
+    assert(swr_ctx);
+
+    assert(ctx->sample_rate > 0);
+    assert(ctx->ch_layout.nb_channels > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+
+    // Convert only the sample format: keep the channel layout and sample rate
+    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
+
+    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    int ret = swr_init(swr_ctx);
+    if (ret) {
+        LOGE("Failed to initialize the resampling context");
+        return false;
+    }
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = ctx->ch_layout.nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    size_t initial_swr_buf_size = sc_audio_player_get_swr_buf_size(ap, 4096);
+    ap->swr_buf = malloc(initial_swr_buf_size);
+    if (!ap->swr_buf) {
+        LOG_OOM();
+        return false;
     }
+    ap->swr_buf_alloc_size = initial_swr_buf_size;
 
-    LOGI("%d\n", ctx->sample_rate);
     SDL_AudioSpec desired = {
         .freq = ctx->sample_rate,
-        .format = format,
+        .format = SC_SDL_SAMPLE_FMT,
         .channels = ctx->ch_layout.nb_channels,
-        .samples = 2048,
+        .samples = 512, // ~10ms at 48000Hz
         .callback = sc_audio_player_sdl_callback,
         .userdata = ap,
     };
@@ -92,24 +144,40 @@ static bool
 sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) {
     struct sc_audio_player *ap = DOWNCAST(sink);
 
-    const uint8_t *data = frame->data[0];
-    size_t size = frame->linesize[0];
+    SwrContext *swr_ctx = ap->swr_ctx;
+
+    int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same
+    int dst_nb_samples = delay + frame->nb_samples;
+
+    // The buffer must hold all the samples swr_convert() may output
+    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+
+    size_t swr_buf_size = sc_audio_player_get_swr_buf_size(ap, ret);
 
-    // TODO convert to non planar format
-    // TODO then re-enable stereo
     // TODO clock drift compensation
 
     // It should almost always be possible to write without lock
-    bool can_write_without_lock = size <= ap->safe_empty_buffer;
+    bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer;
     if (can_write_without_lock) {
-        sc_bytebuf_prepare_write(&ap->buf, data, size);
+        sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
     }
 
     SDL_LockAudioDevice(ap->device);
     if (can_write_without_lock) {
-        sc_bytebuf_commit_write(&ap->buf, size);
+        sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
     } else {
-        sc_bytebuf_write(&ap->buf, data, size);
+        sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
     }
 
     // The next time, it will remain at least the current empty space
@@ -128,8 +196,18 @@ sc_audio_player_init(struct sc_audio_player *ap,
         return false;
     }
 
+    ap->swr_ctx = swr_alloc();
+    if (!ap->swr_ctx) {
+        sc_bytebuf_destroy(&ap->buf);
+        LOG_OOM();
+        return false;
+    }
+
     ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf);
 
+    ap->swr_buf = NULL;
+    ap->swr_buf_alloc_size = 0;
+
     assert(cbs && cbs->on_ended);
     ap->cbs = cbs;
     ap->cbs_userdata = cbs_userdata;
@@ -147,4 +225,6 @@ sc_audio_player_init(struct sc_audio_player *ap,
 void
 sc_audio_player_destroy(struct sc_audio_player *ap) {
     sc_bytebuf_destroy(&ap->buf);
+    swr_free(&ap->swr_ctx);
+    free(ap->swr_buf);
 }
diff --git a/app/src/audio_player.h b/app/src/audio_player.h
index fbe6aac0..c2d3b37e 100644
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@@ -9,6 +9,7 @@
 #include
 
 #include
+#include <libswresample/swresample.h>
 #include
 
 struct sc_audio_player {
@@ -21,6 +22,19 @@ struct sc_audio_player {
     // Number of bytes which could be written without locking
     size_t safe_empty_buffer;
 
+    struct SwrContext *swr_ctx;
+
+    // The sample rate is the same for input and output
+    unsigned sample_rate;
+    // The number of channels is the same for input and output
+    unsigned nb_channels;
+
+    unsigned out_bytes_per_sample;
+
+    // Target buffer for resampling
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size;
+
     const struct sc_audio_player_callbacks *cbs;
     void *cbs_userdata;
 };
diff --git a/app/src/decoder.c b/app/src/decoder.c
index 644d410e..ef66bb08 100644
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@@ -54,7 +54,7 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
     } else {
         // Hardcoded audio properties
         decoder->codec_ctx->ch_layout =
-            (AVChannelLayout) AV_CHANNEL_LAYOUT_MONO;
+            (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
         decoder->codec_ctx->sample_rate = 48000;
     }
 
diff --git a/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java b/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
index 8f950d47..3b012b11 100644
--- a/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
+++ b/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
@@ -40,7 +40,7 @@ public final class AudioEncoder {
     }
 
     private static final int SAMPLE_RATE = 48000;
-    private static final int CHANNELS = 1;
+    private static final int CHANNELS = 2;
     private static final int BUFFER_MS = 10; // milliseconds
     private static final int BUFFER_SIZE = SAMPLE_RATE * CHANNELS * BUFFER_MS / 1000;