swr

2025-04-22 12:35:01 +00:00 · 2023-02-25 21:21:15 +01:00 · 2023-02-25 21:21:15 +01:00 · 455a802a16
commit 455a802a16
parent 4162dd89bc
5 changed files with 115 additions and 32 deletions
--- a/app/meson.build
+++ b/app/meson.build
@ -101,6 +101,7 @@ if not crossbuild_windows
        dependency('libavformat', version: '>= 57.33'),
        dependency('libavcodec', version: '>= 57.37'),
        dependency('libavutil'),
+        dependency('libswresample'),
        dependency('sdl2', version: '>= 2.0.5'),
    ]

--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@ -1,10 +1,15 @@
 #include "audio_player.h"

+#include <libavutil/opt.h>
+
 #include "util/log.h"

 /** Downcast frame_sink to sc_v4l2_sink */
 #define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)

+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_S16
+#define SC_SDL_SAMPLE_FMT AUDIO_S16
+
 void
 sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
    struct sc_audio_player *ap = userdata;
@ -28,20 +33,29 @@ sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
    }
 }

-static SDL_AudioFormat
-sc_audio_player_ffmpeg_to_sdl_format(enum AVSampleFormat format) {
-    switch (format) {
-        case AV_SAMPLE_FMT_S16:
-            return AUDIO_S16;
-        case AV_SAMPLE_FMT_S32:
-            return AUDIO_S32;
-        case AV_SAMPLE_FMT_FLT:
-            return AUDIO_F32;
-        default:
-            LOGE("Unsupported FFmpeg sample format: %s",
-                 av_get_sample_fmt_name(format));
-            return 0;
+static size_t // bytes needed to store `samples` samples for every channel
+sc_audio_player_get_swr_buf_size(struct sc_audio_player *ap, size_t samples) {
+    assert(ap->nb_channels); // set by the frame sink open callback
+    assert(ap->out_bytes_per_sample); // set by the frame sink open callback
+    return samples * ap->nb_channels * ap->out_bytes_per_sample;
+}
+
+static uint8_t * // ap->swr_buf able to hold min_samples, or NULL on OOM
+sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) {
+    size_t min_buf_size = sc_audio_player_get_swr_buf_size(ap, min_samples);
+    if (min_buf_size > ap->swr_buf_alloc_size) { // grow only if too small
+        size_t new_size = min_buf_size + 4096; // over-allocate by 4096 bytes
+        uint8_t *buf = realloc(ap->swr_buf, new_size);
+        if (!buf) {
+            LOG_OOM();
+            // Could not realloc: ap->swr_buf and its recorded size are unchanged
+            return NULL;
+        }
+        ap->swr_buf = buf;
+        ap->swr_buf_alloc_size = new_size;
    }
+
+    return ap->swr_buf;
 }

 static bool
@ -49,20 +63,45 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
                                const AVCodecContext *ctx) {
    struct sc_audio_player *ap = DOWNCAST(sink);

-    SDL_AudioFormat format =
-        sc_audio_player_ffmpeg_to_sdl_format(ctx->sample_fmt);
-    if (!format) {
-        // error already logged
-        //return false;
-        format = AUDIO_F32; // it's planar, but for now there is only 1 channel
+    SwrContext *swr_ctx = ap->swr_ctx;
+    assert(swr_ctx);
+
+    assert(ctx->sample_rate > 0);
+    assert(ctx->ch_layout.nb_channels > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+
+    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
+
+    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    int ret = swr_init(swr_ctx);
+    if (ret) {
+        LOGE("Failed to initialize the resampling context");
+        return false;
+    }
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = ctx->ch_layout.nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    size_t initial_swr_buf_size = sc_audio_player_get_swr_buf_size(ap, 4096);
+    ap->swr_buf = malloc(initial_swr_buf_size);
+    if (!ap->swr_buf) {
+        LOG_OOM();
+        return false;
    }
-    LOGI("%d\n", ctx->sample_rate);

    SDL_AudioSpec desired = {
        .freq = ctx->sample_rate,
-        .format = format,
+        .format = SC_SDL_SAMPLE_FMT,
        .channels = ctx->ch_layout.nb_channels,
-        .samples = 2048,
+        .samples = 512, // ~10ms at 48000Hz
        .callback = sc_audio_player_sdl_callback,
        .userdata = ap,
    };
@ -92,24 +131,41 @@ static bool
 sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) {
    struct sc_audio_player *ap = DOWNCAST(sink);

-    const uint8_t *data = frame->data[0];
-    size_t size = frame->linesize[0];
+    SwrContext *swr_ctx = ap->swr_ctx;
+
+    int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same
+    int dst_nb_samples = delay + frame->nb_samples;
+
+    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, frame->nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+    LOGI("ret=%d dst_nb_samples=%d\n", ret, dst_nb_samples);
+
+    size_t swr_buf_size = sc_audio_player_get_swr_buf_size(ap, ret);
+    LOGI("== swr_buf_size %lu", swr_buf_size);

-    // TODO convert to non planar format
-    // TODO then re-enable stereo
    // TODO clock drift compensation

    // It should almost always be possible to write without lock
-    bool can_write_without_lock = size <= ap->safe_empty_buffer;
+    bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer;
    if (can_write_without_lock) {
-        sc_bytebuf_prepare_write(&ap->buf, data, size);
+        sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
    }

    SDL_LockAudioDevice(ap->device);
    if (can_write_without_lock) {
-        sc_bytebuf_commit_write(&ap->buf, size);
+        sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
    } else {
-        sc_bytebuf_write(&ap->buf, data, size);
+        sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
    }

    // The next time, it will remain at least the current empty space
@ -128,8 +184,18 @@ sc_audio_player_init(struct sc_audio_player *ap,
        return false;
    }

+    ap->swr_ctx = swr_alloc();
+    if (!ap->swr_ctx) {
+        sc_bytebuf_destroy(&ap->buf);
+        LOG_OOM();
+        return false;
+    }
+
    ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf);

+    ap->swr_buf = NULL;
+    ap->swr_buf_alloc_size = 0;
+
    assert(cbs && cbs->on_ended);
    ap->cbs = cbs;
    ap->cbs_userdata = cbs_userdata;
@ -147,4 +213,6 @@ sc_audio_player_init(struct sc_audio_player *ap,
 void
 sc_audio_player_destroy(struct sc_audio_player *ap) {
    sc_bytebuf_destroy(&ap->buf);
+    swr_free(&ap->swr_ctx); // context obtained from swr_alloc() in init
+    free(ap->swr_buf); // set to NULL in init, so free() is safe if unused
 }
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@ -9,6 +9,7 @@
 #include <util/thread.h>

 #include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
 #include <SDL2/SDL.h>

 struct sc_audio_player {
@ -21,6 +22,19 @@ struct sc_audio_player {
    // Number of bytes which could be written without locking
    size_t safe_empty_buffer;

+    struct SwrContext *swr_ctx;
+
+    // The sample rate is the same for input and output
+    unsigned sample_rate;
+    // The number of channels is the same for input and output
+    unsigned nb_channels;
+
+    unsigned out_bytes_per_sample;
+
+    // Target buffer for resampling
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size;
+
    const struct sc_audio_player_callbacks *cbs;
    void *cbs_userdata;
 };
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@ -54,7 +54,7 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
    } else {
        // Hardcoded audio properties
        decoder->codec_ctx->ch_layout =
-            (AVChannelLayout) AV_CHANNEL_LAYOUT_MONO;
+            (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
        decoder->codec_ctx->sample_rate = 48000;
    }

--- a/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
+++ b/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
@ -40,7 +40,7 @@ public final class AudioEncoder {
    }

    private static final int SAMPLE_RATE = 48000;
-    private static final int CHANNELS = 1;
+    private static final int CHANNELS = 2;

    private static final int BUFFER_MS = 10; // milliseconds
    private static final int BUFFER_SIZE = SAMPLE_RATE * CHANNELS * BUFFER_MS / 1000;