From 455a802a16692ddaf070822aad260d75d443baef Mon Sep 17 00:00:00 2001
From: Romain Vimont <rom@rom1v.com>
Date: Sat, 25 Feb 2023 21:21:15 +0100
Subject: [PATCH] Convert audio to packed S16 with libswresample, enable stereo

---
 app/meson.build                               |   1 +
 app/src/audio_player.c                        | 128 ++++++++++++++----
 app/src/audio_player.h                        |  14 ++
 app/src/decoder.c                             |   2 +-
 .../com/genymobile/scrcpy/AudioEncoder.java   |   2 +-
 5 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/app/meson.build b/app/meson.build
index 562a5358..9e0aad74 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -101,6 +101,7 @@ if not crossbuild_windows
         dependency('libavformat', version: '>= 57.33'),
         dependency('libavcodec', version: '>= 57.37'),
         dependency('libavutil'),
+        dependency('libswresample'),
         dependency('sdl2', version: '>= 2.0.5'),
     ]
 
diff --git a/app/src/audio_player.c b/app/src/audio_player.c
index f1649c60..1ff08107 100644
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@@ -1,10 +1,15 @@
 #include "audio_player.h"
 
+#include <libavutil/opt.h>
+
 #include "util/log.h"
 
 /** Downcast frame_sink to sc_v4l2_sink */
 #define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
 
+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_S16 // packed, native endianness
+#define SC_SDL_SAMPLE_FMT AUDIO_S16SYS // native endianness, to match FFmpeg
+
 void
 sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
     struct sc_audio_player *ap = userdata;
@@ -28,20 +33,29 @@ sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
     }
 }
 
-static SDL_AudioFormat
-sc_audio_player_ffmpeg_to_sdl_format(enum AVSampleFormat format) {
-    switch (format) {
-        case AV_SAMPLE_FMT_S16:
-            return AUDIO_S16;
-        case AV_SAMPLE_FMT_S32:
-            return AUDIO_S32;
-        case AV_SAMPLE_FMT_FLT:
-            return AUDIO_F32;
-        default:
-            LOGE("Unsupported FFmpeg sample format: %s",
-                 av_get_sample_fmt_name(format));
-            return 0;
+static size_t
+sc_audio_player_get_swr_buf_size(struct sc_audio_player *ap, size_t samples) {
+    assert(ap->nb_channels); // set when the sink is opened
+    assert(ap->out_bytes_per_sample); // set when the sink is opened
+    return samples * ap->nb_channels * ap->out_bytes_per_sample; // in bytes
+}
+
+static uint8_t *
+sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) {
+    size_t min_buf_size = sc_audio_player_get_swr_buf_size(ap, min_samples);
+    if (min_buf_size > ap->swr_buf_alloc_size) { // grow only when too small
+        size_t new_size = min_buf_size + 4096; // 4096 bytes of headroom
+        uint8_t *buf = realloc(ap->swr_buf, new_size);
+        if (!buf) {
+            LOG_OOM();
+            // realloc() failed: ap->swr_buf is left untouched
+            return NULL;
+        }
+        ap->swr_buf = buf;
+        ap->swr_buf_alloc_size = new_size;
     }
+
+    return ap->swr_buf;
 }
 
 static bool
@@ -49,20 +63,45 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
                                 const AVCodecContext *ctx) {
     struct sc_audio_player *ap = DOWNCAST(sink);
 
-    SDL_AudioFormat format =
-        sc_audio_player_ffmpeg_to_sdl_format(ctx->sample_fmt);
-    if (!format) {
-        // error already logged
-        //return false;
-        format = AUDIO_F32; // it's planar, but for now there is only 1 channel
+    SwrContext *swr_ctx = ap->swr_ctx;
+    assert(swr_ctx);
+
+    assert(ctx->sample_rate > 0);
+    assert(ctx->ch_layout.nb_channels > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+    // Input parameters, as described by the decoder context
+    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
+    // Output parameters: same layout and rate, SC_AV_SAMPLE_FMT samples
+    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    int ret = swr_init(swr_ctx);
+    if (ret) {
+        LOGE("Failed to initialize the resampling context");
+        return false;
+    }
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = ctx->ch_layout.nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    ap->swr_buf_alloc_size = sc_audio_player_get_swr_buf_size(ap, 4096);
+    ap->swr_buf = malloc(ap->swr_buf_alloc_size); // capacity recorded above
+    if (!ap->swr_buf) {
+        LOG_OOM();
+        return false;
     }
-    LOGI("%d\n", ctx->sample_rate);
 
     SDL_AudioSpec desired = {
         .freq = ctx->sample_rate,
-        .format = format,
+        .format = SC_SDL_SAMPLE_FMT,
         .channels = ctx->ch_layout.nb_channels,
-        .samples = 2048,
+        .samples = 512, // ~10ms at 48000Hz
         .callback = sc_audio_player_sdl_callback,
         .userdata = ap,
     };
@@ -92,24 +131,41 @@ static bool
 sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) {
     struct sc_audio_player *ap = DOWNCAST(sink);
 
-    const uint8_t *data = frame->data[0];
-    size_t size = frame->linesize[0];
+    SwrContext *swr_ctx = ap->swr_ctx;
+
+    int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same
+    int dst_nb_samples = delay + frame->nb_samples;
+    // swr_convert() may write up to dst_nb_samples samples per channel
+    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+    LOGI("ret=%d dst_nb_samples=%d\n", ret, dst_nb_samples);
+
+    size_t swr_buf_size = sc_audio_player_get_swr_buf_size(ap, ret);
+    LOGI("== swr_buf_size %lu", (unsigned long) swr_buf_size); // size_t != %lu
 
-    // TODO convert to non planar format
-    // TODO then re-enable stereo
     // TODO clock drift compensation
 
     // It should almost always be possible to write without lock
-    bool can_write_without_lock = size <= ap->safe_empty_buffer;
+    bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer;
     if (can_write_without_lock) {
-        sc_bytebuf_prepare_write(&ap->buf, data, size);
+        sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
     }
 
     SDL_LockAudioDevice(ap->device);
     if (can_write_without_lock) {
-        sc_bytebuf_commit_write(&ap->buf, size);
+        sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
     } else {
-        sc_bytebuf_write(&ap->buf, data, size);
+        sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
     }
 
     // The next time, it will remain at least the current empty space
@@ -128,8 +184,18 @@ sc_audio_player_init(struct sc_audio_player *ap,
         return false;
     }
 
+    ap->swr_ctx = swr_alloc(); // configured later, when the sink is opened
+    if (!ap->swr_ctx) {
+        sc_bytebuf_destroy(&ap->buf);
+        LOG_OOM();
+        return false;
+    }
+
     ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf);
 
+    ap->swr_buf = NULL;
+    ap->swr_buf_alloc_size = 0;
+
     assert(cbs && cbs->on_ended);
     ap->cbs = cbs;
     ap->cbs_userdata = cbs_userdata;
@@ -147,4 +213,6 @@ sc_audio_player_init(struct sc_audio_player *ap,
 void
 sc_audio_player_destroy(struct sc_audio_player *ap) {
     sc_bytebuf_destroy(&ap->buf);
+    swr_free(&ap->swr_ctx); // context allocated by sc_audio_player_init()
+    free(ap->swr_buf); // still NULL if the sink was never opened
 }
diff --git a/app/src/audio_player.h b/app/src/audio_player.h
index fbe6aac0..c2d3b37e 100644
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@@ -9,6 +9,7 @@
 #include <util/thread.h>
 
 #include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
 #include <SDL2/SDL.h>
 
 struct sc_audio_player {
@@ -21,6 +22,19 @@ struct sc_audio_player {
     // Number of bytes which could be written without locking
     size_t safe_empty_buffer;
 
+    struct SwrContext *swr_ctx; // allocated in init, freed in destroy
+
+    // The sample rate is the same for input and output
+    unsigned sample_rate;
+    // The number of channels is the same for input and output
+    unsigned nb_channels;
+
+    unsigned out_bytes_per_sample; // for a single channel
+
+    // Target buffer for resampling
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size; // in bytes
+
     const struct sc_audio_player_callbacks *cbs;
     void *cbs_userdata;
 };
diff --git a/app/src/decoder.c b/app/src/decoder.c
index 644d410e..ef66bb08 100644
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@@ -54,7 +54,7 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
     } else {
         // Hardcoded audio properties
         decoder->codec_ctx->ch_layout =
-            (AVChannelLayout) AV_CHANNEL_LAYOUT_MONO;
+            (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
         decoder->codec_ctx->sample_rate = 48000;
     }
 
diff --git a/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java b/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
index 8f950d47..3b012b11 100644
--- a/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
+++ b/server/src/main/java/com/genymobile/scrcpy/AudioEncoder.java
@@ -40,7 +40,7 @@ public final class AudioEncoder {
     }
 
     private static final int SAMPLE_RATE = 48000;
-    private static final int CHANNELS = 1;
+    private static final int CHANNELS = 2;
 
     private static final int BUFFER_MS = 10; // milliseconds
     private static final int BUFFER_SIZE = SAMPLE_RATE * CHANNELS * BUFFER_MS / 1000;