diff --git a/BUILD.md b/BUILD.md index 0c708bde..51f8141e 100644 --- a/BUILD.md +++ b/BUILD.md @@ -15,7 +15,7 @@ First, you need to install the required packages: sudo apt install ffmpeg libsdl2-2.0-0 adb wget \ gcc git pkg-config meson ninja-build libsdl2-dev \ libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \ - libusb-1.0-0 libusb-1.0-0-dev + libswresample-dev libusb-1.0-0 libusb-1.0-0-dev ``` Then clone the repo and execute the installation script @@ -94,7 +94,7 @@ sudo apt install ffmpeg libsdl2-2.0-0 adb libusb-1.0-0 # client build dependencies sudo apt install gcc git pkg-config meson ninja-build libsdl2-dev \ libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \ - libusb-1.0-0-dev + libswresample-dev libusb-1.0-0-dev # server build dependencies sudo apt install openjdk-11-jdk diff --git a/app/meson.build b/app/meson.build index aa4b1989..756042a1 100644 --- a/app/meson.build +++ b/app/meson.build @@ -4,6 +4,7 @@ src = [ 'src/adb/adb_device.c', 'src/adb/adb_parser.c', 'src/adb/adb_tunnel.c', + 'src/audio_player.c', 'src/cli.c', 'src/clock.c', 'src/compat.c', @@ -30,6 +31,7 @@ src = [ 'src/version.c', 'src/video_buffer.c', 'src/util/acksync.c', + 'src/util/average.c', 'src/util/bytebuf.c', 'src/util/file.c', 'src/util/intmap.c', @@ -100,6 +102,7 @@ if not crossbuild_windows dependency('libavformat', version: '>= 57.33'), dependency('libavcodec', version: '>= 57.37'), dependency('libavutil'), + dependency('libswresample'), dependency('sdl2', version: '>= 2.0.5'), ] @@ -134,12 +137,14 @@ else ffmpeg_avcodec = meson.get_cross_property('ffmpeg_avcodec') ffmpeg_avformat = meson.get_cross_property('ffmpeg_avformat') ffmpeg_avutil = meson.get_cross_property('ffmpeg_avutil') + ffmpeg_swresample = meson.get_cross_property('ffmpeg_swresample') ffmpeg = declare_dependency( dependencies: [ cc.find_library(ffmpeg_avcodec, dirs: ffmpeg_bin_dir), cc.find_library(ffmpeg_avformat, dirs: ffmpeg_bin_dir), cc.find_library(ffmpeg_avutil, dirs: 
ffmpeg_bin_dir), + cc.find_library(ffmpeg_swresample, dirs: ffmpeg_bin_dir), ], include_directories: include_directories(ffmpeg_include_dir) ) diff --git a/app/src/audio_player.c b/app/src/audio_player.c new file mode 100644 index 00000000..653291ff --- /dev/null +++ b/app/src/audio_player.c @@ -0,0 +1,355 @@ +#include "audio_player.h" + +#include + +#include "util/log.h" + +//#define SC_AUDIO_PLAYER_NDEBUG // comment to debug + +/** Downcast frame_sink to sc_audio_player */ +#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink) + +#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT +#define SC_SDL_SAMPLE_FMT AUDIO_F32 + +#define SC_AUDIO_OUTPUT_BUFFER_SAMPLES 480 // 10ms at 48000Hz + +// The target number of buffered samples between the producer and the consumer. +// This value is directly used for compensation. +#define SC_TARGET_BUFFERED_SAMPLES (3 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES) + +// Use a ring-buffer of 1 second (at 48000Hz) between the producer and the +// consumer. It is too big, but it guarantees that the producer and the consumer +// will be able to access it in parallel without locking. 
+#define SC_BYTEBUF_SIZE_IN_SAMPLES 48000 + +static inline size_t +bytes_to_samples(struct sc_audio_player *ap, size_t bytes) { + assert(bytes % (ap->nb_channels * ap->out_bytes_per_sample) == 0); + return bytes / (ap->nb_channels * ap->out_bytes_per_sample); +} + +static inline size_t +samples_to_bytes(struct sc_audio_player *ap, size_t samples) { + return samples * ap->nb_channels * ap->out_bytes_per_sample; +} + +void +sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) { + struct sc_audio_player *ap = userdata; + + // This callback is called with the lock used by SDL_AudioDeviceLock(), so + // the bytebuf is protected + + assert(len_int > 0); + size_t len = len_int; + +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] SDL callback requests %" SC_PRIsizet " samples", + bytes_to_samples(ap, len)); +#endif + + size_t read_avail = sc_bytebuf_read_available(&ap->buf); + size_t read = MIN(read_avail, len); + if (read) { + sc_bytebuf_read(&ap->buf, stream, read); + } + + if (read < len) { + // Insert silence +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] Buffer underflow, inserting silence: %" SC_PRIsizet + " samples", bytes_to_samples(ap, len - read)); +#endif + memset(stream + read, 0, len - read); + ap->underflow += bytes_to_samples(ap, len - read); + } + + ap->last_consumed = sc_tick_now(); +} + +static uint8_t * +sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) { + size_t min_buf_size = samples_to_bytes(ap, min_samples); + if (min_buf_size > ap->swr_buf_alloc_size) { // grow when too small + size_t new_size = min_buf_size + 4096; + uint8_t *buf = realloc(ap->swr_buf, new_size); + if (!buf) { + LOG_OOM(); + // Could not realloc to the requested size + return NULL; + } + ap->swr_buf = buf; + ap->swr_buf_alloc_size = new_size; + } + + return ap->swr_buf; +} + +static bool +sc_audio_player_frame_sink_open(struct sc_frame_sink *sink, + const AVCodecContext *ctx) { + struct sc_audio_player *ap = DOWNCAST(sink); + + SDL_AudioSpec desired = { + 
.freq = ctx->sample_rate, + .format = SC_SDL_SAMPLE_FMT, + .channels = ctx->ch_layout.nb_channels, + .samples = SC_AUDIO_OUTPUT_BUFFER_SAMPLES, + .callback = sc_audio_player_sdl_callback, + .userdata = ap, + }; + SDL_AudioSpec obtained; + + ap->device = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained, 0); + if (!ap->device) { + LOGE("Could not open audio device: %s", SDL_GetError()); + return false; + } + + SwrContext *swr_ctx = swr_alloc(); + if (!swr_ctx) { + LOG_OOM(); + goto error_close_audio_device; + } + ap->swr_ctx = swr_ctx; + + assert(ctx->sample_rate > 0); + assert(ctx->ch_layout.nb_channels > 0); + assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT)); + int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT); + assert(out_bytes_per_sample > 0); + + av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0); + av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0); + + av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0); + av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0); + + av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0); + av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0); + + int ret = swr_init(swr_ctx); + if (ret) { + LOGE("Failed to initialize the resampling context"); + goto error_free_swr_ctx; + } + + ap->sample_rate = ctx->sample_rate; + ap->nb_channels = ctx->ch_layout.nb_channels; + ap->out_bytes_per_sample = out_bytes_per_sample; + + size_t bytebuf_size = samples_to_bytes(ap, SC_BYTEBUF_SIZE_IN_SAMPLES); + + bool ok = sc_bytebuf_init(&ap->buf, bytebuf_size); + if (!ok) { + goto error_free_swr_ctx; + } + + size_t initial_swr_buf_size = samples_to_bytes(ap, 4096); + ap->swr_buf = malloc(initial_swr_buf_size); + if (!ap->swr_buf) { + LOG_OOM(); + goto error_destroy_bytebuf; + } + ap->swr_buf_alloc_size = initial_swr_buf_size; + + ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf); + + sc_average_init(&ap->avg_buffering, 8); + 
ap->samples_since_resync = 0; + + ap->last_consumed = 0; + ap->underflow = 0; + + SDL_PauseAudioDevice(ap->device, 0); + + return true; + +error_destroy_bytebuf: + sc_bytebuf_destroy(&ap->buf); +error_free_swr_ctx: + swr_free(&ap->swr_ctx); +error_close_audio_device: + SDL_CloseAudioDevice(ap->device); + + return false; +} + +static void +sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) { + struct sc_audio_player *ap = DOWNCAST(sink); + + assert(ap->device); + SDL_PauseAudioDevice(ap->device, 1); + SDL_CloseAudioDevice(ap->device); + + free(ap->swr_buf); + sc_bytebuf_destroy(&ap->buf); + swr_free(&ap->swr_ctx); +} + +static bool +sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, + const AVFrame *frame) { + struct sc_audio_player *ap = DOWNCAST(sink); + + SwrContext *swr_ctx = ap->swr_ctx; + + int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate); + // No need to av_rescale_rnd(), input and output sample rates are the same + // Add more space (256) for clock compensation + int dst_nb_samples = delay + frame->nb_samples + 256; + + uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples); + if (!swr_buf) { + return false; + } + + int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples, + (const uint8_t **) frame->data, frame->nb_samples); + if (ret < 0) { + LOGE("Resampling failed: %d", ret); + return false; + } + + // swr_convert() returns the number of samples which would have been + // written if the buffer was big enough. + size_t samples_written = MIN(ret, dst_nb_samples); + size_t swr_buf_size = samples_to_bytes(ap, samples_written); +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGI("[Audio] %" SC_PRIsizet " samples written to buffer", samples_written); +#endif + + // Since this function is the only writer, the current available space is + // at least the previous available space. In practice, it should almost + // always be possible to write without lock. 
+ bool lockless_write = swr_buf_size <= ap->previous_write_avail; + if (lockless_write) { + sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size); + } + + SDL_LockAudioDevice(ap->device); + + // The consumer requests audio samples blocks (e.g. 480 samples). + // Convert the duration since the last consumption into samples. + size_t extrapolated = 0; + if (ap->last_consumed) { + sc_tick now = sc_tick_now(); + assert(now >= ap->last_consumed); + extrapolated = (now - ap->last_consumed) * ap->sample_rate + / SC_TICK_FREQ; + } + + size_t read_avail = sc_bytebuf_read_available(&ap->buf); + + // The consumer may not increase underflow value if there are still samples + // available + assert(read_avail == 0 || ap->underflow == 0); + + size_t buffered_samples = bytes_to_samples(ap, read_avail); + // Underflow caused silence samples in excess (so it adds buffering). + // Extrapolated samples must be considered consumed for smoothing (so it + // removes buffering). + float buffering = (float) buffered_samples + ap->underflow - extrapolated; + sc_average_push(&ap->avg_buffering, buffering); + +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[AUDIO] buffered_samples=%" SC_PRIsizet + " underflow=%" SC_PRIsizet + " extrapolated=%" SC_PRIsizet + " buffering=%f avg_buffering=%f", + buffered_samples, ap->underflow, extrapolated, buffering, + sc_average_get(&ap->avg_buffering)); +#endif + + if (lockless_write) { + sc_bytebuf_commit_write(&ap->buf, swr_buf_size); + } else { + // Take care to keep full samples + size_t align = ap->nb_channels * ap->out_bytes_per_sample; + size_t write_avail = + sc_bytebuf_write_available(&ap->buf) / align * align; + if (swr_buf_size > write_avail) { + // Skip old samples + size_t cap = sc_bytebuf_capacity(&ap->buf) / align * align; + if (swr_buf_size > cap) { + // Ignore the first bytes in swr_buf + swr_buf += swr_buf_size - cap; + swr_buf_size = cap; + } + assert(swr_buf_size >= write_avail); // may be equal after capping + if (swr_buf_size - write_avail > 0) { + 
sc_bytebuf_skip(&ap->buf, swr_buf_size - write_avail); + } + } + sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size); + } + + // On buffer underflow, typically because a packet is late, silence is + // inserted. In that case, the late samples must be ignored when they + // arrive, otherwise they will delay playback. + // + // As an improvement, instead of naively skipping the silence duration, we + // can absorb it if it helps clock compensation. + if (ap->underflow) { + float avg = sc_average_get(&ap->avg_buffering); // may be negative + if (avg > SC_TARGET_BUFFERED_SAMPLES) { + size_t diff = avg - SC_TARGET_BUFFERED_SAMPLES; // excess + if (diff < ap->underflow) { + // Partially absorb underflow for clock compensation (only keep + // the diff with the target buffering level). + ap->underflow = diff; + } + + size_t skip_samples = MIN(ap->underflow, buffered_samples); + if (skip_samples) { + size_t skip_bytes = samples_to_bytes(ap, skip_samples); + sc_bytebuf_skip(&ap->buf, skip_bytes); + read_avail -= skip_bytes; +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] Skipping %" SC_PRIsizet " samples", skip_samples); +#endif + } + } else { + // Totally absorb underflow for clock compensation + ap->underflow = 0; + } + } + + ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf); + + SDL_UnlockAudioDevice(ap->device); + + ap->samples_since_resync += samples_written; + if (ap->samples_since_resync >= ap->sample_rate) { + // Resync every second + ap->samples_since_resync = 0; + + float avg = sc_average_get(&ap->avg_buffering); + int diff = SC_TARGET_BUFFERED_SAMPLES - avg; +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGI("[Audio] Average buffering=%f, compensation %d", avg, diff); +#endif + // Compensate the diff over 3 seconds (but will be recomputed after + // 1 second) + int ret = swr_set_compensation(swr_ctx, diff, 3 * ap->sample_rate); + if (ret < 0) { + LOGW("Resampling compensation failed: %d", ret); + // not fatal + } + } + + return true; +} + +void +sc_audio_player_init(struct sc_audio_player 
*ap) { + static const struct sc_frame_sink_ops ops = { + .open = sc_audio_player_frame_sink_open, + .close = sc_audio_player_frame_sink_close, + .push = sc_audio_player_frame_sink_push, + }; + + ap->frame_sink.ops = &ops; +} diff --git a/app/src/audio_player.h b/app/src/audio_player.h new file mode 100644 index 00000000..b455543c --- /dev/null +++ b/app/src/audio_player.h @@ -0,0 +1,60 @@ +#ifndef SC_AUDIO_PLAYER_H +#define SC_AUDIO_PLAYER_H + +#include "common.h" + +#include +#include "trait/frame_sink.h" +#include +#include +#include + +#include +#include +#include + +struct sc_audio_player { + struct sc_frame_sink frame_sink; + + SDL_AudioDeviceID device; + + // protected by SDL_AudioDeviceLock() + struct sc_bytebuf buf; + size_t previous_write_avail; + + struct SwrContext *swr_ctx; + + // The sample rate is the same for input and output + unsigned sample_rate; + // The number of channels is the same for input and output + unsigned nb_channels; + // The number of bytes per sample for a single channel + unsigned out_bytes_per_sample; + + // Target buffer for resampling + uint8_t *swr_buf; + size_t swr_buf_alloc_size; + + // Number of buffered samples (may be negative on underflow) + struct sc_average avg_buffering; + // Count the number of samples to trigger a compensation update regularly + size_t samples_since_resync; + + // The last date a sample has been consumed by the audio output + sc_tick last_consumed; + + // Number of silence samples inserted to be compensated + size_t underflow; + + const struct sc_audio_player_callbacks *cbs; + void *cbs_userdata; +}; + +struct sc_audio_player_callbacks { + void (*on_ended)(struct sc_audio_player *ap, bool success, void *userdata); +}; + +void +sc_audio_player_init(struct sc_audio_player *ap); + +#endif diff --git a/app/src/decoder.c b/app/src/decoder.c index 96d4a010..ef66bb08 100644 --- a/app/src/decoder.c +++ b/app/src/decoder.c @@ -2,6 +2,7 @@ #include #include +#include #include "events.h" #include 
"video_buffer.h" @@ -50,6 +51,11 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) { if (codec->type == AVMEDIA_TYPE_VIDEO) { // Hardcoded video properties decoder->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; + } else { + // Hardcoded audio properties + decoder->codec_ctx->ch_layout = + (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO; + decoder->codec_ctx->sample_rate = 48000; } if (avcodec_open2(decoder->codec_ctx, codec, NULL) < 0) { diff --git a/app/src/scrcpy.c b/app/src/scrcpy.c index eb70749a..45c8017d 100644 --- a/app/src/scrcpy.c +++ b/app/src/scrcpy.c @@ -13,6 +13,7 @@ # include #endif +#include "audio_player.h" #include "controller.h" #include "decoder.h" #include "demuxer.h" @@ -40,6 +41,7 @@ struct scrcpy { struct sc_server server; struct sc_screen screen; + struct sc_audio_player audio_player; struct sc_demuxer video_demuxer; struct sc_demuxer audio_demuxer; struct sc_decoder video_decoder; @@ -383,9 +385,16 @@ scrcpy(struct scrcpy_options *options) { } // Initialize SDL video in addition if display is enabled - if (options->display && SDL_Init(SDL_INIT_VIDEO)) { - LOGE("Could not initialize SDL: %s", SDL_GetError()); - goto end; + if (options->display) { + if (SDL_Init(SDL_INIT_VIDEO)) { + LOGE("Could not initialize SDL video: %s", SDL_GetError()); + goto end; + } + + if (options->audio && SDL_Init(SDL_INIT_AUDIO)) { + LOGE("Could not initialize SDL audio: %s", SDL_GetError()); + goto end; + } } sdl_configure(options->display, options->disable_screensaver); @@ -663,6 +672,11 @@ aoa_hid_end: screen_initialized = true; sc_decoder_add_sink(&s->video_decoder, &s->screen.frame_sink); + + if (options->audio) { + sc_audio_player_init(&s->audio_player); + sc_decoder_add_sink(&s->audio_decoder, &s->audio_player.frame_sink); + } } #ifdef HAVE_V4L2 diff --git a/app/src/util/average.c b/app/src/util/average.c new file mode 100644 index 00000000..ace23d45 --- /dev/null +++ b/app/src/util/average.c @@ -0,0 +1,26 @@ +#include "average.h" + +#include + 
+void +sc_average_init(struct sc_average *avg, unsigned range) { + avg->range = range; + avg->avg = 0; + avg->count = 0; +} + +void +sc_average_push(struct sc_average *avg, float value) { + if (avg->count < avg->range) { + ++avg->count; + } + + assert(avg->count); + avg->avg = ((avg->count - 1) * avg->avg + value) / avg->count; +} + +float +sc_average_get(struct sc_average *avg) { + assert(avg->count); + return avg->avg; +} diff --git a/app/src/util/average.h b/app/src/util/average.h new file mode 100644 index 00000000..59fae7d1 --- /dev/null +++ b/app/src/util/average.h @@ -0,0 +1,40 @@ +#ifndef SC_AVERAGE +#define SC_AVERAGE + +#include "common.h" + +#include +#include + +struct sc_average { + // Current average value + float avg; + + // Target range, to update the average as follows: + // avg = ((range - 1) * avg + new_value) / range + unsigned range; + + // Number of values pushed when less than range (count <= range). + // The purpose is to handle the first (range - 1) values properly. + unsigned count; +}; + +void +sc_average_init(struct sc_average *avg, unsigned range); + +/** + * Push a new value to update the "rolling" average + */ +void +sc_average_push(struct sc_average *avg, float value); + +/** + * Get the current average value + * + * It is an error to call this function if sc_average_push() has not been + * called at least once. 
+ */ +float +sc_average_get(struct sc_average *avg); + +#endif diff --git a/cross_win32.txt b/cross_win32.txt index 32226949..73922ad4 100644 --- a/cross_win32.txt +++ b/cross_win32.txt @@ -19,6 +19,7 @@ endian = 'little' ffmpeg_avcodec = 'avcodec-58' ffmpeg_avformat = 'avformat-58' ffmpeg_avutil = 'avutil-56' +ffmpeg_swresample = 'swresample-3' prebuilt_ffmpeg = 'ffmpeg-win32-4.3.1' prebuilt_sdl2 = 'SDL2-2.26.1/i686-w64-mingw32' prebuilt_libusb_root = 'libusb-1.0.26' diff --git a/cross_win64.txt b/cross_win64.txt index 4dde4ab1..0e9bd29f 100644 --- a/cross_win64.txt +++ b/cross_win64.txt @@ -19,6 +19,7 @@ endian = 'little' ffmpeg_avcodec = 'avcodec-59' ffmpeg_avformat = 'avformat-59' ffmpeg_avutil = 'avutil-57' +ffmpeg_swresample = 'swresample-4' prebuilt_ffmpeg = 'ffmpeg-win64-5.1.2' prebuilt_sdl2 = 'SDL2-2.26.1/x86_64-w64-mingw32' prebuilt_libusb_root = 'libusb-1.0.26'