diff --git a/app/meson.build b/app/meson.build index aa4b1989..756042a1 100644 --- a/app/meson.build +++ b/app/meson.build @@ -4,6 +4,7 @@ src = [ 'src/adb/adb_device.c', 'src/adb/adb_parser.c', 'src/adb/adb_tunnel.c', + 'src/audio_player.c', 'src/cli.c', 'src/clock.c', 'src/compat.c', @@ -30,6 +31,7 @@ src = [ 'src/version.c', 'src/video_buffer.c', 'src/util/acksync.c', + 'src/util/average.c', 'src/util/bytebuf.c', 'src/util/file.c', 'src/util/intmap.c', @@ -100,6 +102,7 @@ if not crossbuild_windows dependency('libavformat', version: '>= 57.33'), dependency('libavcodec', version: '>= 57.37'), dependency('libavutil'), + dependency('libswresample'), dependency('sdl2', version: '>= 2.0.5'), ] @@ -134,12 +137,14 @@ else ffmpeg_avcodec = meson.get_cross_property('ffmpeg_avcodec') ffmpeg_avformat = meson.get_cross_property('ffmpeg_avformat') ffmpeg_avutil = meson.get_cross_property('ffmpeg_avutil') + ffmpeg_swresample = meson.get_cross_property('ffmpeg_swresample') ffmpeg = declare_dependency( dependencies: [ cc.find_library(ffmpeg_avcodec, dirs: ffmpeg_bin_dir), cc.find_library(ffmpeg_avformat, dirs: ffmpeg_bin_dir), cc.find_library(ffmpeg_avutil, dirs: ffmpeg_bin_dir), + cc.find_library(ffmpeg_swresample, dirs: ffmpeg_bin_dir), ], include_directories: include_directories(ffmpeg_include_dir) ) diff --git a/app/src/audio_player.c b/app/src/audio_player.c new file mode 100644 index 00000000..f9bc13a0 --- /dev/null +++ b/app/src/audio_player.c @@ -0,0 +1,289 @@ +#include "audio_player.h" + +#include + +#include "util/log.h" + +#define SC_AUDIO_PLAYER_NDEBUG // comment to debug + +/** Downcast frame_sink to sc_audio_player */ +#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink) + +#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT +#define SC_SDL_SAMPLE_FMT AUDIO_F32 + +#define SC_AUDIO_OUTPUT_BUFFER_SAMPLES 480 // 10ms at 48000Hz + +// The target number of buffered samples between the producer and the consumer. +// This value is directly use for compensation. +#define SC_TARGET_BUFFERED_SAMPLES (3 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES) + +// If the consumer is too late, skip samples to keep at most this value +#define SC_BUFFERED_SAMPLES_THRESHOLD 2400 // 50ms at 48000Hz + +// Use a ring-buffer of 1 second (at 48000Hz) between the producer and the +// consumer. It too big, but it guarantees that the producer and the consumer +// will be able to access it in parallel without locking. +#define SC_BYTEBUF_SIZE_IN_SAMPLES 48000 + +void +sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) { + struct sc_audio_player *ap = userdata; + + // This callback is called with the lock used by SDL_AudioDeviceLock(), so + // the bytebuf is protected + + assert(len_int > 0); + size_t len = len_int; + +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] SDL callback requests %" SC_PRIsizet " samples", + len / (ap->nb_channels * ap->out_bytes_per_sample)); +#endif + + size_t read = sc_bytebuf_read_remaining(&ap->buf); + size_t max_buffered_bytes = SC_BUFFERED_SAMPLES_THRESHOLD + * ap->nb_channels * ap->out_bytes_per_sample; + if (read > max_buffered_bytes + len) { + size_t skip = read - (max_buffered_bytes + len); +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] Buffered samples threshold exceeded: %" SC_PRIsizet + " bytes, skipping %" SC_PRIsizet " bytes", read, skip); +#endif + // After this callback, exactly max_buffered_bytes will remain + sc_bytebuf_skip(&ap->buf, skip); + read = max_buffered_bytes + len; + } + + // Number of buffered samples (may be negative on underflow) + float buffered_samples = ((float) read - len_int) + / (ap->nb_channels * ap->out_bytes_per_sample); + sc_average_push(&ap->avg_buffered_samples, buffered_samples); + + if (read) { + if (read > len) { + read = len; + } + sc_bytebuf_read(&ap->buf, stream, read); + } + + if (read < len) { + // Insert silence +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGD("[Audio] Buffer underflow, inserting silence: %" SC_PRIsizet + " bytes", len - read); +#endif + memset(stream + read, 0, len - read); + } +} + +static size_t +sc_audio_player_get_buf_size(struct sc_audio_player *ap, size_t samples) { + assert(ap->nb_channels); + assert(ap->out_bytes_per_sample); + return samples * ap->nb_channels * ap->out_bytes_per_sample; +} + +static uint8_t * +sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) { + size_t min_buf_size = sc_audio_player_get_buf_size(ap, min_samples); + if (min_buf_size < ap->swr_buf_alloc_size) { + size_t new_size = min_buf_size + 4096; + uint8_t *buf = realloc(ap->swr_buf, new_size); + if (!buf) { + LOG_OOM(); + // Could not realloc to the requested size + return NULL; + } + ap->swr_buf = buf; + ap->swr_buf_alloc_size = new_size; + } + + return ap->swr_buf; +} + +static bool +sc_audio_player_frame_sink_open(struct sc_frame_sink *sink, + const AVCodecContext *ctx) { + struct sc_audio_player *ap = DOWNCAST(sink); + + SDL_AudioSpec desired = { + .freq = ctx->sample_rate, + .format = SC_SDL_SAMPLE_FMT, + .channels = ctx->ch_layout.nb_channels, + .samples = SC_AUDIO_OUTPUT_BUFFER_SAMPLES, + .callback = sc_audio_player_sdl_callback, + .userdata = ap, + }; + SDL_AudioSpec obtained; + + ap->device = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained, 0); + if (!ap->device) { + LOGE("Could not open audio device: %s", SDL_GetError()); + return false; + } + + SwrContext *swr_ctx = swr_alloc(); + if (!swr_ctx) { + LOG_OOM(); + goto error_close_audio_device; + } + ap->swr_ctx = swr_ctx; + + assert(ctx->sample_rate > 0); + assert(ctx->ch_layout.nb_channels > 0); + assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT)); + int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT); + assert(out_bytes_per_sample > 0); + + av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0); + av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0); + + av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0); + av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0); + + av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0); + av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0); + + int ret = swr_init(swr_ctx); + if (ret) { + LOGE("Failed to initialize the resampling context"); + goto error_free_swr_ctx; + } + + ap->sample_rate = ctx->sample_rate; + ap->nb_channels = ctx->ch_layout.nb_channels; + ap->out_bytes_per_sample = out_bytes_per_sample; + + size_t bytebuf_size = + sc_audio_player_get_buf_size(ap, SC_BYTEBUF_SIZE_IN_SAMPLES); + + bool ok = sc_bytebuf_init(&ap->buf, bytebuf_size); + if (!ok) { + goto error_free_swr_ctx; + } + + ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf); + + size_t initial_swr_buf_size = sc_audio_player_get_buf_size(ap, 4096); + ap->swr_buf = malloc(initial_swr_buf_size); + if (!ap->swr_buf) { + LOG_OOM(); + goto error_destroy_bytebuf; + } + ap->swr_buf_alloc_size = initial_swr_buf_size; + + sc_average_init(&ap->avg_buffered_samples, 32); + ap->samples_since_resync = 0; + + SDL_PauseAudioDevice(ap->device, 0); + + return true; + +error_destroy_bytebuf: + sc_bytebuf_destroy(&ap->buf); +error_free_swr_ctx: + swr_free(&ap->swr_ctx); +error_close_audio_device: + SDL_CloseAudioDevice(ap->device); + + return false; +} + +static void +sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) { + struct sc_audio_player *ap = DOWNCAST(sink); + + assert(ap->device); + SDL_PauseAudioDevice(ap->device, 1); + SDL_CloseAudioDevice(ap->device); + + free(ap->swr_buf); + sc_bytebuf_destroy(&ap->buf); + swr_free(&ap->swr_ctx); +} + +static bool +sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) { + struct sc_audio_player *ap = DOWNCAST(sink); + + SwrContext *swr_ctx = ap->swr_ctx; + + int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate); + // No need to av_rescale_rnd(), input and output sample rates are the same + int dst_nb_samples = delay + frame->nb_samples; + + uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, frame->nb_samples); + if (!swr_buf) { + return false; + } + + int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples, + (const uint8_t **) frame->data, frame->nb_samples); + if (ret < 0) { + LOGE("Resampling failed: %d", ret); + return false; + } + + size_t samples_written = ret; + size_t swr_buf_size = sc_audio_player_get_buf_size(ap, samples_written); +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGI("[Audio] %" SC_PRIsizet " samples written to buffer", samples_written); +#endif + + // It should almost always be possible to write without lock + bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer; + if (can_write_without_lock) { + sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size); + } + + SDL_LockAudioDevice(ap->device); + if (can_write_without_lock) { + sc_bytebuf_commit_write(&ap->buf, swr_buf_size); + } else { + sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size); + } + + // The next time, it will remain at least the current empty space + ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf); + + // Read the value written by the SDL thread under lock + float avg; + bool has_avg = sc_average_get(&ap->avg_buffered_samples, &avg); + + SDL_UnlockAudioDevice(ap->device); + + if (has_avg) { + ap->samples_since_resync += samples_written; + if (ap->samples_since_resync >= ap->sample_rate) { + // Resync every second + ap->samples_since_resync = 0; + + int diff = SC_TARGET_BUFFERED_SAMPLES - avg; +#ifndef SC_AUDIO_PLAYER_NDEBUG + LOGI("[Audio] Average buffered samples = %f, compensation %d", + avg, diff); +#endif + // Compensate the diff over 3 seconds (but will be recomputed after + // 1 second) + int ret = swr_set_compensation(swr_ctx, diff, 3 * ap->sample_rate); + if (ret < 0) { + LOGW("Resampling compensation failed: %d", ret); + // not fatal + } + } + } + + return true; +} + +void +sc_audio_player_init(struct sc_audio_player *ap) { + static const struct sc_frame_sink_ops ops = { + .open = sc_audio_player_frame_sink_open, + .close = sc_audio_player_frame_sink_close, + .push = sc_audio_player_frame_sink_push, + }; + + ap->frame_sink.ops = &ops; +} diff --git a/app/src/audio_player.h b/app/src/audio_player.h new file mode 100644 index 00000000..a2c47c54 --- /dev/null +++ b/app/src/audio_player.h @@ -0,0 +1,54 @@ +#ifndef SC_AUDIO_PLAYER_H +#define SC_AUDIO_PLAYER_H + +#include "common.h" + +#include +#include "trait/frame_sink.h" +#include +#include +#include + +#include +#include +#include + +struct sc_audio_player { + struct sc_frame_sink frame_sink; + + SDL_AudioDeviceID device; + + // protected by SDL_AudioDeviceLock() + struct sc_bytebuf buf; + // Number of bytes which could be written without locking + size_t safe_empty_buffer; + + struct SwrContext *swr_ctx; + + // The sample rate is the same for input and output + unsigned sample_rate; + // The number of channels is the same for input and output + unsigned nb_channels; + + unsigned out_bytes_per_sample; + + // Target buffer for resampling + uint8_t *swr_buf; + size_t swr_buf_alloc_size; + + // Number of buffered samples (may be negative on underflow) + struct sc_average avg_buffered_samples; + unsigned samples_since_resync; + + const struct sc_audio_player_callbacks *cbs; + void *cbs_userdata; +}; + +struct sc_audio_player_callbacks { + void (*on_ended)(struct sc_audio_player *ap, bool success, void *userdata); +}; + +void +sc_audio_player_init(struct sc_audio_player *ap); + +#endif diff --git a/app/src/decoder.c b/app/src/decoder.c index 96d4a010..ef66bb08 100644 --- a/app/src/decoder.c +++ b/app/src/decoder.c @@ -2,6 +2,7 @@ #include #include +#include #include "events.h" #include "video_buffer.h" @@ -50,6 +51,11 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) { if (codec->type == AVMEDIA_TYPE_VIDEO) { // Hardcoded video properties decoder->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; + } else { + // Hardcoded audio properties + decoder->codec_ctx->ch_layout = + (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO; + decoder->codec_ctx->sample_rate = 48000; } if (avcodec_open2(decoder->codec_ctx, codec, NULL) < 0) { diff --git a/app/src/scrcpy.c b/app/src/scrcpy.c index eb70749a..45c8017d 100644 --- a/app/src/scrcpy.c +++ b/app/src/scrcpy.c @@ -13,6 +13,7 @@ # include #endif +#include "audio_player.h" #include "controller.h" #include "decoder.h" #include "demuxer.h" @@ -40,6 +41,7 @@ struct scrcpy { struct sc_server server; struct sc_screen screen; + struct sc_audio_player audio_player; struct sc_demuxer video_demuxer; struct sc_demuxer audio_demuxer; struct sc_decoder video_decoder; @@ -383,9 +385,16 @@ scrcpy(struct scrcpy_options *options) { } // Initialize SDL video in addition if display is enabled - if (options->display && SDL_Init(SDL_INIT_VIDEO)) { - LOGE("Could not initialize SDL: %s", SDL_GetError()); - goto end; + if (options->display) { + if (SDL_Init(SDL_INIT_VIDEO)) { + LOGE("Could not initialize SDL video: %s", SDL_GetError()); + goto end; + } + + if (options->audio && SDL_Init(SDL_INIT_AUDIO)) { + LOGE("Could not initialize SDL audio: %s", SDL_GetError()); + goto end; + } } sdl_configure(options->display, options->disable_screensaver); @@ -663,6 +672,11 @@ aoa_hid_end: screen_initialized = true; sc_decoder_add_sink(&s->video_decoder, &s->screen.frame_sink); + + if (options->audio) { + sc_audio_player_init(&s->audio_player); + sc_decoder_add_sink(&s->audio_decoder, &s->audio_player.frame_sink); + } } #ifdef HAVE_V4L2 diff --git a/cross_win32.txt b/cross_win32.txt index 32226949..73922ad4 100644 --- a/cross_win32.txt +++ b/cross_win32.txt @@ -19,6 +19,7 @@ endian = 'little' ffmpeg_avcodec = 'avcodec-58' ffmpeg_avformat = 'avformat-58' ffmpeg_avutil = 'avutil-56' +ffmpeg_swresample = 'swresample-3' prebuilt_ffmpeg = 'ffmpeg-win32-4.3.1' prebuilt_sdl2 = 'SDL2-2.26.1/i686-w64-mingw32' prebuilt_libusb_root = 'libusb-1.0.26' diff --git a/cross_win64.txt b/cross_win64.txt index 4dde4ab1..0e9bd29f 100644 --- a/cross_win64.txt +++ b/cross_win64.txt @@ -19,6 +19,7 @@ endian = 'little' ffmpeg_avcodec = 'avcodec-59' ffmpeg_avformat = 'avformat-59' ffmpeg_avutil = 'avutil-57' +ffmpeg_swresample = 'swresample-4' prebuilt_ffmpeg = 'ffmpeg-win64-5.1.2' prebuilt_sdl2 = 'SDL2-2.26.1/x86_64-w64-mingw32' prebuilt_libusb_root = 'libusb-1.0.26'