From fa99c6bcdd6e493eb27c09783a6669ed0168cf09 Mon Sep 17 00:00:00 2001 From: Romain Vimont Date: Sat, 10 May 2025 10:31:53 +0200 Subject: [PATCH 1/4] Rename "codec meta" to "stream meta" The stream metadata will contain both: - the codec id at the start of the stream - the session metadata (video width and height) at the start of every "session" (typically on rotation) --- doc/develop.md | 5 ++--- .../src/main/java/com/genymobile/scrcpy/Options.java | 12 ++++++------ .../src/main/java/com/genymobile/scrcpy/Server.java | 4 ++-- .../java/com/genymobile/scrcpy/device/Streamer.java | 8 ++++---- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/doc/develop.md b/doc/develop.md index 21949ea6..002ec4b9 100644 --- a/doc/develop.md +++ b/doc/develop.md @@ -409,12 +409,11 @@ with any client which uses the same protocol. For simplicity, some [server-specific options] have been added to produce raw streams easily: - - `send_device_meta=false`: disable the device metata (in practice, the device + - `send_device_meta=false`: disable device metadata (in practice, the device name) sent on the _first_ socket - `send_frame_meta=false`: disable the 12-byte header for each packet - `send_dummy_byte`: disable the dummy byte sent on forward connections - - `send_codec_meta`: disable the codec information (and initial device size for - video) + - `send_stream_meta`: disable codec and video size metadata - `raw_stream`: disable all the above [server-specific options]: https://github.com/Genymobile/scrcpy/blob/a3cdf1a6b86ea22786e1f7d09b9c202feabc6949/server/src/main/java/com/genymobile/scrcpy/Options.java#L309-L329 diff --git a/server/src/main/java/com/genymobile/scrcpy/Options.java b/server/src/main/java/com/genymobile/scrcpy/Options.java index 66bb68e8..89508942 100644 --- a/server/src/main/java/com/genymobile/scrcpy/Options.java +++ b/server/src/main/java/com/genymobile/scrcpy/Options.java @@ -78,7 +78,7 @@ public class Options { private boolean sendDeviceMeta = true; // 
send device name and size private boolean sendFrameMeta = true; // send PTS so that the client may record properly private boolean sendDummyByte = true; // write a byte on start to detect connection issues - private boolean sendCodecMeta = true; // write the codec metadata before the stream + private boolean sendStreamMeta = true; // write the stream metadata (codec and session) public Ln.Level getLogLevel() { return logLevel; @@ -284,8 +284,8 @@ public class Options { return sendDummyByte; } - public boolean getSendCodecMeta() { - return sendCodecMeta; + public boolean getSendStreamMeta() { + return sendStreamMeta; } @SuppressWarnings("MethodLength") @@ -500,8 +500,8 @@ public class Options { case "send_dummy_byte": options.sendDummyByte = Boolean.parseBoolean(value); break; - case "send_codec_meta": - options.sendCodecMeta = Boolean.parseBoolean(value); + case "send_stream_meta": + options.sendStreamMeta = Boolean.parseBoolean(value); break; case "raw_stream": boolean rawStream = Boolean.parseBoolean(value); @@ -509,7 +509,7 @@ public class Options { options.sendDeviceMeta = false; options.sendFrameMeta = false; options.sendDummyByte = false; - options.sendCodecMeta = false; + options.sendStreamMeta = false; } break; default: diff --git a/server/src/main/java/com/genymobile/scrcpy/Server.java b/server/src/main/java/com/genymobile/scrcpy/Server.java index 46f3294f..40774579 100644 --- a/server/src/main/java/com/genymobile/scrcpy/Server.java +++ b/server/src/main/java/com/genymobile/scrcpy/Server.java @@ -126,7 +126,7 @@ public final class Server { audioCapture = new AudioPlaybackCapture(options.getAudioDup()); } - Streamer audioStreamer = new Streamer(connection.getAudioFd(), audioCodec, options.getSendCodecMeta(), options.getSendFrameMeta()); + Streamer audioStreamer = new Streamer(connection.getAudioFd(), audioCodec, options.getSendStreamMeta(), options.getSendFrameMeta()); AsyncProcessor audioRecorder; if (audioCodec == AudioCodec.RAW) { audioRecorder = new 
AudioRawRecorder(audioCapture, audioStreamer); @@ -137,7 +137,7 @@ public final class Server { } if (video) { - Streamer videoStreamer = new Streamer(connection.getVideoFd(), options.getVideoCodec(), options.getSendCodecMeta(), + Streamer videoStreamer = new Streamer(connection.getVideoFd(), options.getVideoCodec(), options.getSendStreamMeta(), options.getSendFrameMeta()); SurfaceCapture surfaceCapture; if (options.getVideoSource() == VideoSource.DISPLAY) { diff --git a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java index f54d0567..93c5077b 100644 --- a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java +++ b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java @@ -19,7 +19,7 @@ public final class Streamer { private final FileDescriptor fd; private final Codec codec; - private final boolean sendCodecMeta; + private final boolean sendStreamMeta; private final boolean sendFrameMeta; private final ByteBuffer headerBuffer = ByteBuffer.allocate(12); @@ -27,7 +27,7 @@ public final class Streamer { public Streamer(FileDescriptor fd, Codec codec, boolean sendCodecMeta, boolean sendFrameMeta) { this.fd = fd; this.codec = codec; - this.sendCodecMeta = sendCodecMeta; + this.sendStreamMeta = sendCodecMeta; this.sendFrameMeta = sendFrameMeta; } @@ -36,7 +36,7 @@ public final class Streamer { } public void writeAudioHeader() throws IOException { - if (sendCodecMeta) { + if (sendStreamMeta) { ByteBuffer buffer = ByteBuffer.allocate(4); buffer.putInt(codec.getId()); buffer.flip(); @@ -45,7 +45,7 @@ public final class Streamer { } public void writeVideoHeader(Size videoSize) throws IOException { - if (sendCodecMeta) { + if (sendStreamMeta) { ByteBuffer buffer = ByteBuffer.allocate(12); buffer.putInt(codec.getId()); buffer.putInt(videoSize.getWidth()); From f9a1803126ab005d2fba9c48ba27781bd80bba44 Mon Sep 17 00:00:00 2001 From: Romain Vimont Date: Sat, 10 May 2025 10:23:02 
+0200 Subject: [PATCH 2/4] Add session metadata for video stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new packet type, a "session" packet, containing metadata about the encoding session. It is used only for the video stream, and currently includes the video resolution. For illustration, here is a sequence of packets on the video stream: device rotation v CODEC | SESSION | MEDIA | MEDIA | … | SESSION | MEDIA | MEDIA | … 1920x1080 <-----------------> 1080x1920 <------------------ encoding session 1 encoding session 2 This metadata is not strictly necessary, since the video resolution can be determined after decoding. However, it allows detection of cases where the encoder does not respect the requested size (and logs a warning), even without decoding (e.g., when there is no video playback). Additional metadata could be added later if necessary, for example the actual device rotation. Refs #5918 Refs #5984 Co-authored-by: gz0119 --- app/src/audio_player.c | 5 +- app/src/decoder.c | 25 ++- app/src/delay_buffer.c | 5 +- app/src/demuxer.c | 151 ++++++++++++------ app/src/recorder.c | 10 +- app/src/screen.c | 4 +- app/src/trait/frame_sink.h | 11 +- app/src/trait/frame_source.c | 20 ++- app/src/trait/frame_source.h | 7 +- app/src/trait/packet_sink.h | 18 ++- app/src/trait/packet_source.c | 19 ++- app/src/trait/packet_source.h | 7 +- app/src/v4l2_sink.c | 9 +- .../genymobile/scrcpy/device/Streamer.java | 24 ++- .../scrcpy/video/SurfaceEncoder.java | 8 +- 15 files changed, 241 insertions(+), 82 deletions(-) diff --git a/app/src/audio_player.c b/app/src/audio_player.c index 9413c2ea..dbd82737 100644 --- a/app/src/audio_player.c +++ b/app/src/audio_player.c @@ -30,7 +30,10 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, static bool sc_audio_player_frame_sink_open(struct sc_frame_sink *sink, - const AVCodecContext *ctx) { + const AVCodecContext *ctx, + const struct sc_stream_session *session) { + 
(void) session; + struct sc_audio_player *ap = DOWNCAST(sink); #ifdef SCRCPY_LAVU_HAS_CHLAYOUT diff --git a/app/src/decoder.c b/app/src/decoder.c index 4d0a1daf..e2269b2b 100644 --- a/app/src/decoder.c +++ b/app/src/decoder.c @@ -10,14 +10,15 @@ #define DOWNCAST(SINK) container_of(SINK, struct sc_decoder, packet_sink) static bool -sc_decoder_open(struct sc_decoder *decoder, AVCodecContext *ctx) { +sc_decoder_open(struct sc_decoder *decoder, AVCodecContext *ctx, + const struct sc_stream_session *session) { decoder->frame = av_frame_alloc(); if (!decoder->frame) { LOG_OOM(); return false; } - if (!sc_frame_source_sinks_open(&decoder->frame_source, ctx)) { + if (!sc_frame_source_sinks_open(&decoder->frame_source, ctx, session)) { av_frame_free(&decoder->frame); return false; } @@ -74,9 +75,16 @@ sc_decoder_push(struct sc_decoder *decoder, const AVPacket *packet) { } static bool -sc_decoder_packet_sink_open(struct sc_packet_sink *sink, AVCodecContext *ctx) { +sc_decoder_push_session(struct sc_decoder *decoder, + const struct sc_stream_session *session) { + return sc_frame_source_sinks_push_session(&decoder->frame_source, session); +} + +static bool +sc_decoder_packet_sink_open(struct sc_packet_sink *sink, AVCodecContext *ctx, + const struct sc_stream_session *session) { struct sc_decoder *decoder = DOWNCAST(sink); - return sc_decoder_open(decoder, ctx); + return sc_decoder_open(decoder, ctx, session); } static void @@ -92,6 +100,14 @@ sc_decoder_packet_sink_push(struct sc_packet_sink *sink, return sc_decoder_push(decoder, packet); } +static bool +sc_decoder_packet_sink_push_session(struct sc_packet_sink *sink, + const struct sc_stream_session *session) { + + struct sc_decoder *decoder = DOWNCAST(sink); + return sc_decoder_push_session(decoder, session); +} + void sc_decoder_init(struct sc_decoder *decoder, const char *name) { decoder->name = name; // statically allocated @@ -101,6 +117,7 @@ sc_decoder_init(struct sc_decoder *decoder, const char *name) { .open = 
sc_decoder_packet_sink_open, .close = sc_decoder_packet_sink_close, .push = sc_decoder_packet_sink_push, + .push_session = sc_decoder_packet_sink_push_session, }; decoder->packet_sink.ops = &ops; diff --git a/app/src/delay_buffer.c b/app/src/delay_buffer.c index f75c6f72..2a6fcdb3 100644 --- a/app/src/delay_buffer.c +++ b/app/src/delay_buffer.c @@ -109,7 +109,8 @@ stopped: static bool sc_delay_buffer_frame_sink_open(struct sc_frame_sink *sink, - const AVCodecContext *ctx) { + const AVCodecContext *ctx, + const struct sc_stream_session *session) { struct sc_delay_buffer *db = DOWNCAST(sink); (void) ctx; @@ -132,7 +133,7 @@ sc_delay_buffer_frame_sink_open(struct sc_frame_sink *sink, sc_vecdeque_init(&db->queue); db->stopped = false; - if (!sc_frame_source_sinks_open(&db->frame_source, ctx)) { + if (!sc_frame_source_sinks_open(&db->frame_source, ctx, session)) { goto error_destroy_wait_cond; } diff --git a/app/src/demuxer.c b/app/src/demuxer.c index 885cd6ee..73a4bb2f 100644 --- a/app/src/demuxer.c +++ b/app/src/demuxer.c @@ -11,8 +11,8 @@ #define SC_PACKET_HEADER_SIZE 12 -#define SC_PACKET_FLAG_CONFIG (UINT64_C(1) << 63) -#define SC_PACKET_FLAG_KEY_FRAME (UINT64_C(1) << 62) +#define SC_PACKET_FLAG_CONFIG (UINT64_C(1) << 62) +#define SC_PACKET_FLAG_KEY_FRAME (UINT64_C(1) << 61) #define SC_PACKET_PTS_MASK (SC_PACKET_FLAG_KEY_FRAME - 1) @@ -63,48 +63,75 @@ sc_demuxer_recv_codec_id(struct sc_demuxer *demuxer, uint32_t *codec_id) { return true; } -static bool -sc_demuxer_recv_video_size(struct sc_demuxer *demuxer, uint32_t *width, - uint32_t *height) { - uint8_t data[8]; - ssize_t r = net_recv_all(demuxer->socket, data, 8); - if (r < 8) { - return false; - } - - *width = sc_read32be(data); - *height = sc_read32be(data + 4); - return true; -} - -static bool -sc_demuxer_recv_packet(struct sc_demuxer *demuxer, AVPacket *packet) { +static inline bool +sc_demuxer_recv_header(struct sc_demuxer *demuxer, + uint8_t buf[static SC_PACKET_HEADER_SIZE]) { // The video and audio 
streams contain a sequence of raw packets (as // provided by MediaCodec), each prefixed with a "meta" header. // - // The "meta" header length is 12 bytes: + // The "meta" header length is 12 bytes. + // + // + // If the MSB is 1, then it is a session packet (for a video stream only), + // which only contains a 12-byte header: + // + // byte 0 byte 1 byte 2 byte 3 + // 10000000 00000000 00000000 00000000 + // ^<--------------------------------> + // | padding + // `- session packet flag + // + // byte 4 byte 5 byte 6 byte 7 byte 8 byte 9 byte 10 byte 11 + // ........ ........ ........ ........ ........ ........ ........ ........ + // <---------------------------------> <---------------------------------> + // video width video height + // + // + // If the MSB is 0, then it is a media packet, comprised of a 12-byte header + // followed by bytes containing the packet/frame: + // // [. . . . . . . .|. . . .]. . . . . . . . . . . . . . . ... // <-------------> <-----> <-----------------------------... // PTS packet raw packet // size // - // It is followed by bytes containing the packet/frame. - // // The most significant bits of the PTS are used for packet flags: // - // byte 7 byte 6 byte 5 byte 4 byte 3 byte 2 byte 1 byte 0 - // CK...... ........ ........ ........ ........ ........ ........ ........ - // ^^<-------------------------------------------------------------------> - // || PTS - // | `- key frame - // `-- config packet + // byte 0 byte 1 byte 2 byte 3 byte 4 byte 5 byte 6 byte 7 + // 0CK..... ........ ........ ........ ........ ........ ........ ........ + // ^^^<------------------------------------------------------------------> + // ||| PTS + // || `- key frame + // | `-- config packet + // `--- media packet flag + // + // byte 8 byte 9 byte 10 byte 11 + // ........ ........ ........ ........ ........ ........ . . . + // <---------------------------------> <---------------- . . . 
+ // packet size raw packet + // + ssize_t r = net_recv_all(demuxer->socket, buf, SC_PACKET_HEADER_SIZE); + assert(r <= SC_PACKET_HEADER_SIZE); + return r == SC_PACKET_HEADER_SIZE; +} - uint8_t header[SC_PACKET_HEADER_SIZE]; - ssize_t r = net_recv_all(demuxer->socket, header, SC_PACKET_HEADER_SIZE); - if (r < SC_PACKET_HEADER_SIZE) { - return false; - } +static bool +sc_demuxer_is_session(const uint8_t *header) { + return header[0] & 0x80; +} +static void +sc_demuxer_parse_session(const uint8_t *header, + struct sc_stream_session *session) { + assert(sc_demuxer_is_session(header)); + session->video.width = sc_read32be(&header[4]); + session->video.height = sc_read32be(&header[8]); +} + +static bool +sc_demuxer_recv_packet(struct sc_demuxer *demuxer, const uint8_t *header, + AVPacket *packet) { + assert(!sc_demuxer_is_session(header)); uint64_t pts_flags = sc_read64be(header); uint32_t len = sc_read32be(&header[8]); assert(len); @@ -114,7 +141,7 @@ sc_demuxer_recv_packet(struct sc_demuxer *demuxer, AVPacket *packet) { return false; } - r = net_recv_all(demuxer->socket, packet->data, len); + ssize_t r = net_recv_all(demuxer->socket, packet->data, len); if (r < 0 || ((uint32_t) r) < len) { av_packet_unref(packet); return false; @@ -187,17 +214,28 @@ run_demuxer(void *data) { codec_ctx->flags |= AV_CODEC_FLAG_LOW_DELAY; + uint8_t header[SC_PACKET_HEADER_SIZE]; + struct sc_stream_session session_data; + + struct sc_stream_session *session = NULL; if (codec->type == AVMEDIA_TYPE_VIDEO) { - uint32_t width; - uint32_t height; - ok = sc_demuxer_recv_video_size(demuxer, &width, &height); + bool ok = sc_demuxer_recv_header(demuxer, header); if (!ok) { goto finally_free_context; } - codec_ctx->width = width; - codec_ctx->height = height; + if (!sc_demuxer_is_session(header)) { + LOGE("Unexpected packet (not a session header)"); + goto finally_free_context; + } + + session = &session_data; + sc_demuxer_parse_session(header, session); + + codec_ctx->width = 
session_data.video.width; + codec_ctx->height = session_data.video.height; codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; + } else { // Hardcoded audio properties #ifdef SCRCPY_LAVU_HAS_CHLAYOUT @@ -219,7 +257,8 @@ run_demuxer(void *data) { goto finally_free_context; } - if (!sc_packet_source_sinks_open(&demuxer->packet_source, codec_ctx)) { + if (!sc_packet_source_sinks_open(&demuxer->packet_source, codec_ctx, + session)) { goto finally_free_context; } @@ -241,27 +280,44 @@ run_demuxer(void *data) { } for (;;) { - bool ok = sc_demuxer_recv_packet(demuxer, packet); + bool ok = sc_demuxer_recv_header(demuxer, header); if (!ok) { // end of stream status = SC_DEMUXER_STATUS_EOS; break; } - if (must_merge_config_packet) { - // Prepend any config packet to the next media packet - ok = sc_packet_merger_merge(&merger, packet); + if (sc_demuxer_is_session(header)) { + sc_demuxer_parse_session(header, &session_data); + ok = sc_packet_source_sinks_push_session(&demuxer->packet_source, + &session_data); if (!ok) { - av_packet_unref(packet); + // The sink already logged its concrete error break; } - } + } else { + ok = sc_demuxer_recv_packet(demuxer, header, packet); + if (!ok) { + // end of stream + status = SC_DEMUXER_STATUS_EOS; + break; + } - ok = sc_packet_source_sinks_push(&demuxer->packet_source, packet); - av_packet_unref(packet); - if (!ok) { - // The sink already logged its concrete error - break; + if (must_merge_config_packet) { + // Prepend any config packet to the next media packet + ok = sc_packet_merger_merge(&merger, packet); + if (!ok) { + av_packet_unref(packet); + break; + } + } + + ok = sc_packet_source_sinks_push(&demuxer->packet_source, packet); + av_packet_unref(packet); + if (!ok) { + // The sink already logged its concrete error + break; + } } } diff --git a/app/src/recorder.c b/app/src/recorder.c index c26f8f2d..f173d72e 100644 --- a/app/src/recorder.c +++ b/app/src/recorder.c @@ -541,7 +541,10 @@ sc_recorder_set_orientation(AVStream *stream, enum sc_orientation orientation) { static bool sc_recorder_video_packet_sink_open(struct 
sc_packet_sink *sink, - AVCodecContext *ctx) { + AVCodecContext *ctx, + const struct sc_stream_session *session) { + (void) session; + struct sc_recorder *recorder = DOWNCAST_VIDEO(sink); // only written from this thread, no need to lock assert(!recorder->video_init); @@ -635,7 +638,10 @@ sc_recorder_video_packet_sink_push(struct sc_packet_sink *sink, static bool sc_recorder_audio_packet_sink_open(struct sc_packet_sink *sink, - AVCodecContext *ctx) { + AVCodecContext *ctx, + const struct sc_stream_session *session) { + (void) session; + struct sc_recorder *recorder = DOWNCAST_AUDIO(sink); assert(recorder->audio); // only written from this thread, no need to lock diff --git a/app/src/screen.c b/app/src/screen.c index 1d694f12..51f7bf46 100644 --- a/app/src/screen.c +++ b/app/src/screen.c @@ -252,9 +252,11 @@ event_watcher(void *data, SDL_Event *event) { static bool sc_screen_frame_sink_open(struct sc_frame_sink *sink, - const AVCodecContext *ctx) { + const AVCodecContext *ctx, + const struct sc_stream_session *session) { assert(ctx->pix_fmt == AV_PIX_FMT_YUV420P); (void) ctx; + (void) session; struct sc_screen *screen = DOWNCAST(sink); diff --git a/app/src/trait/frame_sink.h b/app/src/trait/frame_sink.h index 67be4d46..dc94b1fc 100644 --- a/app/src/trait/frame_sink.h +++ b/app/src/trait/frame_sink.h @@ -6,6 +6,8 @@ #include #include +#include "trait/packet_sink.h" + /** * Frame sink trait. * @@ -17,9 +19,16 @@ struct sc_frame_sink { struct sc_frame_sink_ops { /* The codec context is valid until the sink is closed */ - bool (*open)(struct sc_frame_sink *sink, const AVCodecContext *ctx); + bool (*open)(struct sc_frame_sink *sink, const AVCodecContext *ctx, + const struct sc_stream_session *session); void (*close)(struct sc_frame_sink *sink); bool (*push)(struct sc_frame_sink *sink, const AVFrame *frame); + + /** + * Optional callback to be notified of a new stream session. 
+ */ + bool (*push_session)(struct sc_frame_sink *sink, + const struct sc_stream_session *session); }; #endif diff --git a/app/src/trait/frame_source.c b/app/src/trait/frame_source.c index 56848309..d34e628b 100644 --- a/app/src/trait/frame_source.c +++ b/app/src/trait/frame_source.c @@ -27,11 +27,12 @@ sc_frame_source_sinks_close_firsts(struct sc_frame_source *source, bool sc_frame_source_sinks_open(struct sc_frame_source *source, - const AVCodecContext *ctx) { + const AVCodecContext *ctx, + const struct sc_stream_session *session) { assert(source->sink_count); for (unsigned i = 0; i < source->sink_count; ++i) { struct sc_frame_sink *sink = source->sinks[i]; - if (!sink->ops->open(sink, ctx)) { + if (!sink->ops->open(sink, ctx, session)) { sc_frame_source_sinks_close_firsts(source, i); return false; } @@ -59,3 +60,18 @@ sc_frame_source_sinks_push(struct sc_frame_source *source, return true; } + +bool +sc_frame_source_sinks_push_session(struct sc_frame_source *source, + const struct sc_stream_session *session) { + assert(source->sink_count); + for (unsigned i = 0; i < source->sink_count; ++i) { + struct sc_frame_sink *sink = source->sinks[i]; + if (sink->ops->push_session && + !sink->ops->push_session(sink, session)) { + return false; + } + } + + return true; +} diff --git a/app/src/trait/frame_source.h b/app/src/trait/frame_source.h index cb1ef905..0a1ea8f1 100644 --- a/app/src/trait/frame_source.h +++ b/app/src/trait/frame_source.h @@ -28,7 +28,8 @@ sc_frame_source_add_sink(struct sc_frame_source *source, bool sc_frame_source_sinks_open(struct sc_frame_source *source, - const AVCodecContext *ctx); + const AVCodecContext *ctx, + const struct sc_stream_session *session); void sc_frame_source_sinks_close(struct sc_frame_source *source); @@ -37,4 +38,8 @@ bool sc_frame_source_sinks_push(struct sc_frame_source *source, const AVFrame *frame); +bool +sc_frame_source_sinks_push_session(struct sc_frame_source *source, + const struct sc_stream_session *session); + #endif 
diff --git a/app/src/trait/packet_sink.h b/app/src/trait/packet_sink.h index e12dea12..db742b2f 100644 --- a/app/src/trait/packet_sink.h +++ b/app/src/trait/packet_sink.h @@ -15,12 +15,28 @@ struct sc_packet_sink { const struct sc_packet_sink_ops *ops; }; +struct sc_stream_session_video { + uint32_t width; + uint32_t height; +}; + +struct sc_stream_session { + struct sc_stream_session_video video; +}; + struct sc_packet_sink_ops { /* The codec context is valid until the sink is closed */ - bool (*open)(struct sc_packet_sink *sink, AVCodecContext *ctx); + bool (*open)(struct sc_packet_sink *sink, AVCodecContext *ctx, + const struct sc_stream_session *session); void (*close)(struct sc_packet_sink *sink); bool (*push)(struct sc_packet_sink *sink, const AVPacket *packet); + /** + * Optional callback to be notified of a new stream session. + */ + bool (*push_session)(struct sc_packet_sink *sink, + const struct sc_stream_session *session); + /*/ * Called when the input stream has been disabled at runtime. 
* diff --git a/app/src/trait/packet_source.c b/app/src/trait/packet_source.c index 0a2c6c4d..49cb6d14 100644 --- a/app/src/trait/packet_source.c +++ b/app/src/trait/packet_source.c @@ -27,11 +27,12 @@ sc_packet_source_sinks_close_firsts(struct sc_packet_source *source, bool sc_packet_source_sinks_open(struct sc_packet_source *source, - AVCodecContext *ctx) { + AVCodecContext *ctx, + const struct sc_stream_session *session) { assert(source->sink_count); for (unsigned i = 0; i < source->sink_count; ++i) { struct sc_packet_sink *sink = source->sinks[i]; - if (!sink->ops->open(sink, ctx)) { + if (!sink->ops->open(sink, ctx, session)) { sc_packet_source_sinks_close_firsts(source, i); return false; } @@ -60,6 +61,22 @@ sc_packet_source_sinks_push(struct sc_packet_source *source, return true; } +bool +sc_packet_source_sinks_push_session(struct sc_packet_source *source, + const struct sc_stream_session *session) { + assert(source->sink_count); + for (unsigned i = 0; i < source->sink_count; ++i) { + struct sc_packet_sink *sink = source->sinks[i]; + // push_session is optional: skip sinks that do not implement it + if (sink->ops->push_session && + !sink->ops->push_session(sink, session)) { + return false; + } + } + + return true; +} + void sc_packet_source_sinks_disable(struct sc_packet_source *source) { assert(source->sink_count); diff --git a/app/src/trait/packet_source.h b/app/src/trait/packet_source.h index 8788021a..07be4970 100644 --- a/app/src/trait/packet_source.h +++ b/app/src/trait/packet_source.h @@ -28,7 +28,8 @@ sc_packet_source_add_sink(struct sc_packet_source *source, bool sc_packet_source_sinks_open(struct sc_packet_source *source, - AVCodecContext *ctx); + AVCodecContext *ctx, + const struct sc_stream_session *session); void sc_packet_source_sinks_close(struct sc_packet_source *source); @@ -37,6 +38,10 @@ bool sc_packet_source_sinks_push(struct sc_packet_source *source, const AVPacket *packet); +bool +sc_packet_source_sinks_push_session(struct sc_packet_source *source, + const struct sc_stream_session *session); + void 
sc_packet_source_sinks_disable(struct sc_packet_source *source); diff --git a/app/src/v4l2_sink.c b/app/src/v4l2_sink.c index da9e02ef..92551149 100644 --- a/app/src/v4l2_sink.c +++ b/app/src/v4l2_sink.c @@ -146,9 +146,11 @@ run_v4l2_sink(void *data) { } static bool -sc_v4l2_sink_open(struct sc_v4l2_sink *vs, const AVCodecContext *ctx) { +sc_v4l2_sink_open(struct sc_v4l2_sink *vs, const AVCodecContext *ctx, + const struct sc_stream_session *session) { assert(ctx->pix_fmt == AV_PIX_FMT_YUV420P); (void) ctx; + (void) session; bool ok = sc_frame_buffer_init(&vs->fb); if (!ok) { @@ -326,9 +328,10 @@ sc_v4l2_sink_push(struct sc_v4l2_sink *vs, const AVFrame *frame) { } static bool -sc_v4l2_frame_sink_open(struct sc_frame_sink *sink, const AVCodecContext *ctx) { +sc_v4l2_frame_sink_open(struct sc_frame_sink *sink, const AVCodecContext *ctx, + const struct sc_stream_session *session) { struct sc_v4l2_sink *vs = DOWNCAST(sink); - return sc_v4l2_sink_open(vs, ctx); + return sc_v4l2_sink_open(vs, ctx, session); } static void diff --git a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java index 93c5077b..57abde5b 100644 --- a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java +++ b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java @@ -3,6 +3,7 @@ package com.genymobile.scrcpy.device; import com.genymobile.scrcpy.audio.AudioCodec; import com.genymobile.scrcpy.util.Codec; import com.genymobile.scrcpy.util.IO; +import com.genymobile.scrcpy.util.Ln; import android.media.MediaCodec; @@ -14,8 +15,9 @@ import java.util.Arrays; public final class Streamer { - private static final long PACKET_FLAG_CONFIG = 1L << 63; - private static final long PACKET_FLAG_KEY_FRAME = 1L << 62; + private static final long PACKET_FLAG_SESSION = 1L << 63; + private static final long PACKET_FLAG_CONFIG = 1L << 62; + private static final long PACKET_FLAG_KEY_FRAME = 1L << 61; private final 
FileDescriptor fd; private final Codec codec; @@ -44,12 +46,10 @@ public final class Streamer { } } - public void writeVideoHeader(Size videoSize) throws IOException { + public void writeVideoHeader() throws IOException { if (sendStreamMeta) { - ByteBuffer buffer = ByteBuffer.allocate(12); + ByteBuffer buffer = ByteBuffer.allocate(4); buffer.putInt(codec.getId()); - buffer.putInt(videoSize.getWidth()); - buffer.putInt(videoSize.getHeight()); buffer.flip(); IO.writeFully(fd, buffer); } @@ -89,6 +89,18 @@ public final class Streamer { writePacket(codecBuffer, pts, config, keyFrame); } + public void writeSessionMeta(int width, int height) throws IOException { + if (sendStreamMeta) { + headerBuffer.clear(); + + headerBuffer.putInt((int) (PACKET_FLAG_SESSION >> 32)); // Set the first bit to 1 + headerBuffer.putInt(width); + headerBuffer.putInt(height); + headerBuffer.flip(); + IO.writeFully(fd, headerBuffer); + } + } + private void writeFrameMeta(FileDescriptor fd, int packetSize, long pts, boolean config, boolean keyFrame) throws IOException { headerBuffer.clear(); diff --git a/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java b/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java index 236a5f48..461ba472 100644 --- a/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java +++ b/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java @@ -71,16 +71,13 @@ public class SurfaceEncoder implements AsyncProcessor { try { boolean alive; - boolean headerWritten = false; + + streamer.writeVideoHeader(); do { reset.consumeReset(); // If a capture reset was requested, it is implicitly fulfilled capture.prepare(); Size size = capture.getSize(); - if (!headerWritten) { - streamer.writeVideoHeader(size); - headerWritten = true; - } format.setInteger(MediaFormat.KEY_WIDTH, size.getWidth()); format.setInteger(MediaFormat.KEY_HEIGHT, size.getHeight()); @@ -107,6 +104,7 @@ public class SurfaceEncoder implements AsyncProcessor { 
boolean resetRequested = reset.consumeReset(); if (!resetRequested) { // If a reset is requested during encode(), it will interrupt the encoding by an EOS + streamer.writeSessionMeta(size.getWidth(), size.getHeight()); encode(mediaCodec, streamer); } // The capture might have been closed internally (for example if the camera is disconnected) From fe0bda4bc74d159fcd8aee7459c7c11a48e8a163 Mon Sep 17 00:00:00 2001 From: Romain Vimont Date: Sun, 15 Jun 2025 12:31:24 +0200 Subject: [PATCH 3/4] Properly handle session packets in delay_buffer The delay buffer must forward the session packets while preserving their order relative to media packets. --- app/src/delay_buffer.c | 142 ++++++++++++++++++++++++++++------------- app/src/delay_buffer.h | 17 +++-- 2 files changed, 111 insertions(+), 48 deletions(-) diff --git a/app/src/delay_buffer.c b/app/src/delay_buffer.c index 2a6fcdb3..a46f43b2 100644 --- a/app/src/delay_buffer.c +++ b/app/src/delay_buffer.c @@ -10,16 +10,18 @@ #define DOWNCAST(SINK) container_of(SINK, struct sc_delay_buffer, frame_sink) static bool -sc_delayed_frame_init(struct sc_delayed_frame *dframe, const AVFrame *frame) { - dframe->frame = av_frame_alloc(); - if (!dframe->frame) { +sc_delayed_packet_init_frame(struct sc_delayed_packet *dpacket, + const AVFrame *frame) { + dpacket->type = SC_DELAYED_PACKET_TYPE_FRAME; + dpacket->frame = av_frame_alloc(); + if (!dpacket->frame) { LOG_OOM(); return false; } - if (av_frame_ref(dframe->frame, frame)) { + if (av_frame_ref(dpacket->frame, frame)) { LOG_OOM(); - av_frame_free(&dframe->frame); + av_frame_free(&dpacket->frame); return false; } @@ -27,9 +29,18 @@ sc_delayed_frame_init(struct sc_delayed_frame *dframe, const AVFrame *frame) { } static void -sc_delayed_frame_destroy(struct sc_delayed_frame *dframe) { - av_frame_unref(dframe->frame); - av_frame_free(&dframe->frame); +sc_delayed_packet_init_session(struct sc_delayed_packet *dpacket, + const struct sc_stream_session *session) { + dpacket->type = 
SC_DELAYED_PACKET_TYPE_SESSION; + dpacket->session = *session; +} + +static void +sc_delayed_packet_destroy(struct sc_delayed_packet *dpacket) { + if (dpacket->type == SC_DELAYED_PACKET_TYPE_FRAME) { + av_frame_unref(dpacket->frame); + av_frame_free(&dpacket->frame); + } } static int @@ -50,43 +61,52 @@ run_buffering(void *data) { goto stopped; } - struct sc_delayed_frame dframe = sc_vecdeque_pop(&db->queue); + struct sc_delayed_packet dpacket = sc_vecdeque_pop(&db->queue); - sc_tick max_deadline = sc_tick_now() + db->delay; - // PTS (written by the server) are expressed in microseconds - sc_tick pts = SC_TICK_FROM_US(dframe.frame->pts); + bool ok; + if (dpacket.type == SC_DELAYED_PACKET_TYPE_FRAME) { + sc_tick max_deadline = sc_tick_now() + db->delay; + // PTS (written by the server) are expressed in microseconds + sc_tick pts = SC_TICK_FROM_US(dpacket.frame->pts); - bool timed_out = false; - while (!db->stopped && !timed_out) { - sc_tick deadline = sc_clock_to_system_time(&db->clock, pts) - + db->delay; - if (deadline > max_deadline) { - deadline = max_deadline; + bool timed_out = false; + while (!db->stopped && !timed_out) { + sc_tick deadline = sc_clock_to_system_time(&db->clock, pts) + + db->delay; + if (deadline > max_deadline) { + deadline = max_deadline; + } + + timed_out = + !sc_cond_timedwait(&db->wait_cond, &db->mutex, deadline); } - timed_out = - !sc_cond_timedwait(&db->wait_cond, &db->mutex, deadline); - } + bool stopped = db->stopped; + sc_mutex_unlock(&db->mutex); - bool stopped = db->stopped; - sc_mutex_unlock(&db->mutex); - - if (stopped) { - sc_delayed_frame_destroy(&dframe); - goto stopped; - } + if (stopped) { + sc_delayed_packet_destroy(&dpacket); + goto stopped; + } #ifdef SC_BUFFERING_DEBUG - LOGD("Buffering: %" PRItick ";%" PRItick ";%" PRItick, - pts, dframe.push_date, sc_tick_now()); + LOGD("Buffering: %" PRItick ";%" PRItick ";%" PRItick, + pts, dpacket.push_date, sc_tick_now()); #endif - bool ok = 
sc_frame_source_sinks_push(&db->frame_source, dframe.frame); - sc_delayed_frame_destroy(&dframe); + ok = sc_frame_source_sinks_push(&db->frame_source, dpacket.frame); + } else { + assert(dpacket.type == SC_DELAYED_PACKET_TYPE_SESSION); + sc_mutex_unlock(&db->mutex); + ok = sc_frame_source_sinks_push_session(&db->frame_source, + &dpacket.session); + } + + sc_delayed_packet_destroy(&dpacket); if (!ok) { - LOGE("Delayed frame could not be pushed, stopping"); + LOGE("Delayed packet could not be pushed, stopping"); sc_mutex_lock(&db->mutex); - // Prevent to push any new frame + // Prevent to push any new packet db->stopped = true; sc_mutex_unlock(&db->mutex); goto stopped; @@ -98,8 +118,8 @@ stopped: // Flush queue while (!sc_vecdeque_is_empty(&db->queue)) { - struct sc_delayed_frame *dframe = sc_vecdeque_popref(&db->queue); - sc_delayed_frame_destroy(dframe); + struct sc_delayed_packet *dpacket = sc_vecdeque_popref(&db->queue); + sc_delayed_packet_destroy(dpacket); } LOGD("Buffering thread ended"); @@ -113,6 +133,7 @@ sc_delay_buffer_frame_sink_open(struct sc_frame_sink *sink, const struct sc_stream_session *session) { struct sc_delay_buffer *db = DOWNCAST(sink); (void) ctx; + (void) session; bool ok = sc_mutex_init(&db->mutex); if (!ok) { @@ -197,24 +218,56 @@ sc_delay_buffer_frame_sink_push(struct sc_frame_sink *sink, return sc_frame_source_sinks_push(&db->frame_source, frame); } - struct sc_delayed_frame dframe; - bool ok = sc_delayed_frame_init(&dframe, frame); - if (!ok) { + struct sc_delayed_packet *dpacket = sc_vecdeque_push_hole(&db->queue); + if (!dpacket) { sc_mutex_unlock(&db->mutex); + LOG_OOM(); return false; } -#ifdef SC_BUFFERING_DEBUG - dframe.push_date = sc_tick_now(); -#endif - - ok = sc_vecdeque_push(&db->queue, dframe); + bool ok = sc_delayed_packet_init_frame(dpacket, frame); if (!ok) { sc_mutex_unlock(&db->mutex); LOG_OOM(); return false; } +#ifdef SC_BUFFERING_DEBUG + dpacket->push_date = sc_tick_now(); +#endif + + 
sc_cond_signal(&db->queue_cond); + + sc_mutex_unlock(&db->mutex); + + return true; +} + +static bool +sc_delay_buffer_frame_sink_push_session(struct sc_frame_sink *sink, + const struct sc_stream_session *session) { + struct sc_delay_buffer *db = DOWNCAST(sink); + + sc_mutex_lock(&db->mutex); + + if (db->stopped) { + sc_mutex_unlock(&db->mutex); + return false; + } + + struct sc_delayed_packet *dpacket = sc_vecdeque_push_hole(&db->queue); + if (!dpacket) { + sc_mutex_unlock(&db->mutex); + LOG_OOM(); + return false; + } + + sc_delayed_packet_init_session(dpacket, session); + +#ifdef SC_BUFFERING_DEBUG + dpacket->push_date = sc_tick_now(); +#endif + sc_cond_signal(&db->queue_cond); sc_mutex_unlock(&db->mutex); @@ -236,6 +289,7 @@ sc_delay_buffer_init(struct sc_delay_buffer *db, sc_tick delay, .open = sc_delay_buffer_frame_sink_open, .close = sc_delay_buffer_frame_sink_close, .push = sc_delay_buffer_frame_sink_push, + .push_session = sc_delay_buffer_frame_sink_push_session, }; db->frame_sink.ops = &ops; diff --git a/app/src/delay_buffer.h b/app/src/delay_buffer.h index 61cd77e4..420684be 100644 --- a/app/src/delay_buffer.h +++ b/app/src/delay_buffer.h @@ -18,14 +18,23 @@ // forward declarations typedef struct AVFrame AVFrame; -struct sc_delayed_frame { - AVFrame *frame; +enum sc_delayed_packet_type { + SC_DELAYED_PACKET_TYPE_FRAME, + SC_DELAYED_PACKET_TYPE_SESSION, +}; + +struct sc_delayed_packet { + enum sc_delayed_packet_type type; + union { + AVFrame *frame; + struct sc_stream_session session; + }; #ifdef SC_BUFFERING_DEBUG sc_tick push_date; #endif }; -struct sc_delayed_frame_queue SC_VECDEQUE(struct sc_delayed_frame); +struct sc_delayed_packet_queue SC_VECDEQUE(struct sc_delayed_packet); struct sc_delay_buffer { struct sc_frame_source frame_source; // frame source trait @@ -40,7 +49,7 @@ struct sc_delay_buffer { sc_cond wait_cond; struct sc_clock clock; - struct sc_delayed_frame_queue queue; + struct sc_delayed_packet_queue queue; bool stopped; }; From 
02047ff102ac50b5823aa6cefb5b5087a11d4834 Mon Sep 17 00:00:00 2001 From: Romain Vimont Date: Sun, 15 Jun 2025 12:37:07 +0200 Subject: [PATCH 4/4] Detect frame size mismatch in decoder Warn if the size of a decoded video frame does not match the session metadata. --- app/src/decoder.c | 36 ++++++++++++++++++++++++++++++++++++ app/src/decoder.h | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/app/src/decoder.c b/app/src/decoder.c index e2269b2b..464414c6 100644 --- a/app/src/decoder.c +++ b/app/src/decoder.c @@ -25,6 +25,15 @@ sc_decoder_open(struct sc_decoder *decoder, AVCodecContext *ctx, decoder->ctx = ctx; + // A video stream must have a session + assert(session || ctx->codec_type != AVMEDIA_TYPE_VIDEO); + + if (session) { + decoder->session = *session; + } + + memset(&decoder->frame_size, 0, sizeof(decoder->frame_size)); + return true; } @@ -62,6 +71,32 @@ sc_decoder_push(struct sc_decoder *decoder, const AVPacket *packet) { } // a frame was received + + if (decoder->ctx->codec_type == AVMEDIA_TYPE_VIDEO) { + assert(decoder->frame->width >= 0); + assert(decoder->frame->height >= 0); + struct sc_size frame_size = { + .width = decoder->frame->width, + .height = decoder->frame->height, + }; + if (decoder->frame_size.width != frame_size.width + || decoder->frame_size.height != frame_size.height) { + // The frame size has changed, check if it matches the session + uint32_t sw = decoder->session.video.width; + uint32_t sh = decoder->session.video.height; + if (frame_size.width != sw || frame_size.height != sh) { + LOGW("Unexpected video size: %" PRIu32 "x%" PRIu32 + " (expected %" PRIu32 "x%" PRIu32 ")", + frame_size.width, frame_size.height, sw, sh); + + LOGW("The encoder did not respect the requested size, " + "please retry with a lower resolution (-m/--max-size)"); + } + } + + decoder->frame_size = frame_size; + } + bool ok = sc_frame_source_sinks_push(&decoder->frame_source, decoder->frame); av_frame_unref(decoder->frame); @@ -77,6 +112,7 @@ 
sc_decoder_push(struct sc_decoder *decoder, const AVPacket *packet) { static bool sc_decoder_push_session(struct sc_decoder *decoder, const struct sc_stream_session *session) { + decoder->session = *session; return sc_frame_source_sinks_push_session(&decoder->frame_source, session); } diff --git a/app/src/decoder.h b/app/src/decoder.h index 1f525fae..4a2f24e0 100644 --- a/app/src/decoder.h +++ b/app/src/decoder.h @@ -5,6 +5,7 @@ #include +#include "coords.h" #include "trait/frame_source.h" #include "trait/packet_sink.h" @@ -16,6 +17,9 @@ struct sc_decoder { AVCodecContext *ctx; AVFrame *frame; + + struct sc_stream_session session; // only initialized for video stream + struct sc_size frame_size; }; // The name must be statically allocated (e.g. a string literal)