From f9a1803126ab005d2fba9c48ba27781bd80bba44 Mon Sep 17 00:00:00 2001
From: Romain Vimont
Date: Sat, 10 May 2025 10:23:02 +0200
Subject: [PATCH] Add session metadata for video stream
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a new packet type, a "session" packet, containing metadata about
the encoding session. It is used only for the video stream, and currently
includes the video resolution.

For illustration, here is a sequence of packets on the video stream:

                                 device rotation
                                        v
 CODEC | SESSION | MEDIA | MEDIA | … | SESSION | MEDIA | MEDIA | …
                1920x1080
         <----------------->
                                               1080x1920
                                        <------------------
          encoding session 1             encoding session 2

This metadata is not strictly necessary, since the video resolution can be
determined after decoding. However, it allows detection of cases where the
encoder does not respect the requested size (and logs a warning), even
without decoding (e.g., when there is no video playback).

Additional metadata could be added later if necessary, for example the
actual device rotation.

Refs #5918
Refs #5984

Co-authored-by: gz0119
---
 app/src/audio_player.c                        |   5 +-
 app/src/decoder.c                             |  25 ++-
 app/src/delay_buffer.c                        |   5 +-
 app/src/demuxer.c                             | 151 ++++++++++++------
 app/src/recorder.c                            |  10 +-
 app/src/screen.c                              |   4 +-
 app/src/trait/frame_sink.h                    |  11 +-
 app/src/trait/frame_source.c                  |  20 ++-
 app/src/trait/frame_source.h                  |   7 +-
 app/src/trait/packet_sink.h                   |  18 ++-
 app/src/trait/packet_source.c                 |  19 ++-
 app/src/trait/packet_source.h                 |   7 +-
 app/src/v4l2_sink.c                           |   9 +-
 .../genymobile/scrcpy/device/Streamer.java    |  24 ++-
 .../scrcpy/video/SurfaceEncoder.java          |   8 +-
 15 files changed, 241 insertions(+), 82 deletions(-)

diff --git a/app/src/audio_player.c b/app/src/audio_player.c
index 9413c2ea..dbd82737 100644
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@@ -30,7 +30,10 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
 
 static bool
 sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
-                                const AVCodecContext *ctx) {
+                                const AVCodecContext *ctx,
+                                const struct sc_stream_session *session) {
+    (void) session;
+
     struct sc_audio_player *ap = DOWNCAST(sink);
 
 #ifdef SCRCPY_LAVU_HAS_CHLAYOUT
diff --git a/app/src/decoder.c b/app/src/decoder.c
index 4d0a1daf..e2269b2b 100644
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@@ -10,14 +10,15 @@
 #define DOWNCAST(SINK) container_of(SINK, struct sc_decoder, packet_sink)
 
 static bool
-sc_decoder_open(struct sc_decoder *decoder, AVCodecContext *ctx) {
+sc_decoder_open(struct sc_decoder *decoder, AVCodecContext *ctx,
+                const struct sc_stream_session *session) {
     decoder->frame = av_frame_alloc();
     if (!decoder->frame) {
         LOG_OOM();
         return false;
     }
 
-    if (!sc_frame_source_sinks_open(&decoder->frame_source, ctx)) {
+    if (!sc_frame_source_sinks_open(&decoder->frame_source, ctx, session)) {
         av_frame_free(&decoder->frame);
         return false;
     }
@@ -74,9 +75,16 @@ sc_decoder_push(struct sc_decoder *decoder, const AVPacket *packet) {
 }
 
 static bool
-sc_decoder_packet_sink_open(struct sc_packet_sink *sink, AVCodecContext *ctx) {
+sc_decoder_push_session(struct sc_decoder *decoder,
+                        const struct sc_stream_session *session) {
+    return sc_frame_source_sinks_push_session(&decoder->frame_source, session);
+}
+
+static bool
+sc_decoder_packet_sink_open(struct sc_packet_sink *sink, AVCodecContext *ctx,
+                            const struct sc_stream_session *session) {
     struct sc_decoder *decoder = DOWNCAST(sink);
-    return sc_decoder_open(decoder, ctx);
+    return sc_decoder_open(decoder, ctx, session);
 }
 
 static void
@@ -92,6 +100,14 @@ sc_decoder_packet_sink_push(struct sc_packet_sink *sink,
     return sc_decoder_push(decoder, packet);
 }
 
+static bool
+sc_decoder_packet_sink_push_session(struct sc_packet_sink *sink,
+                                    const struct sc_stream_session *session) {
+
+    struct sc_decoder *decoder = DOWNCAST(sink);
+    return sc_decoder_push_session(decoder, session);
+}
+
 void
 sc_decoder_init(struct sc_decoder *decoder, const char *name) {
     decoder->name = name; // statically allocated
@@ -101,6 +117,7 @@ sc_decoder_init(struct sc_decoder *decoder, const char *name) {
         .open = sc_decoder_packet_sink_open,
         .close = sc_decoder_packet_sink_close,
         .push = sc_decoder_packet_sink_push,
+        .push_session = sc_decoder_packet_sink_push_session,
     };
 
     decoder->packet_sink.ops = &ops;
diff --git a/app/src/delay_buffer.c b/app/src/delay_buffer.c
index f75c6f72..2a6fcdb3 100644
--- a/app/src/delay_buffer.c
+++ b/app/src/delay_buffer.c
@@ -109,7 +109,8 @@ stopped:
 
 static bool
 sc_delay_buffer_frame_sink_open(struct sc_frame_sink *sink,
-                                const AVCodecContext *ctx) {
+                                const AVCodecContext *ctx,
+                                const struct sc_stream_session *session) {
     struct sc_delay_buffer *db = DOWNCAST(sink);
     (void) ctx;
 
@@ -132,7 +133,7 @@ sc_delay_buffer_frame_sink_open(struct sc_frame_sink *sink,
     sc_vecdeque_init(&db->queue);
     db->stopped = false;
 
-    if (!sc_frame_source_sinks_open(&db->frame_source, ctx)) {
+    if (!sc_frame_source_sinks_open(&db->frame_source, ctx, session)) {
         goto error_destroy_wait_cond;
     }
 
diff --git a/app/src/demuxer.c b/app/src/demuxer.c
index 885cd6ee..73a4bb2f 100644
--- a/app/src/demuxer.c
+++ b/app/src/demuxer.c
@@ -11,8 +11,8 @@
 
 #define SC_PACKET_HEADER_SIZE 12
 
-#define SC_PACKET_FLAG_CONFIG    (UINT64_C(1) << 63)
-#define SC_PACKET_FLAG_KEY_FRAME (UINT64_C(1) << 62)
+#define SC_PACKET_FLAG_CONFIG    (UINT64_C(1) << 62)
+#define SC_PACKET_FLAG_KEY_FRAME (UINT64_C(1) << 61)
 
 #define SC_PACKET_PTS_MASK (SC_PACKET_FLAG_KEY_FRAME - 1)
 
@@ -63,48 +63,75 @@ sc_demuxer_recv_codec_id(struct sc_demuxer *demuxer, uint32_t *codec_id) {
     return true;
 }
 
-static bool
-sc_demuxer_recv_video_size(struct sc_demuxer *demuxer, uint32_t *width,
-                           uint32_t *height) {
-    uint8_t data[8];
-    ssize_t r = net_recv_all(demuxer->socket, data, 8);
-    if (r < 8) {
-        return false;
-    }
-
-    *width = sc_read32be(data);
-    *height = sc_read32be(data + 4);
-    return true;
-}
-
-static bool
-sc_demuxer_recv_packet(struct sc_demuxer *demuxer, AVPacket *packet) {
+static inline bool
+sc_demuxer_recv_header(struct sc_demuxer *demuxer,
+                       uint8_t buf[static SC_PACKET_HEADER_SIZE]) {
     // The video and audio streams contain a sequence of raw packets (as
     // provided by MediaCodec), each prefixed with a "meta" header.
     //
-    // The "meta" header length is 12 bytes:
+    // The "meta" header length is 12 bytes.
+    //
+    //
+    // If the MSB is 1, then it is a session packet (for a video stream only),
+    // which only contains a 12-byte header:
+    //
+    //     byte 0    byte 1    byte 2    byte 3
+    //    10000000  00000000  00000000  00000000
+    //    ^<-------------------------------->
+    //    |             padding
+    //    `- session packet flag
+    //
+    //     byte 4    byte 5    byte 6    byte 7    byte 8    byte 9   byte 10   byte 11
+    //    ........  ........  ........  ........  ........  ........  ........  ........
+    //    <------------------------------------>  <------------------------------------>
+    //                 video width                              video height
+    //
+    //
+    // If the MSB is 0, then it is a media packet, comprised of a 12-byte header
+    // followed by bytes containing the packet/frame:
+    //
     // [. . . . . . . .|. . . .]. . . . . . . . . . . . . . . ...
     // <-------------> <-----> <-----------------------------...
     //        PTS        packet        raw packet
     //                    size
     //
-    // It is followed by bytes containing the packet/frame.
-    //
     // The most significant bits of the PTS are used for packet flags:
     //
-    //  byte 7   byte 6   byte 5   byte 4   byte 3   byte 2   byte 1   byte 0
-    // CK...... ........ ........ ........ ........ ........ ........ ........
-    // ^^<------------------------------------------------------------------->
-    // ||                                PTS
-    // | `- key frame
-    //  `-- config packet
+    //     byte 0    byte 1    byte 2    byte 3    byte 4    byte 5    byte 6    byte 7
+    //    0CK.....  ........  ........  ........  ........  ........  ........  ........
+    //    ^^^<------------------------------------------------------------------>
+    //    |||                                PTS
+    //    || `- key frame
+    //    | `-- config packet
+    //    `--- media packet flag
+    //
+    //     byte 8    byte 9   byte 10   byte 11
+    //    ........  ........  ........  ........   ........ ........ . . .
+    //    <------------------------------------>   <---------------- . . .
+    //                 packet size                     raw packet
+    //
+    ssize_t r = net_recv_all(demuxer->socket, buf, SC_PACKET_HEADER_SIZE);
+    assert(r <= SC_PACKET_HEADER_SIZE);
+    return r == SC_PACKET_HEADER_SIZE;
+}
 
-    uint8_t header[SC_PACKET_HEADER_SIZE];
-    ssize_t r = net_recv_all(demuxer->socket, header, SC_PACKET_HEADER_SIZE);
-    if (r < SC_PACKET_HEADER_SIZE) {
-        return false;
-    }
+static bool
+sc_demuxer_is_session(const uint8_t *header) {
+    return header[0] & 0x80;
+}
 
+static void
+sc_demuxer_parse_session(const uint8_t *header,
+                         struct sc_stream_session *session) {
+    assert(sc_demuxer_is_session(header));
+    session->video.width = sc_read32be(&header[4]);
+    session->video.height = sc_read32be(&header[8]);
+}
+
+static bool
+sc_demuxer_recv_packet(struct sc_demuxer *demuxer, const uint8_t *header,
+                       AVPacket *packet) {
+    assert(!sc_demuxer_is_session(header));
     uint64_t pts_flags = sc_read64be(header);
     uint32_t len = sc_read32be(&header[8]);
     assert(len);
@@ -114,7 +141,7 @@ sc_demuxer_recv_packet(struct sc_demuxer *demuxer, AVPacket *packet) {
         return false;
     }
 
-    r = net_recv_all(demuxer->socket, packet->data, len);
+    ssize_t r = net_recv_all(demuxer->socket, packet->data, len);
     if (r < 0 || ((uint32_t) r) < len) {
         av_packet_unref(packet);
         return false;
@@ -187,17 +214,28 @@ run_demuxer(void *data) {
 
     codec_ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
 
+    uint8_t header[SC_PACKET_HEADER_SIZE];
+    struct sc_stream_session session_data;
+
+    struct sc_stream_session *session = NULL;
     if (codec->type == AVMEDIA_TYPE_VIDEO) {
-        uint32_t width;
-        uint32_t height;
-        ok = sc_demuxer_recv_video_size(demuxer, &width, &height);
+        bool ok = sc_demuxer_recv_header(demuxer, header);
         if (!ok) {
             goto finally_free_context;
         }
 
-        codec_ctx->width = width;
-        codec_ctx->height = height;
+        if (!sc_demuxer_is_session(header)) {
+            LOGE("Unexpected packet (not a session header)");
+            goto finally_free_context;
+        }
+
+        session = &session_data;
+        sc_demuxer_parse_session(header, session);
+
+        codec_ctx->width = session_data.video.width;
+        codec_ctx->height = session_data.video.height;
         codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
+
     } else {
         // Hardcoded audio properties
 #ifdef SCRCPY_LAVU_HAS_CHLAYOUT
@@ -219,7 +257,8 @@ run_demuxer(void *data) {
         goto finally_free_context;
     }
 
-    if (!sc_packet_source_sinks_open(&demuxer->packet_source, codec_ctx)) {
+    if (!sc_packet_source_sinks_open(&demuxer->packet_source, codec_ctx,
+                                     session)) {
         goto finally_free_context;
     }
 
@@ -241,27 +280,43 @@
     }
 
     for (;;) {
-        bool ok = sc_demuxer_recv_packet(demuxer, packet);
+        bool ok = sc_demuxer_recv_header(demuxer, header);
         if (!ok) {
             // end of stream
             status = SC_DEMUXER_STATUS_EOS;
             break;
         }
 
-        if (must_merge_config_packet) {
-            // Prepend any config packet to the next media packet
-            ok = sc_packet_merger_merge(&merger, packet);
+        if (sc_demuxer_is_session(header)) {
+            sc_demuxer_parse_session(header, &session_data);
+            ok = sc_packet_source_sinks_push_session(&demuxer->packet_source,
+                                                     &session_data);
             if (!ok) {
-                av_packet_unref(packet);
+                // The sink already logged its concrete error
                 break;
             }
-        }
+        } else {
+            ok = sc_demuxer_recv_packet(demuxer, header, packet);
+            if (!ok) {
+                status = SC_DEMUXER_STATUS_EOS;
+                break;
+            }
 
-        ok = sc_packet_source_sinks_push(&demuxer->packet_source, packet);
-        av_packet_unref(packet);
-        if (!ok) {
-            // The sink already logged its concrete error
-            break;
+            if (must_merge_config_packet) {
+                // Prepend any config packet to the next media packet
+                ok = sc_packet_merger_merge(&merger, packet);
+                if (!ok) {
+                    av_packet_unref(packet);
+                    break;
+                }
+            }
+
+            ok = sc_packet_source_sinks_push(&demuxer->packet_source, packet);
+            av_packet_unref(packet);
+            if (!ok) {
+                // The sink already logged its concrete error
+                break;
+            }
         }
     }
 
diff --git a/app/src/recorder.c b/app/src/recorder.c
index c26f8f2d..f173d72e 100644
--- a/app/src/recorder.c
+++ b/app/src/recorder.c
@@ -541,7 +541,10 @@ sc_recorder_set_orientation(AVStream *stream, enum sc_orientation orientation) {
 
 static bool
 sc_recorder_video_packet_sink_open(struct sc_packet_sink *sink,
-                                   AVCodecContext *ctx) {
+                                   AVCodecContext *ctx,
+                                   const struct sc_stream_session *session) {
+    (void) session;
+
     struct sc_recorder *recorder = DOWNCAST_VIDEO(sink);
     // only written from this thread, no need to lock
     assert(!recorder->video_init);
@@ -635,7 +638,10 @@ sc_recorder_video_packet_sink_push(struct sc_packet_sink *sink,
 
 static bool
 sc_recorder_audio_packet_sink_open(struct sc_packet_sink *sink,
-                                   AVCodecContext *ctx) {
+                                   AVCodecContext *ctx,
+                                   const struct sc_stream_session *session) {
+    (void) session;
+
     struct sc_recorder *recorder = DOWNCAST_AUDIO(sink);
     assert(recorder->audio);
     // only written from this thread, no need to lock
diff --git a/app/src/screen.c b/app/src/screen.c
index 1d694f12..51f7bf46 100644
--- a/app/src/screen.c
+++ b/app/src/screen.c
@@ -252,9 +252,11 @@ event_watcher(void *data, SDL_Event *event) {
 
 static bool
 sc_screen_frame_sink_open(struct sc_frame_sink *sink,
-                          const AVCodecContext *ctx) {
+                          const AVCodecContext *ctx,
+                          const struct sc_stream_session *session) {
     assert(ctx->pix_fmt == AV_PIX_FMT_YUV420P);
     (void) ctx;
+    (void) session;
 
     struct sc_screen *screen = DOWNCAST(sink);
 
diff --git a/app/src/trait/frame_sink.h b/app/src/trait/frame_sink.h
index 67be4d46..dc94b1fc 100644
--- a/app/src/trait/frame_sink.h
+++ b/app/src/trait/frame_sink.h
@@ -6,6 +6,8 @@
 #include <stdbool.h>
 #include <libavcodec/avcodec.h>
 
+#include "trait/packet_sink.h"
+
 /**
  * Frame sink trait.
  *
@@ -17,9 +19,16 @@ struct sc_frame_sink {
 
 struct sc_frame_sink_ops {
     /* The codec context is valid until the sink is closed */
-    bool (*open)(struct sc_frame_sink *sink, const AVCodecContext *ctx);
+    bool (*open)(struct sc_frame_sink *sink, const AVCodecContext *ctx,
+                 const struct sc_stream_session *session);
     void (*close)(struct sc_frame_sink *sink);
     bool (*push)(struct sc_frame_sink *sink, const AVFrame *frame);
+
+    /**
+     * Optional callback to be notified of a new stream session.
+     */
+    bool (*push_session)(struct sc_frame_sink *sink,
+                         const struct sc_stream_session *session);
 };
 
 #endif
diff --git a/app/src/trait/frame_source.c b/app/src/trait/frame_source.c
index 56848309..d34e628b 100644
--- a/app/src/trait/frame_source.c
+++ b/app/src/trait/frame_source.c
@@ -27,11 +27,12 @@ sc_frame_source_sinks_close_firsts(struct sc_frame_source *source,
 
 bool
 sc_frame_source_sinks_open(struct sc_frame_source *source,
-                           const AVCodecContext *ctx) {
+                           const AVCodecContext *ctx,
+                           const struct sc_stream_session *session) {
     assert(source->sink_count);
     for (unsigned i = 0; i < source->sink_count; ++i) {
         struct sc_frame_sink *sink = source->sinks[i];
-        if (!sink->ops->open(sink, ctx)) {
+        if (!sink->ops->open(sink, ctx, session)) {
             sc_frame_source_sinks_close_firsts(source, i);
             return false;
         }
@@ -59,3 +60,18 @@ sc_frame_source_sinks_push(struct sc_frame_source *source,
 
     return true;
 }
+
+bool
+sc_frame_source_sinks_push_session(struct sc_frame_source *source,
+                                   const struct sc_stream_session *session) {
+    assert(source->sink_count);
+    for (unsigned i = 0; i < source->sink_count; ++i) {
+        struct sc_frame_sink *sink = source->sinks[i];
+        if (sink->ops->push_session &&
+                !sink->ops->push_session(sink, session)) {
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/app/src/trait/frame_source.h b/app/src/trait/frame_source.h
index cb1ef905..0a1ea8f1 100644
--- a/app/src/trait/frame_source.h
+++ b/app/src/trait/frame_source.h
@@ -28,7 +28,8 @@ sc_frame_source_add_sink(struct sc_frame_source *source,
 
 bool
 sc_frame_source_sinks_open(struct sc_frame_source *source,
-                           const AVCodecContext *ctx);
+                           const AVCodecContext *ctx,
+                           const struct sc_stream_session *session);
 
 void
 sc_frame_source_sinks_close(struct sc_frame_source *source);
@@ -37,4 +38,8 @@ bool
 sc_frame_source_sinks_push(struct sc_frame_source *source,
                            const AVFrame *frame);
 
+bool
+sc_frame_source_sinks_push_session(struct sc_frame_source *source,
+                                   const struct sc_stream_session *session);
+
 #endif
diff --git a/app/src/trait/packet_sink.h b/app/src/trait/packet_sink.h
index e12dea12..db742b2f 100644
--- a/app/src/trait/packet_sink.h
+++ b/app/src/trait/packet_sink.h
@@ -15,12 +15,28 @@ struct sc_packet_sink {
     const struct sc_packet_sink_ops *ops;
 };
 
+struct sc_stream_session_video {
+    uint32_t width;
+    uint32_t height;
+};
+
+struct sc_stream_session {
+    struct sc_stream_session_video video;
+};
+
 struct sc_packet_sink_ops {
     /* The codec context is valid until the sink is closed */
-    bool (*open)(struct sc_packet_sink *sink, AVCodecContext *ctx);
+    bool (*open)(struct sc_packet_sink *sink, AVCodecContext *ctx,
+                 const struct sc_stream_session *session);
     void (*close)(struct sc_packet_sink *sink);
     bool (*push)(struct sc_packet_sink *sink, const AVPacket *packet);
 
+    /**
+     * Optional callback to be notified of a new stream session.
+     */
+    bool (*push_session)(struct sc_packet_sink *sink,
+                         const struct sc_stream_session *session);
+
     /*/
      * Called when the input stream has been disabled at runtime.
      *
diff --git a/app/src/trait/packet_source.c b/app/src/trait/packet_source.c
index 0a2c6c4d..49cb6d14 100644
--- a/app/src/trait/packet_source.c
+++ b/app/src/trait/packet_source.c
@@ -27,11 +27,12 @@ sc_packet_source_sinks_close_firsts(struct sc_packet_source *source,
 
 bool
 sc_packet_source_sinks_open(struct sc_packet_source *source,
-                            AVCodecContext *ctx) {
+                            AVCodecContext *ctx,
+                            const struct sc_stream_session *session) {
     assert(source->sink_count);
     for (unsigned i = 0; i < source->sink_count; ++i) {
         struct sc_packet_sink *sink = source->sinks[i];
-        if (!sink->ops->open(sink, ctx)) {
+        if (!sink->ops->open(sink, ctx, session)) {
             sc_packet_source_sinks_close_firsts(source, i);
             return false;
         }
@@ -60,6 +61,21 @@ sc_packet_source_sinks_push(struct sc_packet_source *source,
     return true;
 }
 
+bool
+sc_packet_source_sinks_push_session(struct sc_packet_source *source,
+                                    const struct sc_stream_session *session) {
+    assert(source->sink_count);
+    for (unsigned i = 0; i < source->sink_count; ++i) {
+        struct sc_packet_sink *sink = source->sinks[i];
+        if (sink->ops->push_session &&
+                !sink->ops->push_session(sink, session)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 void
 sc_packet_source_sinks_disable(struct sc_packet_source *source) {
     assert(source->sink_count);
diff --git a/app/src/trait/packet_source.h b/app/src/trait/packet_source.h
index 8788021a..07be4970 100644
--- a/app/src/trait/packet_source.h
+++ b/app/src/trait/packet_source.h
@@ -28,7 +28,8 @@ sc_packet_source_add_sink(struct sc_packet_source *source,
 
 bool
 sc_packet_source_sinks_open(struct sc_packet_source *source,
-                            AVCodecContext *ctx);
+                            AVCodecContext *ctx,
+                            const struct sc_stream_session *session);
 
 void
 sc_packet_source_sinks_close(struct sc_packet_source *source);
@@ -37,6 +38,10 @@ bool
 sc_packet_source_sinks_push(struct sc_packet_source *source,
                             const AVPacket *packet);
 
+bool
+sc_packet_source_sinks_push_session(struct sc_packet_source *source,
+                                    const struct sc_stream_session *session);
+
 void
 sc_packet_source_sinks_disable(struct sc_packet_source *source);
 
diff --git a/app/src/v4l2_sink.c b/app/src/v4l2_sink.c
index da9e02ef..92551149 100644
--- a/app/src/v4l2_sink.c
+++ b/app/src/v4l2_sink.c
@@ -146,9 +146,11 @@ run_v4l2_sink(void *data) {
 }
 
 static bool
-sc_v4l2_sink_open(struct sc_v4l2_sink *vs, const AVCodecContext *ctx) {
+sc_v4l2_sink_open(struct sc_v4l2_sink *vs, const AVCodecContext *ctx,
+                  const struct sc_stream_session *session) {
     assert(ctx->pix_fmt == AV_PIX_FMT_YUV420P);
     (void) ctx;
+    (void) session;
 
     bool ok = sc_frame_buffer_init(&vs->fb);
     if (!ok) {
@@ -326,9 +328,10 @@ sc_v4l2_sink_push(struct sc_v4l2_sink *vs, const AVFrame *frame) {
 }
 
 static bool
-sc_v4l2_frame_sink_open(struct sc_frame_sink *sink, const AVCodecContext *ctx) {
+sc_v4l2_frame_sink_open(struct sc_frame_sink *sink, const AVCodecContext *ctx,
+                        const struct sc_stream_session *session) {
     struct sc_v4l2_sink *vs = DOWNCAST(sink);
-    return sc_v4l2_sink_open(vs, ctx);
+    return sc_v4l2_sink_open(vs, ctx, session);
 }
 
 static void
diff --git a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java
index 93c5077b..57abde5b 100644
--- a/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java
+++ b/server/src/main/java/com/genymobile/scrcpy/device/Streamer.java
@@ -3,6 +3,7 @@ package com.genymobile.scrcpy.device;
 import com.genymobile.scrcpy.audio.AudioCodec;
 import com.genymobile.scrcpy.util.Codec;
 import com.genymobile.scrcpy.util.IO;
+import com.genymobile.scrcpy.util.Ln;
 
 import android.media.MediaCodec;
 
@@ -14,8 +15,9 @@ import java.util.Arrays;
 
 public final class Streamer {
 
-    private static final long PACKET_FLAG_CONFIG = 1L << 63;
-    private static final long PACKET_FLAG_KEY_FRAME = 1L << 62;
+    private static final long PACKET_FLAG_SESSION = 1L << 63;
+    private static final long PACKET_FLAG_CONFIG = 1L << 62;
+    private static final long PACKET_FLAG_KEY_FRAME = 1L << 61;
 
     private final FileDescriptor fd;
     private final Codec codec;
@@ -44,12 +46,10 @@ public final class Streamer {
         }
     }
 
-    public void writeVideoHeader(Size videoSize) throws IOException {
+    public void writeVideoHeader() throws IOException {
         if (sendStreamMeta) {
-            ByteBuffer buffer = ByteBuffer.allocate(12);
+            ByteBuffer buffer = ByteBuffer.allocate(4);
             buffer.putInt(codec.getId());
-            buffer.putInt(videoSize.getWidth());
-            buffer.putInt(videoSize.getHeight());
             buffer.flip();
             IO.writeFully(fd, buffer);
         }
@@ -89,6 +89,18 @@ public final class Streamer {
         writePacket(codecBuffer, pts, config, keyFrame);
     }
 
+    public void writeSessionMeta(int width, int height) throws IOException {
+        if (sendStreamMeta) {
+            headerBuffer.clear();
+
+            headerBuffer.putInt((int) (PACKET_FLAG_SESSION >> 32)); // Set the first bit to 1
+            headerBuffer.putInt(width);
+            headerBuffer.putInt(height);
+            headerBuffer.flip();
+            IO.writeFully(fd, headerBuffer);
+        }
+    }
+
     private void writeFrameMeta(FileDescriptor fd, int packetSize, long pts, boolean config, boolean keyFrame) throws IOException {
         headerBuffer.clear();
 
diff --git a/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java b/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java
index 236a5f48..461ba472 100644
--- a/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java
+++ b/server/src/main/java/com/genymobile/scrcpy/video/SurfaceEncoder.java
@@ -71,16 +71,13 @@ public class SurfaceEncoder implements AsyncProcessor {
         try {
             boolean alive;
-            boolean headerWritten = false;
+
+            streamer.writeVideoHeader();
 
             do {
                 reset.consumeReset(); // If a capture reset was requested, it is implicitly fulfilled
 
                 capture.prepare();
                 Size size = capture.getSize();
-                if (!headerWritten) {
-                    streamer.writeVideoHeader(size);
-                    headerWritten = true;
-                }
 
                 format.setInteger(MediaFormat.KEY_WIDTH, size.getWidth());
                 format.setInteger(MediaFormat.KEY_HEIGHT, size.getHeight());
@@ -107,6 +104,7 @@ public class SurfaceEncoder implements AsyncProcessor {
                 boolean resetRequested = reset.consumeReset();
                 if (!resetRequested) {
                     // If a reset is requested during encode(), it will interrupt the encoding by an EOS
+                    streamer.writeSessionMeta(size.getWidth(), size.getHeight());
                     encode(mediaCodec, streamer);
                 }
                 // The capture might have been closed internally (for example if the camera is disconnected)
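
For illustration, here is a standalone sketch (not part of the patch above) of how a
receiver could interpret the 12-byte header described in demuxer.c: the most
significant bit of byte 0 distinguishes session packets from media packets, and the
config/key-frame flags sit in bits 62 and 61 of the 64-bit PTS field, matching
PACKET_FLAG_SESSION/CONFIG/KEY_FRAME on the server side. The names read32be,
read64be, parsed_header and parse_header are hypothetical stand-ins for the scrcpy
utilities used in app/src/demuxer.c:

// Illustrative sketch only: decode a 12-byte stream header into a struct.
#include <stdbool.h>
#include <stdint.h>

#define HEADER_SIZE 12
#define FLAG_SESSION   (UINT64_C(1) << 63)
#define FLAG_CONFIG    (UINT64_C(1) << 62)
#define FLAG_KEY_FRAME (UINT64_C(1) << 61)
#define PTS_MASK       (FLAG_KEY_FRAME - 1)

static uint32_t read32be(const uint8_t *p) {
    return ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
         | ((uint32_t) p[2] << 8)  |  (uint32_t) p[3];
}

static uint64_t read64be(const uint8_t *p) {
    return ((uint64_t) read32be(p) << 32) | read32be(p + 4);
}

struct parsed_header {
    bool is_session;
    // session packet fields
    uint32_t width;
    uint32_t height;
    // media packet fields
    bool config;
    bool key_frame;
    uint64_t pts;
    uint32_t packet_size; // number of raw packet bytes that follow the header
};

static void parse_header(const uint8_t header[HEADER_SIZE],
                         struct parsed_header *out) {
    out->is_session = header[0] & 0x80;
    if (out->is_session) {
        // bytes 4..7: video width, bytes 8..11: video height (big-endian)
        out->width = read32be(&header[4]);
        out->height = read32be(&header[8]);
    } else {
        // bytes 0..7: flags + PTS, bytes 8..11: packet size (big-endian)
        uint64_t pts_flags = read64be(header);
        out->config = pts_flags & FLAG_CONFIG;
        out->key_frame = pts_flags & FLAG_KEY_FRAME;
        out->pts = pts_flags & PTS_MASK;
        out->packet_size = read32be(&header[8]);
    }
}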
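
Similarly, a minimal sketch of a packet sink opting into the new optional
push_session callback, assuming the sc_packet_sink trait declared in
app/src/trait/packet_sink.h and the LOGI() macro from app/src/util/log.h. The
sc_session_logger type and its functions are hypothetical and only illustrate the
ops wiring; sinks that leave push_session as NULL are simply skipped by
sc_packet_source_sinks_push_session():

// Illustrative sketch only: a hypothetical packet sink logging session changes.
#include <inttypes.h>
#include <libavcodec/avcodec.h>

#include "trait/packet_sink.h"
#include "util/log.h"

struct sc_session_logger {
    struct sc_packet_sink packet_sink; // packet sink trait
};

static bool
sc_session_logger_packet_sink_open(struct sc_packet_sink *sink,
                                   AVCodecContext *ctx,
                                   const struct sc_stream_session *session) {
    (void) sink;
    (void) ctx;
    // For the video stream, the initial session is provided on open()
    if (session) {
        LOGI("Initial video size: %" PRIu32 "x%" PRIu32,
             session->video.width, session->video.height);
    }
    return true;
}

static void
sc_session_logger_packet_sink_close(struct sc_packet_sink *sink) {
    (void) sink;
}

static bool
sc_session_logger_packet_sink_push(struct sc_packet_sink *sink,
                                   const AVPacket *packet) {
    (void) sink;
    (void) packet;
    return true;
}

static bool
sc_session_logger_packet_sink_push_session(struct sc_packet_sink *sink,
        const struct sc_stream_session *session) {
    (void) sink;
    // Called for each SESSION packet (e.g. after a device rotation)
    LOGI("New encoding session: %" PRIu32 "x%" PRIu32,
         session->video.width, session->video.height);
    return true;
}

void
sc_session_logger_init(struct sc_session_logger *logger) {
    static const struct sc_packet_sink_ops ops = {
        .open = sc_session_logger_packet_sink_open,
        .close = sc_session_logger_packet_sink_close,
        .push = sc_session_logger_packet_sink_push,
        .push_session = sc_session_logger_packet_sink_push_session,
    };
    logger->packet_sink.ops = &ops;
}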