Fix PTS produced by the default OPUS encoder

The default OPUS encoder on Android rewrites the PTS so that it exactly
matches the number of samples.

As a consequence:
 - audio clock drift is not compensated
 - hard silences (packets not produced on silence) are ignored

To fix this behavior, recreate the PTS based on the current time (after
encoding) and the packet duration.
This commit is contained in:
Romain Vimont 2025-03-02 17:09:43 +01:00
parent b292e356de
commit 63d848fc55

View file

@ -55,6 +55,9 @@ public final class AudioEncoder implements AsyncProcessor {
private final List<CodecOption> codecOptions;
private final String encoderName;
// Whether the PTS of output packets must be regenerated from the current time; set when the selected encoder is "c2.android.opus.encoder"
private boolean recreatePts;
// PTS (in microseconds) reported by the encoder for the previous non-config packet, used to compute the packet duration; 0 until one is recorded
private long previousPts;
// Capacity of 64 is in practice "infinite" (it is limited by the number of available MediaCodec buffers, typically 4).
// So many pending tasks would lead to an unacceptable delay anyway.
private final BlockingQueue<InputTask> inputTasks = new ArrayBlockingQueue<>(64);
@ -118,6 +121,9 @@ public final class AudioEncoder implements AsyncProcessor {
OutputTask task = outputTasks.take();
ByteBuffer buffer = mediaCodec.getOutputBuffer(task.index);
try {
if (recreatePts) {
fixTimestamp(task.bufferInfo);
}
streamer.writePacket(buffer, task.bufferInfo);
} finally {
mediaCodec.releaseOutputBuffer(task.index, false);
@ -125,6 +131,24 @@ public final class AudioEncoder implements AsyncProcessor {
}
}
/**
 * Replace the PTS written by the encoder with one derived from the current time.
 * <p>
 * The packet duration is computed as the difference between the PTS reported by the encoder for this packet and the one reported for the
 * previous packet. The new PTS is the current time (read after encoding) minus that duration.
 * <p>
 * Config packets are left untouched and are not recorded as "previous" packet. While {@code previousPts} is still 0, the PTS is not
 * rewritten (there is no previous packet to compute a duration from); it is only recorded.
 *
 * @param bufferInfo the metadata of the encoded packet, whose {@code presentationTimeUs} may be modified in place
 */
private void fixTimestamp(MediaCodec.BufferInfo bufferInfo) {
    assert recreatePts;

    boolean configPacket = (bufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0;
    if (!configPacket) {
        // Keep the PTS generated by the encoder: it is needed to compute the duration of the next packet
        long encoderPts = bufferInfo.presentationTimeUs;

        if (previousPts != 0) {
            long packetDurationUs = encoderPts - previousPts;
            // System.nanoTime() is in nanoseconds, the PTS is in microseconds
            long nowUs = System.nanoTime() / 1000;
            bufferInfo.presentationTimeUs = nowUs - packetDurationUs;
        }

        previousPts = encoderPts;
    }
}
@Override
public void start(TerminationListener listener) {
thread = new Thread(() -> {
@ -194,6 +218,11 @@ public final class AudioEncoder implements AsyncProcessor {
Codec codec = streamer.getCodec();
mediaCodec = createMediaCodec(codec, encoderName);
// The default OPUS encoder generates its own input PTS which matches the number of samples. This is not the behavior we want: it
// ignores any audio clock drift and hard silences (packets not produced on silence). To fix this behavior, regenerate PTS based on the
// current time and the packet duration.
recreatePts = "c2.android.opus.encoder".equals(mediaCodec.getName());
mediaCodecThread = new HandlerThread("media-codec");
mediaCodecThread.start();