fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus)

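Seek now trims the decoded lead-in so playback lands at the requested time rather than at the preceding page start; the per-chunk decoder drain now polls decodeQueueSize (bounded to ~200 ms) instead of waiting on a single zero-delay timeout. Minor cleanups: the unused `final` argument of drainDecoded is removed.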
2026-06-23 20:57:05 -04:00
parent 7f3fb74126
commit 5a75da1769
6 changed files with 186 additions and 42 deletions
@@ -52,6 +52,11 @@ export class OpusStreamDecoder implements IStreamingDecoder {
    private decodedQueue: AudioData[] = [];
    private fatalError = false;

+    // Frames still to discard from the head of the post-seek decoded output (AC9 fine re-sync).
+    // Armed by reinitializeForRangeContinuation: round((target - landing) * OPUS_SAMPLE_RATE), clamped
+    // to ≥ 0. audioDataToBuffer consumes it across successive AudioData chunks until it reaches zero.
+    private leadTrimFrames = 0;
+
    // Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. WebCodecs requires
    // strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation),
    // so a synthetic 48 kHz-derived counter suffices and stays exact.
@@ -135,14 +140,14 @@ export class OpusStreamDecoder implements IStreamingDecoder {

        const packets = this.demuxer.push(chunk);
        this.decodePackets(packets);
-        // Give the WebCodecs output callback a chance to run before we drain.
+        // Wait (bounded) for the decoder's queue to empty so pending outputs can arrive before draining.
        await this.yieldToDecoder();
-        return this.drainDecoded(false);
+        return this.drainDecoded();
    }

    async complete(): Promise<AudioBuffer[]> {
        if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') {
-            return this.drainDecoded(true);
+            return this.drainDecoded();
        }
        try {
            await this.decoder.flush();
@@ -151,10 +156,28 @@ export class OpusStreamDecoder implements IStreamingDecoder {
            // own cancellation handles that — surface nothing, just drain what we have.
            console.warn('Opus decoder flush interrupted:', (err as Error).message);
        }
-        return this.drainDecoded(true);
+        return this.drainDecoded();
    }

-    reinitializeForRangeContinuation(): void {
+    /**
+     * Reinitialize for a Range-continuation stream after seek-beyond-buffer.
+     *
+     * @param landingTimeSeconds  The actual page-start presentation time resolved from the seek index
+     *                            (t_page ≤ targetTimeSeconds). This is the time at which the decoder
+     *                            will begin emitting audio after reconfigure.
+     * @param targetTimeSeconds   The user-requested seek position. The difference
+     *                            `(target - landing) * OPUS_SAMPLE_RATE` frames are trimmed from the
+     *                            head of the decoded output so playback lands precisely at the target
+     *                            (AC9 fine re-sync, §3.4a step 4).
+     *
+     * Pre-skip note: the reconfigure re-applies the WebCodecs Opus decoder's own pre-skip trim. The
+     * W3C spec is non-normative on the exact sample count and browsers vary (~312 samples at 48 kHz in
+     * practice). `leadTrimFrames` is computed from the sidecar's pre-skip-corrected presentation times
+     * (via `presentationTimeSeconds`), so it does NOT double-count the per-reconfigure pre-skip; the
+     * decoder handles that internally. If browser testing reveals a residual offset, adjust the
+     * `leadTrimFrames` calculation here — this is the single point of control.
+     */
+    reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void {
        // New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but
        // inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the
        // decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into
@@ -162,6 +185,9 @@ export class OpusStreamDecoder implements IStreamingDecoder {
        this.demuxer.reset(true);
        this.decodedQueue = [];
        this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset
+        // Arm the lead trim: skip enough decoded frames to land at targetTimeSeconds, not at
+        // landingTimeSeconds (the page start). Clamp to ≥0 to guard against floating-point rounding.
+        this.leadTrimFrames = Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE));
        if (this.decoder && this.decoder.state === 'configured') {
            this.decoder.reset();
            this.decoder.configure(this.buildConfig());
@@ -204,10 +230,10 @@ export class OpusStreamDecoder implements IStreamingDecoder {
    }

    /**
-     * Convert every queued AudioData into an AudioBuffer at the context sample rate, applying end-trim
-     * against the known total frame count. `final` allows the very last partial buffer to be emitted.
+     * Convert every queued AudioData into an AudioBuffer at the context sample rate, applying
+     * end-trim against the known total frame count and lead-trim for post-seek fine re-sync.
     */
-    private drainDecoded(_final: boolean): AudioBuffer[] {
+    private drainDecoded(): AudioBuffer[] {
        const out: AudioBuffer[] = [];
        const ctx = this.contextManager.getContext();

@@ -224,39 +250,66 @@ export class OpusStreamDecoder implements IStreamingDecoder {
    }

    /**
-     * Copy an AudioData's PCM into a new AudioBuffer, trimming to not exceed the known total length
-     * (end-trim). Returns null if the trim leaves zero frames (the buffer is entirely past the end).
+     * Copy an AudioData's PCM into a new AudioBuffer, applying:
+     *   1. Lead-trim (post-seek fine re-sync): skip `leadTrimFrames` from the front so the audible
+     *      start lands at the requested seek position, not at the preceding page boundary (AC9).
+     *   2. End-trim: cap cumulative output at `totalFrames` so the final partial frame's padding
+     *      does not leak past the true stream end.
+     * Returns null when either trim leaves zero usable frames.
     */
    private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null {
        const frames = data.numberOfFrames;
        const channels = data.numberOfChannels;

+        // Lead-trim: consume frames from the front for post-seek fine re-sync (AC9).
+        let skip = 0;
+        if (this.leadTrimFrames > 0) {
+            skip = Math.min(this.leadTrimFrames, frames);
+            this.leadTrimFrames -= skip;
+        }
+        const available = frames - skip;
+        if (available <= 0) return null;
+
        // End-trim: cap cumulative output at totalFrames.
-        let keep = frames;
+        let keep = available;
        if (Number.isFinite(this.totalFrames)) {
            const room = this.totalFrames - this.emittedFrames;
            if (room <= 0) return null;
-            if (room < frames) keep = room;
+            if (room < available) keep = room;
        }
        if (keep <= 0) return null;

        const buffer = ctx.createBuffer(channels, keep, data.sampleRate);
-        const plane = new Float32Array(frames); // copyTo fills the full frame count, then we slice
+        // Allocate only for the frames we actually copy; frameOffset skips the lead-trim region.
+        const plane = new Float32Array(keep);
        for (let ch = 0; ch < channels; ch++) {
-            data.copyTo(plane, { planeIndex: ch, format: 'f32-planar' });
-            buffer.copyToChannel(keep === frames ? plane : plane.subarray(0, keep), ch);
+            data.copyTo(plane, { planeIndex: ch, frameOffset: skip, frameCount: keep, format: 'f32-planar' });
+            buffer.copyToChannel(plane, ch);
        }
        this.emittedFrames += keep;
        return buffer;
    }

    /**
-     * Yield to the microtask/event loop so the synchronous decode() calls above let their async output
-     * callbacks fire before we drain. A zero-delay timeout (macrotask) is the reliable cross-engine way
-     * to let WebCodecs deliver outputs; awaiting decodeQueueSize draining is the precise alternative but
-     * not all engines settle it synchronously.
+     * Poll until the AudioDecoder's pending-request count (decodeQueueSize) reaches 0, giving output
+     * callbacks a chance to fire before decodedQueue is drained. Nominally bounded to MAX_YIELD_ITERS ×
+     * 4 ms to guard against a stuck decoder; outputs collected before the cap are still returned. Note
+     * that decodeQueueSize hitting 0 does not by itself guarantee every output has been delivered.
+     * `complete()` uses decoder.flush() instead, the authoritative end-of-stream drain.
     */
    private yieldToDecoder(): Promise<void> {
-        return new Promise((resolve) => setTimeout(resolve, 0));
+        const MAX_YIELD_ITERS = 50; // 50 × 4 ms = 200 ms ceiling
+        return new Promise<void>((resolve) => {
+            let iters = 0;
+            const poll = () => {
+                if (!this.decoder || this.decoder.decodeQueueSize === 0 || iters >= MAX_YIELD_ITERS) {
+                    resolve();
+                    return;
+                }
+                iters++;
+                setTimeout(poll, 4);
+            };
+            poll();
+        });
    }
 }