fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus)
Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups.
This commit is contained in:
@@ -52,6 +52,11 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
private decodedQueue: AudioData[] = [];
|
||||
private fatalError = false;
|
||||
|
||||
// Frames to discard from the head of the post-seek decoded output (AC9 fine re-sync).
|
||||
// Set by reinitializeForRangeContinuation to round((targetTimeSeconds - landingTimeSeconds) *
|
||||
// OPUS_SAMPLE_RATE) (48 kHz), clamped to ≥ 0; consumed across successive AudioData in
|
||||
// audioDataToBuffer until exhausted (then zero for the rest of the stream).
|
||||
private leadTrimFrames = 0;
|
||||
|
||||
// Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. WebCodecs requires
|
||||
// strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation),
|
||||
// so a synthetic 48 kHz-derived counter suffices and stays exact.
|
||||
@@ -135,14 +140,14 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
|
||||
const packets = this.demuxer.push(chunk);
|
||||
this.decodePackets(packets);
|
||||
// Give the WebCodecs output callback a chance to run before we drain.
|
||||
// Wait until the WebCodecs decoder has processed the queued packets before draining.
|
||||
await this.yieldToDecoder();
|
||||
return this.drainDecoded(false);
|
||||
return this.drainDecoded();
|
||||
}
|
||||
|
||||
async complete(): Promise<AudioBuffer[]> {
|
||||
if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') {
|
||||
return this.drainDecoded(true);
|
||||
return this.drainDecoded();
|
||||
}
|
||||
try {
|
||||
await this.decoder.flush();
|
||||
@@ -151,10 +156,28 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
// own cancellation handles that — surface nothing, just drain what we have.
|
||||
console.warn('Opus decoder flush interrupted:', (err as Error).message);
|
||||
}
|
||||
return this.drainDecoded(true);
|
||||
return this.drainDecoded();
|
||||
}
|
||||
|
||||
reinitializeForRangeContinuation(): void {
|
||||
/**
|
||||
* Reinitialize for a Range-continuation stream after seek-beyond-buffer.
|
||||
*
|
||||
* @param landingTimeSeconds The actual page-start presentation time resolved from the seek index
|
||||
* (t_page ≤ targetTimeSeconds). This is the time at which the decoder
|
||||
* will begin emitting audio after reconfigure.
|
||||
* @param targetTimeSeconds The user-requested seek position. The difference
|
||||
* `(target - landing) * OPUS_SAMPLE_RATE` frames are trimmed from the
|
||||
* head of the decoded output so playback lands precisely at the target
|
||||
* (AC9 fine re-sync, §3.4a step 4).
|
||||
*
|
||||
* Pre-skip note: the reconfigure re-applies the WebCodecs Opus decoder's own pre-skip trim. The
|
||||
* W3C spec is non-normative on the exact sample count and browsers vary (~312 samples at 48 kHz in
|
||||
* practice). `leadTrimFrames` is computed from the sidecar's pre-skip-corrected presentation times
|
||||
* (via `presentationTimeSeconds`), so it does NOT double-count the per-reconfigure pre-skip; the
|
||||
* decoder handles that internally. If browser testing reveals a residual offset, adjust the
|
||||
* `leadTrimFrames` calculation here — this is the single point of control.
|
||||
*/
|
||||
reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void {
|
||||
// New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but
|
||||
// inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the
|
||||
// decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into
|
||||
@@ -162,6 +185,9 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
this.demuxer.reset(true);
|
||||
this.decodedQueue = [];
|
||||
this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset
|
||||
// Arm the lead trim: skip enough decoded frames to land at targetTimeSeconds, not at
|
||||
// landingTimeSeconds (the page start). Clamp to ≥0 to guard against floating-point rounding.
|
||||
this.leadTrimFrames = Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE));
|
||||
if (this.decoder && this.decoder.state === 'configured') {
|
||||
this.decoder.reset();
|
||||
this.decoder.configure(this.buildConfig());
|
||||
@@ -204,10 +230,10 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert every queued AudioData into an AudioBuffer at the context sample rate, applying end-trim
|
||||
* against the known total frame count. `final` allows the very last partial buffer to be emitted.
|
||||
* Convert every queued AudioData into an AudioBuffer at the context sample rate, applying
|
||||
* end-trim against the known total frame count and lead-trim for post-seek fine re-sync.
|
||||
*/
|
||||
private drainDecoded(_final: boolean): AudioBuffer[] {
|
||||
private drainDecoded(): AudioBuffer[] {
|
||||
const out: AudioBuffer[] = [];
|
||||
const ctx = this.contextManager.getContext();
|
||||
|
||||
@@ -224,39 +250,66 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy an AudioData's PCM into a new AudioBuffer, trimming to not exceed the known total length
|
||||
* (end-trim). Returns null if the trim leaves zero frames (the buffer is entirely past the end).
|
||||
* Copy an AudioData's PCM into a new AudioBuffer, applying:
|
||||
* 1. Lead-trim (post-seek fine re-sync): skip `leadTrimFrames` from the front so the audible
|
||||
* start lands at the requested seek position, not at the preceding page boundary (AC9).
|
||||
* 2. End-trim: cap cumulative output at `totalFrames` so the final partial frame's padding
|
||||
* does not leak past the true stream end.
|
||||
* Returns null when either trim leaves zero usable frames.
|
||||
*/
|
||||
private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null {
|
||||
const frames = data.numberOfFrames;
|
||||
const channels = data.numberOfChannels;
|
||||
|
||||
// Lead-trim: consume frames from the front for post-seek fine re-sync (AC9).
|
||||
let skip = 0;
|
||||
if (this.leadTrimFrames > 0) {
|
||||
skip = Math.min(this.leadTrimFrames, frames);
|
||||
this.leadTrimFrames -= skip;
|
||||
}
|
||||
const available = frames - skip;
|
||||
if (available <= 0) return null;
|
||||
|
||||
// End-trim: cap cumulative output at totalFrames.
|
||||
let keep = frames;
|
||||
let keep = available;
|
||||
if (Number.isFinite(this.totalFrames)) {
|
||||
const room = this.totalFrames - this.emittedFrames;
|
||||
if (room <= 0) return null;
|
||||
if (room < frames) keep = room;
|
||||
if (room < available) keep = room;
|
||||
}
|
||||
if (keep <= 0) return null;
|
||||
|
||||
const buffer = ctx.createBuffer(channels, keep, data.sampleRate);
|
||||
const plane = new Float32Array(frames); // copyTo fills the full frame count, then we slice
|
||||
// Allocate only for the frames we actually copy; frameOffset skips the lead-trim region.
|
||||
const plane = new Float32Array(keep);
|
||||
for (let ch = 0; ch < channels; ch++) {
|
||||
data.copyTo(plane, { planeIndex: ch, format: 'f32-planar' });
|
||||
buffer.copyToChannel(keep === frames ? plane : plane.subarray(0, keep), ch);
|
||||
data.copyTo(plane, { planeIndex: ch, frameOffset: skip, frameCount: keep, format: 'f32-planar' });
|
||||
buffer.copyToChannel(plane, ch);
|
||||
}
|
||||
this.emittedFrames += keep;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Yield to the microtask/event loop so the synchronous decode() calls above let their async output
|
||||
* callbacks fire before we drain. A zero-delay timeout (macrotask) is the reliable cross-engine way
|
||||
* to let WebCodecs deliver outputs; awaiting decodeQueueSize draining is the precise alternative but
|
||||
* not all engines settle it synchronously.
|
||||
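* Wait until the AudioDecoder's pending-input queue drains (decodeQueueSize → 0) before we drain
|
||||
* decodedQueue. decodeQueueSize counts decode requests the codec has not yet accepted, so reaching
|
||||
* 0 makes it likely, but does not by itself guarantee, that every output callback has fired;
|
||||
* outputs that arrive later are left for the next drain. Bounded to MAX_YIELD_ITERS × 4 ms to guard
|
||||
* against a stuck decoder; any outputs collected before the cap are still returned. `complete()`
|
||||
* uses decoder.flush() as its final barrier instead (flush() is the authoritative end-of-stream
|
||||
* drain).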
|
||||
*/
|
||||
private yieldToDecoder(): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, 0));
|
||||
const MAX_YIELD_ITERS = 50; // 50 × 4 ms = 200 ms ceiling
|
||||
return new Promise<void>((resolve) => {
|
||||
let iters = 0;
|
||||
const poll = () => {
|
||||
if (!this.decoder || this.decoder.decodeQueueSize === 0 || iters >= MAX_YIELD_ITERS) {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
iters++;
|
||||
setTimeout(poll, 4);
|
||||
};
|
||||
poll();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user