fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus)
Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups.
This commit is contained in:
@@ -16,7 +16,7 @@ import { WavFormatDecoder } from './WavFormatDecoder.js';
|
|||||||
import { Mp3FormatDecoder } from './Mp3FormatDecoder.js';
|
import { Mp3FormatDecoder } from './Mp3FormatDecoder.js';
|
||||||
import { FlacFormatDecoder } from './FlacFormatDecoder.js';
|
import { FlacFormatDecoder } from './FlacFormatDecoder.js';
|
||||||
import { OpusStreamDecoder } from './OpusStreamDecoder.js';
|
import { OpusStreamDecoder } from './OpusStreamDecoder.js';
|
||||||
import { OpusSeekData, parseSidecar, resolveOpusByteOffset } from './OpusSidecar.js';
|
import { OpusSeekData, parseSidecar, resolveOpusByteOffset, OpusSeekResolution } from './OpusSidecar.js';
|
||||||
|
|
||||||
export interface AudioResult {
|
export interface AudioResult {
|
||||||
success: boolean;
|
success: boolean;
|
||||||
@@ -56,6 +56,10 @@ export class AudioPlayer {
|
|||||||
// The sidecar in effect for the active Opus stream (its seek index resolves byte offsets). Distinct
|
// The sidecar in effect for the active Opus stream (its seek index resolves byte offsets). Distinct
|
||||||
// from pendingOpusSidecar, which is the one set for the NEXT stream init.
|
// from pendingOpusSidecar, which is the one set for the NEXT stream init.
|
||||||
private activeOpusSidecar: OpusSeekData | null = null;
|
private activeOpusSidecar: OpusSeekData | null = null;
|
||||||
|
// The landing time of the most recent seek-beyond-buffer page resolution (seconds). Set by
|
||||||
|
// seekBeyondBuffer, consumed by reinitializeFromOffset to trim leading decoded frames so the
|
||||||
|
// audible position matches the requested seek target (AC9 fine re-sync, §3.4a step 4).
|
||||||
|
private _seekLandingTime: number = 0;
|
||||||
|
|
||||||
// Playback state
|
// Playback state
|
||||||
private isPlaying: boolean = false;
|
private isPlaying: boolean = false;
|
||||||
@@ -453,15 +457,19 @@ export class AudioPlayer {
|
|||||||
try {
|
try {
|
||||||
// Opus: resolve the offset from the precomputed seek index (the accurate VBR-safe transfer
|
// Opus: resolve the offset from the precomputed seek index (the accurate VBR-safe transfer
|
||||||
// function). The returned offset is a real page start, so the Range continuation lands the
|
// function). The returned offset is a real page start, so the Range continuation lands the
|
||||||
// demuxer/decoder Ogg-sync-aligned.
|
// demuxer/decoder Ogg-sync-aligned. Also capture the landing time (t_page ≤ position) so
|
||||||
|
// reinitializeFromOffset can trim the leading decoded frames and land precisely at `position`
|
||||||
|
// (AC9 fine re-sync, §3.4a step 4).
|
||||||
if (this.opusDecoder) {
|
if (this.opusDecoder) {
|
||||||
if (!this.activeOpusSidecar) {
|
if (!this.activeOpusSidecar) {
|
||||||
return { success: false, error: 'Cannot calculate byte offset' };
|
return { success: false, error: 'Cannot calculate byte offset' };
|
||||||
}
|
}
|
||||||
|
const resolution: OpusSeekResolution = resolveOpusByteOffset(this.activeOpusSidecar, position);
|
||||||
|
this._seekLandingTime = resolution.landingTimeSeconds;
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
seekBeyondBuffer: true,
|
seekBeyondBuffer: true,
|
||||||
byteOffset: resolveOpusByteOffset(this.activeOpusSidecar, position)
|
byteOffset: resolution.byteOffset
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -499,7 +507,7 @@ export class AudioPlayer {
|
|||||||
calculateByteOffset(positionSeconds: number): number {
|
calculateByteOffset(positionSeconds: number): number {
|
||||||
if (this.opusDecoder) {
|
if (this.opusDecoder) {
|
||||||
return this.activeOpusSidecar
|
return this.activeOpusSidecar
|
||||||
? resolveOpusByteOffset(this.activeOpusSidecar, positionSeconds)
|
? resolveOpusByteOffset(this.activeOpusSidecar, positionSeconds).byteOffset
|
||||||
: 0;
|
: 0;
|
||||||
}
|
}
|
||||||
if (!this.streamDecoder.getFormatInfo()) return 0;
|
if (!this.streamDecoder.getFormatInfo()) return 0;
|
||||||
@@ -521,10 +529,11 @@ export class AudioPlayer {
|
|||||||
this.scheduler.setPlaybackOffset(seekPosition);
|
this.scheduler.setPlaybackOffset(seekPosition);
|
||||||
|
|
||||||
// Reinitialize the active decoder for the Range-continuation stream (206 body, no header/
|
// Reinitialize the active decoder for the Range-continuation stream (206 body, no header/
|
||||||
// setup pages). Opus resets demux + codec state (keeping the cached config); the
|
// setup pages). Opus resets demux + codec state (keeping the cached config) and arms the
|
||||||
|
// lead-trim so decoded audio starts at `seekPosition`, not at the page boundary (AC9). The
|
||||||
// StreamDecoder path uses totalStreamLength (the 206 Content-Length) to detect completion.
|
// StreamDecoder path uses totalStreamLength (the 206 Content-Length) to detect completion.
|
||||||
if (this.opusDecoder) {
|
if (this.opusDecoder) {
|
||||||
this.opusDecoder.reinitializeForRangeContinuation();
|
this.opusDecoder.reinitializeForRangeContinuation(this._seekLandingTime, seekPosition);
|
||||||
} else {
|
} else {
|
||||||
this.streamDecoder.reinitializeForRangeContinuation(totalStreamLength);
|
this.streamDecoder.reinitializeForRangeContinuation(totalStreamLength);
|
||||||
}
|
}
|
||||||
@@ -643,6 +652,7 @@ export class AudioPlayer {
|
|||||||
this.isStreamingMode = false;
|
this.isStreamingMode = false;
|
||||||
this.streamingStarted = false;
|
this.streamingStarted = false;
|
||||||
this.streamingCompleted = false;
|
this.streamingCompleted = false;
|
||||||
|
this._seekLandingTime = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private handlePlaybackEnded(): void {
|
private handlePlaybackEnded(): void {
|
||||||
|
|||||||
@@ -50,8 +50,13 @@ export interface IStreamingDecoder {
|
|||||||
* Reinitialize for a Range-continuation after seek-beyond-buffer. The 206 body begins on an Ogg page
|
* Reinitialize for a Range-continuation after seek-beyond-buffer. The 206 body begins on an Ogg page
|
||||||
* boundary and carries no setup pages — the decoder reuses the cached config and resets demux/codec
|
* boundary and carries no setup pages — the decoder reuses the cached config and resets demux/codec
|
||||||
* state so inter-frame continuity restarts cleanly from the new offset.
|
* state so inter-frame continuity restarts cleanly from the new offset.
|
||||||
|
*
|
||||||
|
* @param landingTimeSeconds The actual presentation time of the resolved seek page (t_page ≤ target).
|
||||||
|
* @param targetTimeSeconds The user-requested seek position. The decoder trims the leading
|
||||||
|
* `(target - landing) * sampleRate` frames so playback lands at target
|
||||||
|
* (AC9 fine re-sync, §3.4a step 4).
|
||||||
*/
|
*/
|
||||||
reinitializeForRangeContinuation(): void;
|
reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void;
|
||||||
|
|
||||||
/** Tear down the underlying WebCodecs decoder and release resources. */
|
/** Tear down the underlying WebCodecs decoder and release resources. */
|
||||||
dispose(): void;
|
dispose(): void;
|
||||||
|
|||||||
@@ -155,6 +155,9 @@ export class OggDemuxer {
|
|||||||
const isEos = (headerType & 0x04) !== 0;
|
const isEos = (headerType & 0x04) !== 0;
|
||||||
const granule = readUint64LE(header, GRANULE_OFFSET);
|
const granule = readUint64LE(header, GRANULE_OFFSET);
|
||||||
// 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page" — no usable timestamp.
|
// 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page" — no usable timestamp.
|
||||||
|
// We check the raw bytes rather than comparing `granule === -1` (or the equivalent JS number):
|
||||||
|
// the full 64-bit sentinel exceeds 2^53 and cannot be represented exactly as an IEEE-754 double,
|
||||||
|
// so the parsed value from readUint64LE would not equal the sentinel. The byte check is exact.
|
||||||
const hasGranule = !(header[GRANULE_OFFSET] === 0xff && header[GRANULE_OFFSET + 1] === 0xff &&
|
const hasGranule = !(header[GRANULE_OFFSET] === 0xff && header[GRANULE_OFFSET + 1] === 0xff &&
|
||||||
header[GRANULE_OFFSET + 2] === 0xff && header[GRANULE_OFFSET + 3] === 0xff &&
|
header[GRANULE_OFFSET + 2] === 0xff && header[GRANULE_OFFSET + 3] === 0xff &&
|
||||||
header[GRANULE_OFFSET + 4] === 0xff && header[GRANULE_OFFSET + 5] === 0xff &&
|
header[GRANULE_OFFSET + 4] === 0xff && header[GRANULE_OFFSET + 5] === 0xff &&
|
||||||
|
|||||||
@@ -123,21 +123,40 @@ export function presentationTimeSeconds(granulePosition: number, preSkip: number
|
|||||||
return Math.max(0, (granulePosition - preSkip) / OPUS_SAMPLE_RATE);
|
return Math.max(0, (granulePosition - preSkip) / OPUS_SAMPLE_RATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of resolving a seek time to a page-start byte offset.
|
||||||
|
* `byteOffset` is the Range request origin; `landingTimeSeconds` is the actual presentation time of that
|
||||||
|
* page (t_page ≤ positionSeconds). The caller uses the delta `positionSeconds - landingTimeSeconds` to
|
||||||
|
* trim the decoded leading frames so playback lands at the requested position, not at t_page (AC9).
|
||||||
|
*/
|
||||||
|
export interface OpusSeekResolution {
|
||||||
|
/** Page-start byte offset to use as the Range request origin (Ogg-sync-aligned). */
|
||||||
|
byteOffset: number;
|
||||||
|
/**
|
||||||
|
* Presentation time of the resolved index page (seconds). Always ≤ positionSeconds. The decoder
|
||||||
|
* must trim `(positionSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE` leading frames so the
|
||||||
|
* audible start and the reported clock both land at positionSeconds, not at landingTimeSeconds.
|
||||||
|
*/
|
||||||
|
landingTimeSeconds: number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve a seek time (seconds) to a file-absolute, page-start byte offset via the precomputed index —
|
* Resolve a seek time (seconds) to a file-absolute, page-start byte offset via the precomputed index —
|
||||||
* the accurate VBR-safe transfer function (§3.4a A/C). Binary-searches for the largest entry whose
|
* the accurate VBR-safe transfer function (§3.4a A/C). Binary-searches for the largest entry whose
|
||||||
* presentation time is <= `positionSeconds` and returns its exact page-start byte offset. NOT
|
* presentation time is <= `positionSeconds`. Returns both the page-start byte offset AND the actual
|
||||||
* interpolation, NOT byteRate math. With an empty index it degrades to the start of audio (the offset
|
* landing time of that page, so callers can trim leading decoded frames to land precisely at
|
||||||
* of the first audio page == the setup-header length, since the server emits [setup pages][audio pages]).
|
* `positionSeconds` (AC9 fine re-sync). NOT interpolation, NOT byteRate math.
|
||||||
|
*
|
||||||
|
* With an empty index it degrades to the start of audio (offset == setup-header length, landing == 0).
|
||||||
*
|
*
|
||||||
* This is the single source of truth for Opus seek-offset math, shared by the seek-beyond-buffer path
|
* This is the single source of truth for Opus seek-offset math, shared by the seek-beyond-buffer path
|
||||||
* (AudioPlayer) and any byte-offset resolver. The Range fetch from this offset lands the decoder
|
* (AudioPlayer) and any byte-offset resolver. The Range fetch from this offset lands the decoder
|
||||||
* Ogg-sync-aligned because every indexed offset is a real page start.
|
* Ogg-sync-aligned because every indexed offset is a real page start.
|
||||||
*/
|
*/
|
||||||
export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): number {
|
export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): OpusSeekResolution {
|
||||||
const points = sidecar.points;
|
const points = sidecar.points;
|
||||||
if (points.length === 0) {
|
if (points.length === 0) {
|
||||||
return sidecar.setupHeaderBytes.length;
|
return { byteOffset: sidecar.setupHeaderBytes.length, landingTimeSeconds: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
let lo = 0;
|
let lo = 0;
|
||||||
@@ -153,7 +172,10 @@ export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: nu
|
|||||||
hi = mid - 1;
|
hi = mid - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return points[best].byteOffset;
|
return {
|
||||||
|
byteOffset: points[best].byteOffset,
|
||||||
|
landingTimeSeconds: presentationTimeSeconds(points[best].granulePosition, sidecar.preSkip)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function toUint8Array(input: Uint8Array | ArrayBuffer | ArrayBufferView): Uint8Array {
|
function toUint8Array(input: Uint8Array | ArrayBuffer | ArrayBufferView): Uint8Array {
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { parseSidecar, presentationTimeSeconds, resolveOpusByteOffset, OPUS_SAMPLE_RATE } from './OpusSidecar.js';
|
import { parseSidecar, presentationTimeSeconds, resolveOpusByteOffset, OPUS_SAMPLE_RATE } from './OpusSidecar.js';
|
||||||
import type { OpusSeekData } from './OpusSidecar.js';
|
import type { OpusSeekData, OpusSeekResolution } from './OpusSidecar.js';
|
||||||
import { OggDemuxer, extractOpusHead, opusHeadChannelCount } from './OggDemuxer.js';
|
import { OggDemuxer, extractOpusHead, opusHeadChannelCount } from './OggDemuxer.js';
|
||||||
|
|
||||||
// --- tiny inline harness (no dependencies) ---------------------------------------------------
|
// --- tiny inline harness (no dependencies) ---------------------------------------------------
|
||||||
@@ -180,12 +180,12 @@ test('resolveOpusByteOffset returns the page-start of the largest entry with tim
|
|||||||
const sc = sidecarFrom({
|
const sc = sidecarFrom({
|
||||||
setupHeader: [9, 9, 9, 9], totalByteLength: 999_999, totalDuration: 1.5, preSkip: 1000, points,
|
setupHeader: [9, 9, 9, 9], totalByteLength: 999_999, totalDuration: 1.5, preSkip: 1000, points,
|
||||||
});
|
});
|
||||||
assertEqual(resolveOpusByteOffset(sc, 0.0), 4096, 't=0 -> first point');
|
assertEqual(resolveOpusByteOffset(sc, 0.0).byteOffset, 4096, 't=0 -> first point');
|
||||||
assertEqual(resolveOpusByteOffset(sc, 0.4), 4096, 'just before bucket 1');
|
assertEqual(resolveOpusByteOffset(sc, 0.4).byteOffset, 4096, 'just before bucket 1');
|
||||||
assertEqual(resolveOpusByteOffset(sc, 0.5), 9096, 'exactly bucket 1');
|
assertEqual(resolveOpusByteOffset(sc, 0.5).byteOffset, 9096, 'exactly bucket 1');
|
||||||
assertEqual(resolveOpusByteOffset(sc, 0.9), 9096, 'within bucket 1');
|
assertEqual(resolveOpusByteOffset(sc, 0.9).byteOffset, 9096, 'within bucket 1');
|
||||||
assertEqual(resolveOpusByteOffset(sc, 1.0), 14096, 'exactly bucket 2');
|
assertEqual(resolveOpusByteOffset(sc, 1.0).byteOffset, 14096, 'exactly bucket 2');
|
||||||
assertEqual(resolveOpusByteOffset(sc, 99), 19096, 'past end -> last point');
|
assertEqual(resolveOpusByteOffset(sc, 99).byteOffset, 19096, 'past end -> last point');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('resolveOpusByteOffset never interpolates between points', () => {
|
test('resolveOpusByteOffset never interpolates between points', () => {
|
||||||
@@ -193,7 +193,7 @@ test('resolveOpusByteOffset never interpolates between points', () => {
|
|||||||
setupHeader: [0], totalByteLength: 10_000, totalDuration: 1.0, preSkip: 0,
|
setupHeader: [0], totalByteLength: 10_000, totalDuration: 1.0, preSkip: 0,
|
||||||
points: [{ granule: 0, byteOffset: 100 }, { granule: OPUS_SAMPLE_RATE, byteOffset: 9000 }],
|
points: [{ granule: 0, byteOffset: 100 }, { granule: OPUS_SAMPLE_RATE, byteOffset: 9000 }],
|
||||||
});
|
});
|
||||||
assertEqual(resolveOpusByteOffset(sc, 0.5), 100, 'midpoint snaps to lower page start');
|
assertEqual(resolveOpusByteOffset(sc, 0.5).byteOffset, 100, 'midpoint snaps to lower page start');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('resolveOpusByteOffset degrades to start of audio with an empty index', () => {
|
test('resolveOpusByteOffset degrades to start of audio with an empty index', () => {
|
||||||
@@ -201,7 +201,58 @@ test('resolveOpusByteOffset degrades to start of audio with an empty index', ()
|
|||||||
setupHeader: [1, 2, 3, 4, 5], totalByteLength: 0, totalDuration: 0, preSkip: 0, points: [],
|
setupHeader: [1, 2, 3, 4, 5], totalByteLength: 0, totalDuration: 0, preSkip: 0, points: [],
|
||||||
});
|
});
|
||||||
// start of audio == setup header length (server emits [setup pages][audio pages]).
|
// start of audio == setup header length (server emits [setup pages][audio pages]).
|
||||||
assertEqual(resolveOpusByteOffset(sc, 10), 5, 'empty index degrades to audio start');
|
assertEqual(resolveOpusByteOffset(sc, 10).byteOffset, 5, 'empty index degrades to audio start');
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- resolveOpusByteOffset: landingTimeSeconds (AC9 fine re-sync, §3.4a step 4) -----------------
|
||||||
|
|
||||||
|
test('resolveOpusByteOffset landingTimeSeconds equals the resolved page time, not the requested time', () => {
|
||||||
|
// Index: two points at t=0 s and t=0.5 s.
|
||||||
|
const preSkip = 312;
|
||||||
|
const sc = sidecarFrom({
|
||||||
|
setupHeader: [0], totalByteLength: 50_000, totalDuration: 1.5, preSkip,
|
||||||
|
points: [
|
||||||
|
{ granule: preSkip, byteOffset: 4096 }, // t=0
|
||||||
|
{ granule: preSkip + OPUS_SAMPLE_RATE / 2, byteOffset: 9000 }, // t=0.5 s
|
||||||
|
],
|
||||||
|
});
|
||||||
|
// Seeking to 0.3 s lands on the t=0 page; landing should be 0, not 0.3.
|
||||||
|
const r03: OpusSeekResolution = resolveOpusByteOffset(sc, 0.3);
|
||||||
|
assertEqual(r03.byteOffset, 4096, 'seek 0.3 -> first page offset');
|
||||||
|
assertEqual(r03.landingTimeSeconds, 0, 'landing at t=0 (page time, not target)');
|
||||||
|
|
||||||
|
// Seeking to exactly 0.5 s lands on the second page; landing == requested time.
|
||||||
|
const r05: OpusSeekResolution = resolveOpusByteOffset(sc, 0.5);
|
||||||
|
assertEqual(r05.byteOffset, 9000, 'seek 0.5 -> second page offset');
|
||||||
|
assertEqual(r05.landingTimeSeconds, 0.5, 'landing == requested when exact page boundary');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('resolveOpusByteOffset empty index returns landingTimeSeconds = 0', () => {
|
||||||
|
const sc = sidecarFrom({
|
||||||
|
setupHeader: [0, 1, 2], totalByteLength: 1000, totalDuration: 1.0, preSkip: 0, points: [],
|
||||||
|
});
|
||||||
|
const r = resolveOpusByteOffset(sc, 5.0);
|
||||||
|
assertEqual(r.landingTimeSeconds, 0, 'empty index: landing is stream start (0 s)');
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- Lead-trim frame math (AC9 fine re-sync) ---------------------------------------------------
|
||||||
|
// The trim frame count is purely arithmetic: (target - landing) * 48000, rounded, clamped to ≥0.
|
||||||
|
// This is the exact formula in OpusStreamDecoder.reinitializeForRangeContinuation so we test it
|
||||||
|
// independently of the browser-bound WebCodecs decode.
|
||||||
|
|
||||||
|
function leadTrimFrames(landingTimeSeconds: number, targetTimeSeconds: number): number {
|
||||||
|
return Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE));
|
||||||
|
}
|
||||||
|
|
||||||
|
test('lead-trim frame count is (target - landing) * 48000, rounded', () => {
|
||||||
|
// Page at t=0, seek to 0.3 s: trim 0.3 * 48000 = 14400 frames.
|
||||||
|
assertEqual(leadTrimFrames(0, 0.3), 14400, 'trim for 0.3 s offset');
|
||||||
|
// Page at t=0.5 s, seek to 0.7 s: trim 0.2 * 48000 = 9600 frames.
|
||||||
|
assertEqual(leadTrimFrames(0.5, 0.7), 9600, 'trim for 0.2 s offset');
|
||||||
|
// Exact page boundary: no trim needed.
|
||||||
|
assertEqual(leadTrimFrames(0.5, 0.5), 0, 'no trim when target == landing');
|
||||||
|
// Guard against floating-point rounding producing a tiny negative: clamp to 0.
|
||||||
|
assertEqual(leadTrimFrames(0.5000001, 0.5), 0, 'negative rounds to zero (guard)');
|
||||||
});
|
});
|
||||||
|
|
||||||
// --- OggDemuxer: page -> packet extraction ----------------------------------------------------
|
// --- OggDemuxer: page -> packet extraction ----------------------------------------------------
|
||||||
|
|||||||
@@ -52,6 +52,11 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
private decodedQueue: AudioData[] = [];
|
private decodedQueue: AudioData[] = [];
|
||||||
private fatalError = false;
|
private fatalError = false;
|
||||||
|
|
||||||
|
// Frames to discard from the head of the first post-seek decoded output (AC9 fine re-sync).
|
||||||
|
// Set by reinitializeForRangeContinuation to (targetTimeSeconds - landingTimeSeconds) * 48000,
|
||||||
|
// consumed frame-by-frame in audioDataToBuffer until exhausted (then zero for the rest of the stream).
|
||||||
|
private leadTrimFrames = 0;
|
||||||
|
|
||||||
// Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. WebCodecs requires
|
// Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. WebCodecs requires
|
||||||
// strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation),
|
// strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation),
|
||||||
// so a synthetic 48 kHz-derived counter suffices and stays exact.
|
// so a synthetic 48 kHz-derived counter suffices and stays exact.
|
||||||
@@ -135,14 +140,14 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
|
|
||||||
const packets = this.demuxer.push(chunk);
|
const packets = this.demuxer.push(chunk);
|
||||||
this.decodePackets(packets);
|
this.decodePackets(packets);
|
||||||
// Give the WebCodecs output callback a chance to run before we drain.
|
// Wait until the WebCodecs decoder has processed the queued packets before draining.
|
||||||
await this.yieldToDecoder();
|
await this.yieldToDecoder();
|
||||||
return this.drainDecoded(false);
|
return this.drainDecoded();
|
||||||
}
|
}
|
||||||
|
|
||||||
async complete(): Promise<AudioBuffer[]> {
|
async complete(): Promise<AudioBuffer[]> {
|
||||||
if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') {
|
if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') {
|
||||||
return this.drainDecoded(true);
|
return this.drainDecoded();
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
await this.decoder.flush();
|
await this.decoder.flush();
|
||||||
@@ -151,10 +156,28 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
// own cancellation handles that — surface nothing, just drain what we have.
|
// own cancellation handles that — surface nothing, just drain what we have.
|
||||||
console.warn('Opus decoder flush interrupted:', (err as Error).message);
|
console.warn('Opus decoder flush interrupted:', (err as Error).message);
|
||||||
}
|
}
|
||||||
return this.drainDecoded(true);
|
return this.drainDecoded();
|
||||||
}
|
}
|
||||||
|
|
||||||
reinitializeForRangeContinuation(): void {
|
/**
|
||||||
|
* Reinitialize for a Range-continuation stream after seek-beyond-buffer.
|
||||||
|
*
|
||||||
|
* @param landingTimeSeconds The actual page-start presentation time resolved from the seek index
|
||||||
|
* (t_page ≤ targetTimeSeconds). This is the time at which the decoder
|
||||||
|
* will begin emitting audio after reconfigure.
|
||||||
|
* @param targetTimeSeconds The user-requested seek position. The difference
|
||||||
|
* `(target - landing) * OPUS_SAMPLE_RATE` frames are trimmed from the
|
||||||
|
* head of the decoded output so playback lands precisely at the target
|
||||||
|
* (AC9 fine re-sync, §3.4a step 4).
|
||||||
|
*
|
||||||
|
* Pre-skip note: the reconfigure re-applies the WebCodecs Opus decoder's own pre-skip trim. The
|
||||||
|
* W3C spec is non-normative on the exact sample count and browsers vary (~312 samples at 48 kHz in
|
||||||
|
* practice). `leadTrimFrames` is computed from the sidecar's pre-skip-corrected presentation times
|
||||||
|
* (via `presentationTimeSeconds`), so it does NOT double-count the per-reconfigure pre-skip; the
|
||||||
|
* decoder handles that internally. If browser testing reveals a residual offset, adjust the
|
||||||
|
* `leadTrimFrames` calculation here — this is the single point of control.
|
||||||
|
*/
|
||||||
|
reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void {
|
||||||
// New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but
|
// New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but
|
||||||
// inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the
|
// inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the
|
||||||
// decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into
|
// decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into
|
||||||
@@ -162,6 +185,9 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
this.demuxer.reset(true);
|
this.demuxer.reset(true);
|
||||||
this.decodedQueue = [];
|
this.decodedQueue = [];
|
||||||
this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset
|
this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset
|
||||||
|
// Arm the lead trim: skip enough decoded frames to land at targetTimeSeconds, not at
|
||||||
|
// landingTimeSeconds (the page start). Clamp to ≥0 to guard against floating-point rounding.
|
||||||
|
this.leadTrimFrames = Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE));
|
||||||
if (this.decoder && this.decoder.state === 'configured') {
|
if (this.decoder && this.decoder.state === 'configured') {
|
||||||
this.decoder.reset();
|
this.decoder.reset();
|
||||||
this.decoder.configure(this.buildConfig());
|
this.decoder.configure(this.buildConfig());
|
||||||
@@ -204,10 +230,10 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert every queued AudioData into an AudioBuffer at the context sample rate, applying end-trim
|
* Convert every queued AudioData into an AudioBuffer at the context sample rate, applying
|
||||||
* against the known total frame count. `final` allows the very last partial buffer to be emitted.
|
* end-trim against the known total frame count and lead-trim for post-seek fine re-sync.
|
||||||
*/
|
*/
|
||||||
private drainDecoded(_final: boolean): AudioBuffer[] {
|
private drainDecoded(): AudioBuffer[] {
|
||||||
const out: AudioBuffer[] = [];
|
const out: AudioBuffer[] = [];
|
||||||
const ctx = this.contextManager.getContext();
|
const ctx = this.contextManager.getContext();
|
||||||
|
|
||||||
@@ -224,39 +250,66 @@ export class OpusStreamDecoder implements IStreamingDecoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy an AudioData's PCM into a new AudioBuffer, trimming to not exceed the known total length
|
* Copy an AudioData's PCM into a new AudioBuffer, applying:
|
||||||
* (end-trim). Returns null if the trim leaves zero frames (the buffer is entirely past the end).
|
* 1. Lead-trim (post-seek fine re-sync): skip `leadTrimFrames` from the front so the audible
|
||||||
|
* start lands at the requested seek position, not at the preceding page boundary (AC9).
|
||||||
|
* 2. End-trim: cap cumulative output at `totalFrames` so the final partial frame's padding
|
||||||
|
* does not leak past the true stream end.
|
||||||
|
* Returns null when either trim leaves zero usable frames.
|
||||||
*/
|
*/
|
||||||
private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null {
|
private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null {
|
||||||
const frames = data.numberOfFrames;
|
const frames = data.numberOfFrames;
|
||||||
const channels = data.numberOfChannels;
|
const channels = data.numberOfChannels;
|
||||||
|
|
||||||
|
// Lead-trim: consume frames from the front for post-seek fine re-sync (AC9).
|
||||||
|
let skip = 0;
|
||||||
|
if (this.leadTrimFrames > 0) {
|
||||||
|
skip = Math.min(this.leadTrimFrames, frames);
|
||||||
|
this.leadTrimFrames -= skip;
|
||||||
|
}
|
||||||
|
const available = frames - skip;
|
||||||
|
if (available <= 0) return null;
|
||||||
|
|
||||||
// End-trim: cap cumulative output at totalFrames.
|
// End-trim: cap cumulative output at totalFrames.
|
||||||
let keep = frames;
|
let keep = available;
|
||||||
if (Number.isFinite(this.totalFrames)) {
|
if (Number.isFinite(this.totalFrames)) {
|
||||||
const room = this.totalFrames - this.emittedFrames;
|
const room = this.totalFrames - this.emittedFrames;
|
||||||
if (room <= 0) return null;
|
if (room <= 0) return null;
|
||||||
if (room < frames) keep = room;
|
if (room < available) keep = room;
|
||||||
}
|
}
|
||||||
if (keep <= 0) return null;
|
if (keep <= 0) return null;
|
||||||
|
|
||||||
const buffer = ctx.createBuffer(channels, keep, data.sampleRate);
|
const buffer = ctx.createBuffer(channels, keep, data.sampleRate);
|
||||||
const plane = new Float32Array(frames); // copyTo fills the full frame count, then we slice
|
// Allocate only for the frames we actually copy; frameOffset skips the lead-trim region.
|
||||||
|
const plane = new Float32Array(keep);
|
||||||
for (let ch = 0; ch < channels; ch++) {
|
for (let ch = 0; ch < channels; ch++) {
|
||||||
data.copyTo(plane, { planeIndex: ch, format: 'f32-planar' });
|
data.copyTo(plane, { planeIndex: ch, frameOffset: skip, frameCount: keep, format: 'f32-planar' });
|
||||||
buffer.copyToChannel(keep === frames ? plane : plane.subarray(0, keep), ch);
|
buffer.copyToChannel(plane, ch);
|
||||||
}
|
}
|
||||||
this.emittedFrames += keep;
|
this.emittedFrames += keep;
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Yield to the microtask/event loop so the synchronous decode() calls above let their async output
|
* Wait until the AudioDecoder's internal work queue drains (decodeQueueSize → 0), so output
|
||||||
* callbacks fire before we drain. A zero-delay timeout (macrotask) is the reliable cross-engine way
|
* callbacks have fired before we drain decodedQueue. Bounded to MAX_YIELD_ITERS × 4 ms to guard
|
||||||
* to let WebCodecs deliver outputs; awaiting decodeQueueSize draining is the precise alternative but
|
* against a stuck decoder; any outputs collected before the cap are still returned. `complete()`
|
||||||
* not all engines settle it synchronously.
|
* uses decoder.flush() as its final barrier instead (flush() is the authoritative end-of-stream
|
||||||
|
* drain).
|
||||||
*/
|
*/
|
||||||
private yieldToDecoder(): Promise<void> {
|
private yieldToDecoder(): Promise<void> {
|
||||||
return new Promise((resolve) => setTimeout(resolve, 0));
|
const MAX_YIELD_ITERS = 50; // 50 × 4 ms = 200 ms ceiling
|
||||||
|
return new Promise<void>((resolve) => {
|
||||||
|
let iters = 0;
|
||||||
|
const poll = () => {
|
||||||
|
if (!this.decoder || this.decoder.decodeQueueSize === 0 || iters >= MAX_YIELD_ITERS) {
|
||||||
|
resolve();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
iters++;
|
||||||
|
setTimeout(poll, 4);
|
||||||
|
};
|
||||||
|
poll();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user