Files
deepdrft/DeepDrftPublic/Interop/audio/OpusSidecar.ts
T
daniel-c-harvey 5a75da1769 fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus)
Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups.
2026-06-23 20:57:05 -04:00

197 lines
8.7 KiB
TypeScript

/**
* OpusSidecar - parser for the per-track Opus seek/setup sidecar artifact.
*
* The sidecar is built once at transcode time (wave 18.1, C# `OpusSidecar` /
* `OggOpusSeekIndex`) and fetched once on track load (wired by wave 18.5). It carries
* everything the client needs to seek a VBR Opus stream accurately and to decode any
* mid-stream slice:
* - the verbatim OpusHead + OpusTags setup pages (prepended to every post-seek slice),
* - the precomputed granule->byte seek index (the exact time->byte transfer function),
* - the pre_skip and totals needed for presentation-time math and seek clamping.
*
* This module is the byte-for-byte counterpart to the C# serializer. It is pure: it parses
* a blob into an `OpusSeekData` accelerator with no I/O. Wave 18.5 owns the HTTP fetch and
* injects the parsed result into `OpusFormatDecoder.setSidecar`.
*
* Binary layout (all little-endian), matching DeepDrftContent.Processors.Opus:
* [uint32 setupHeaderLength]
* [setupHeaderLength bytes -> OpusHead + OpusTags pages]
* [seek-index blob]:
* header (24 bytes):
* uint64 totalByteLength
* double totalDurationSeconds (pre-skip-corrected)
* uint32 pointCount
* uint16 preSkip
* uint16 reserved
* pointCount x 16-byte points:
* uint64 granulePosition (48 kHz sample count)
* uint64 byteOffset (page-start offset in the Opus file)
*/
/** Rate of the Opus granule clock: granule positions count 48 kHz samples whatever the input rate was. */
export const OPUS_SAMPLE_RATE = 48000;
/**
 * Byte size of the seek-index blob header, spelled out field by field:
 * uint64 totalByteLength + float64 totalDurationSeconds + uint32 pointCount + uint16 preSkip + uint16 reserved.
 */
const SEEK_INDEX_HEADER_SIZE = 8 + 8 + 4 + 2 + 2;
/** Byte size of one serialized seek point: uint64 granulePosition + uint64 byteOffset. */
const SEEK_POINT_SIZE = 8 + 8;
/**
 * One (granule, byteOffset) seek-index entry, described as page-start-accurate for both values.
 * NOTE(review): `granulePosition` is documented below as the page END granule while `byteOffset`
 * is the page START — confirm against the C# serializer which convention the index really uses.
 */
export interface OpusSeekPoint {
/** Page end granule position — a 48 kHz sample count. Parsed from a uint64 into a JS number. */
granulePosition: number;
/** Byte offset of the page start in the Opus file. Parsed from a uint64 into a JS number. */
byteOffset: number;
}
/**
 * Parsed sidecar: the `seekData` accelerator the `OpusFormatDecoder` holds for the stream's
 * lifetime. Holds the setup bytes (for `wrapSegment` carry) and the index (for `calculateByteOffset`).
 *
 * Produced by `parseSidecar`; consumed by `resolveOpusByteOffset`.
 */
export interface OpusSeekData {
/** Literal tag identifying this accelerator shape. */
kind: 'opus-sidecar';
/**
 * Verbatim OpusHead + OpusTags pages, prepended to every decodable segment.
 * `parseSidecar` returns this as a zero-copy `subarray` of the parsed blob, so it shares (and
 * keeps alive) the blob's backing buffer.
 */
setupHeaderBytes: Uint8Array;
/**
 * Ordered (granule, byteOffset) entries, ascending by granule.
 * The parser does not sort or verify this ordering; the binary search in `resolveOpusByteOffset`
 * relies on it.
 */
points: OpusSeekPoint[];
/** Pre-skip-corrected total stream duration in seconds. */
totalDurationSeconds: number;
/** Total Opus file byte length, for clamping a seek past the end. */
totalByteLength: number;
/** pre_skip from OpusHead (RFC 7845 §5.1); samples to discard before presentation. */
preSkip: number;
}
/**
 * Decode a sidecar blob (as written by the C# `OpusSidecar.ToBytes`) into an `OpusSeekData`.
 *
 * The blob is a little-endian uint32 setup length, the setup pages themselves, a 24-byte
 * seek-index header and then `pointCount` 16-byte seek points. Parsing is strict about
 * truncation: `null` comes back when the blob is shorter than the length prefix, shorter than
 * the setup region plus the index header, or shorter than the declared number of points.
 * Bytes beyond the last declared point are tolerated and ignored.
 *
 * Nothing is copied: the returned `setupHeaderBytes` is a view over the caller's buffer.
 *
 * @param input The raw sidecar as a `Uint8Array`, an `ArrayBuffer`, or any typed-array/DataView view.
 * @returns The parsed accelerator, or `null` when the blob is structurally truncated.
 */
export function parseSidecar(input: Uint8Array | ArrayBuffer | ArrayBufferView): OpusSeekData | null {
  const blob = toUint8Array(input);
  const blobLength = blob.byteLength;
  // The reader is scoped to the view's own window, so offsets below are blob-relative.
  const reader = new DataView(blob.buffer, blob.byteOffset, blobLength);

  // The uint32 length prefix must be present before anything can be read.
  if (blobLength < 4) {
    return null;
  }

  const headerStart = 4 + reader.getUint32(0, true);
  const firstPointOffset = headerStart + SEEK_INDEX_HEADER_SIZE;
  // Setup region plus the fixed-size index header must fit.
  if (firstPointOffset > blobLength) {
    return null;
  }

  // Fixed header fields; the two reserved bytes at +22 are deliberately not read.
  const totalByteLength = readUint64(reader, headerStart);
  const totalDurationSeconds = reader.getFloat64(headerStart + 8, true);
  const pointCount = reader.getUint32(headerStart + 16, true);
  const preSkip = reader.getUint16(headerStart + 20, true);

  const pointsEnd = firstPointOffset + pointCount * SEEK_POINT_SIZE;
  // Every declared point must be physically present.
  if (pointsEnd > blobLength) {
    return null;
  }

  const points: OpusSeekPoint[] = [];
  for (let at = firstPointOffset; at < pointsEnd; at += SEEK_POINT_SIZE) {
    const granulePosition = readUint64(reader, at);
    const byteOffset = readUint64(reader, at + 8);
    points.push({ granulePosition, byteOffset });
  }

  return {
    kind: 'opus-sidecar',
    // Zero-copy slice of the setup pages, kept for the lifetime of the stream.
    setupHeaderBytes: blob.subarray(4, headerStart),
    points,
    totalDurationSeconds,
    totalByteLength,
    preSkip
  };
}
/**
 * Convert a granule position to presentation time in seconds, correcting for pre-skip.
 *
 * Computes `(granulePosition - preSkip) / OPUS_SAMPLE_RATE` and floors the result at zero, so
 * granules that fall inside the pre-skip region map to time 0 rather than a negative time.
 * Intended to mirror the C# `OggOpusSeekIndex.PresentationTimeSeconds` so both sides share one
 * seek transfer function.
 *
 * @param granulePosition Granule position as a 48 kHz sample count.
 * @param preSkip Number of leading samples to discard (from OpusHead).
 * @returns Non-negative presentation time in seconds.
 */
export function presentationTimeSeconds(granulePosition: number, preSkip: number): number {
  const presentedSamples = granulePosition - preSkip;
  const seconds = presentedSamples / OPUS_SAMPLE_RATE;
  return Math.max(0, seconds);
}
/**
 * Result of resolving a seek time to a page-start byte offset.
 * `byteOffset` is the Range request origin; `landingTimeSeconds` is the actual presentation time of that
 * page (t_page ≤ positionSeconds). The caller uses the delta `positionSeconds - landingTimeSeconds` to
 * trim the decoded leading frames so playback lands at the requested position, not at t_page (AC9).
 *
 * NOTE(review): `resolveOpusByteOffset` falls back to the first index point when no point's time is
 * ≤ positionSeconds (e.g. a negative or NaN target), so the ≤ relation is not enforced in that case —
 * confirm callers clamp the resulting delta.
 */
export interface OpusSeekResolution {
/** Page-start byte offset to use as the Range request origin (Ogg-sync-aligned). */
byteOffset: number;
/**
 * Presentation time of the resolved index page (seconds). Always ≤ positionSeconds. The decoder
 * must trim `(positionSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE` leading frames so the
 * audible start and the reported clock both land at positionSeconds, not at landingTimeSeconds.
 */
landingTimeSeconds: number;
}
/**
 * Map a seek target (seconds) onto the indexed page to fetch from, using the precomputed seek
 * index rather than interpolation or byte-rate arithmetic, so it stays accurate for VBR streams.
 *
 * A binary search selects the last index point whose pre-skip-corrected presentation time is
 * `<= positionSeconds`. The result carries that point's byte offset (the Range request origin)
 * together with the point's own presentation time, letting the caller trim leading decoded
 * frames by `positionSeconds - landingTimeSeconds` to land exactly on the target (AC9).
 *
 * Edge cases:
 * - Empty index: resolves to the start of audio (offset = setup-header length, landing time 0).
 * - No point at or before `positionSeconds` (including a NaN target): the first index point is
 *   returned, so `landingTimeSeconds` may then exceed `positionSeconds`.
 *
 * The search assumes `sidecar.points` is ascending by granule position.
 *
 * @param sidecar Parsed sidecar holding the seek index, setup bytes and pre-skip.
 * @param positionSeconds Requested presentation time in seconds.
 * @returns The byte offset to fetch from and the presentation time of that index point.
 */
export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): OpusSeekResolution {
  const { points, preSkip } = sidecar;
  const count = points.length;

  if (count === 0) {
    return { byteOffset: sidecar.setupHeaderBytes.length, landingTimeSeconds: 0 };
  }

  const timeAt = (index: number): number =>
    presentationTimeSeconds(points[index].granulePosition, preSkip);

  // Closed-interval search; `chosen` stays at 0 when nothing qualifies.
  let chosen = 0;
  for (let left = 0, right = count - 1; left <= right; ) {
    const probe = left + ((right - left) >> 1);
    if (timeAt(probe) <= positionSeconds) {
      // Candidate found; keep looking to the right for a later one.
      chosen = probe;
      left = probe + 1;
    } else {
      right = probe - 1;
    }
  }

  const landing = points[chosen];
  return {
    byteOffset: landing.byteOffset,
    landingTimeSeconds: timeAt(chosen)
  };
}
/**
 * Present any accepted binary input as a `Uint8Array` without copying its bytes.
 *
 * - A `Uint8Array` is handed back as-is (same object).
 * - An `ArrayBuffer` is wrapped in a view spanning the whole buffer.
 * - Any other view is re-wrapped over the same buffer, keeping its byte offset and byte length.
 *
 * @param input Raw bytes as a `Uint8Array`, an `ArrayBuffer`, or another buffer view.
 * @returns A `Uint8Array` sharing the input's memory.
 */
function toUint8Array(input: Uint8Array | ArrayBuffer | ArrayBufferView): Uint8Array {
  if (input instanceof Uint8Array) {
    return input;
  }
  return input instanceof ArrayBuffer
    ? new Uint8Array(input)
    : new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
}
/**
 * Read an unsigned 64-bit little-endian integer from `view` and return it as a JS number.
 *
 * The value is assembled from two 32-bit halves without BigInt, so it is exact only up to
 * 2^53 - 1. That ceiling (~8 PB as a byte offset, ~5,700 years of 48 kHz audio as a granule
 * position) is treated as unreachable for real files, the same assumption the FLAC seektable
 * path makes.
 *
 * @param view DataView to read from.
 * @param offset Byte offset of the value's first (least significant) byte within `view`.
 * @returns The decoded value.
 * @throws RangeError if the 8 bytes starting at `offset` are not fully inside `view`.
 */
function readUint64(view: DataView, offset: number): number {
  const TWO_POW_32 = 4294967296;
  const lowWord = view.getUint32(offset, true);
  const highWord = view.getUint32(offset + 4, true);
  return lowWord + highWord * TWO_POW_32;
}