Files
deepdrft/DeepDrftPublic/Interop/audio/OggDemuxer.ts
T
daniel-c-harvey 5a75da1769 fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus)
Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups.
2026-06-23 20:57:05 -04:00

299 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* OggDemuxer - streaming Ogg-page -> Opus-packet demuxer for the WebCodecs decode path.
*
* Ogg Opus is a containerized, paged format. To feed a WebCodecs `AudioDecoder` we must extract the
* individual Opus *packets* from the Ogg container — the decoder takes packets (as `EncodedAudioChunk`s),
* not raw container bytes. This module is the client-side analogue of the C# `OggOpusParser`: it reads
* the page structure directly (the "OggS" capture pattern + the 27-byte page header + segment table) and
* reassembles packets across the lacing, tracking the granule position that gives each packet its time.
*
* It is deliberately *streaming*: `push(bytes)` accepts arbitrary network chunks (a packet, a page, or a
* fraction of either) and returns whatever WHOLE packets have become available, holding partial state
* across calls. This matches how `StreamAudioWithEarlyPlayback` feeds bytes in adaptive 16-64 KB chunks.
*
* Lacing rules (RFC 3533 §6): a page's segment table lists N segment lengths (0-255). A packet is the
* concatenation of consecutive segments up to and including the first segment whose length is < 255. A
* segment of exactly 255 means "this packet continues into the next segment" — and if it is the page's
* LAST segment, the packet continues into the next page (the next page's header-type has the
* continuation flag set). The granule position on a page is the end-granule of the LAST packet that
* *completes* on that page.
*
* The two leading setup packets (OpusHead, OpusTags) are NOT audio and are skipped — they configure the
* decoder (the sidecar carries them as the codec description), they are never decoded as audio packets.
*/
// --- Ogg page layout -------------------------------------------------------------------------
// Capture pattern that opens every Ogg page, as byte values of the ASCII text "OggS".
const OGG_CAPTURE = Array.from("OggS", (ch) => ch.charCodeAt(0));
// Size of the fixed part of a page header; the segment table follows immediately after it.
const OGG_PAGE_HEADER_SIZE = 27;
// Byte offset of the 64-bit little-endian granule position within the page header.
const GRANULE_OFFSET = 6;
// Byte offset of the header-type flags: bit 0x01 = continued packet, 0x02 = BOS, 0x04 = EOS.
const HEADER_TYPE_OFFSET = 5;
// Byte offset of the segment count (number of segment-table entries).
const SEGMENT_COUNT_OFFSET = 26;
// Header-type bit marking a page whose first segment continues a packet from the previous page.
const CONTINUATION_FLAG = 0x01;

// --- Opus setup-packet signatures ------------------------------------------------------------
// Magic bytes that open the identification header packet ("OpusHead").
const OPUS_HEAD_SIG = Array.from("OpusHead", (ch) => ch.charCodeAt(0));
// Magic bytes that open the comment header packet ("OpusTags").
const OPUS_TAGS_SIG = Array.from("OpusTags", (ch) => ch.charCodeAt(0));
/**
* A demuxed Opus audio packet plus the timing context needed to schedule and trim it.
* `OggDemuxer.push` returns these in stream order, one per completed audio packet.
*/
export interface OpusPacket {
/**
* Raw Opus packet bytes (one Opus frame's worth — fed straight to the AudioDecoder).
* For a packet contained in a single page this is a `subarray` view over the demuxer's buffer,
* not a copy; a packet reassembled across pages is a freshly allocated array.
*/
data: Uint8Array;
/**
* The end-granule of the page this packet completed on, or null if the page carried no usable
* granule (mid-stream pages between completion points share the next completing page's granule —
* we attach the granule only to the packet that completes on a granule-bearing page). When a
* page completes several packets, only the LAST of them carries the granule; the others get
* null. A 48 kHz sample count; presentation time = (granule - preSkip) / 48000.
*/
pageGranule: number | null;
/**
* True when this packet completed on the stream's final (EOS) page — drives end-trim.
* Set from the page's EOS header flag (0x04), so every packet emitted from that page has it.
*/
isLastPage: boolean;
}
/**
 * Decode eight little-endian bytes starting at `offset` into a JS number.
 *
 * The two 32-bit halves are accumulated separately (each is exact) and combined once, so the
 * result is exact for values up to 2^53 and rounded to the nearest double above that. Reading past
 * the end of `buf` yields NaN rather than throwing.
 *
 * @param buf    Source bytes.
 * @param offset Index of the least-significant byte.
 * @returns The unsigned 64-bit value as a double.
 */
function readUint64LE(buf: Uint8Array, offset: number): number {
    let low = 0;
    let high = 0;
    // Horner form, most-significant byte first within each 32-bit half.
    for (let i = 3; i >= 0; i--) {
        low = low * 256 + buf[offset + i];
        high = high * 256 + buf[offset + 4 + i];
    }
    return high * 2 ** 32 + low;
}
/**
 * Report whether `buf` begins with the byte sequence `sig`.
 *
 * @param buf Bytes to inspect.
 * @param sig Expected leading byte values.
 * @returns True when `buf` is at least as long as `sig` and every leading byte matches; an empty
 *          `sig` always matches.
 */
function startsWith(buf: Uint8Array, sig: number[]): boolean {
    let i = sig.length;
    if (i > buf.length) return false;
    // Compare back to front; order is irrelevant to the result.
    while (i-- > 0) {
        if (buf[i] !== sig[i]) return false;
    }
    return true;
}
/**
 * Streaming Ogg-page -> Opus-packet demuxer.
 *
 * Bytes are supplied in arbitrary-sized chunks through `push`; every call returns the audio packets
 * that became complete, while incomplete pages and packets that span pages are held until the
 * missing bytes arrive. `reset` discards that carried state when a new byte stream begins.
 */
export class OggDemuxer {
    // Unconsumed raw bytes carried across push() calls (a page may straddle a network-chunk boundary).
    private pending: Uint8Array = new Uint8Array(0);
    // Pieces of a packet that spans pages (255-length last segment + continuation flag next page).
    private partialPacket: Uint8Array[] = [];
    // Number of setup packets (OpusHead, then OpusTags) recognised so far; at 2 everything is audio.
    private setupPacketsSeen = 0;

    /**
     * Feed raw stream bytes (any size).
     *
     * @param bytes Next chunk of the stream. When nothing is pending the chunk is retained by
     *              reference rather than copied, and returned packets may be views into it, so the
     *              caller should not overwrite it afterwards.
     * @returns All WHOLE Opus AUDIO packets that became available, in stream order. Setup packets
     *          (OpusHead/OpusTags) are consumed and skipped. Incomplete trailing bytes are kept
     *          for the next push.
     */
    push(bytes: Uint8Array): OpusPacket[] {
        this.pending = this.concat(this.pending, bytes);
        return this.drainPages();
    }

    /**
     * Reset to a fresh stream, dropping all pending bytes and any half-built packet.
     *
     * @param isContinuation True for a mid-stream slice (seek / range continuation) that carries no
     *                       setup pages: setup detection is marked as already done so the first
     *                       packets are treated as audio. False for a stream that starts with the
     *                       OpusHead/OpusTags pages.
     */
    reset(isContinuation: boolean): void {
        this.pending = new Uint8Array(0);
        this.partialPacket = [];
        this.setupPacketsSeen = isContinuation ? 2 : 0;
    }

    /** Parse every complete page currently buffered and collect the audio packets they complete. */
    private drainPages(): OpusPacket[] {
        const packets: OpusPacket[] = [];
        for (;;) {
            const page = this.tryReadPage();
            if (!page) break;
            this.parsePage(page, packets);
        }
        return packets;
    }

    /**
     * Try to slice one complete Ogg page off the front of `pending`.
     *
     * If `pending` does not start with the "OggS" capture pattern, the buffer is scanned for the
     * next one and everything before it is discarded. When no capture pattern exists at all, only
     * the last 3 bytes are kept, because a capture pattern could straddle the chunk boundary.
     *
     * @returns The page's header, segment table and payload as views into the buffer plus the
     *          page's total byte length, or null when a whole page is not buffered yet (in which
     *          case the page bytes remain in `pending`).
     */
    private tryReadPage(): { header: Uint8Array; segTable: Uint8Array; payload: Uint8Array; total: number } | null {
        let buf = this.pending;
        if (buf.length < OGG_PAGE_HEADER_SIZE) return null;

        // Resync: ensure we are positioned at a capture pattern.
        if (!startsWith(buf, OGG_CAPTURE)) {
            const sync = this.findCapture(buf, 0);
            if (sync < 0) {
                this.pending = buf.subarray(Math.max(0, buf.length - 3));
                return null;
            }
            buf = buf.subarray(sync);
            this.pending = buf;
            if (buf.length < OGG_PAGE_HEADER_SIZE) return null;
        }

        const segCount = buf[SEGMENT_COUNT_OFFSET];
        const segTableEnd = OGG_PAGE_HEADER_SIZE + segCount;
        if (buf.length < segTableEnd) return null; // segment table not fully buffered yet

        const segTable = buf.subarray(OGG_PAGE_HEADER_SIZE, segTableEnd);
        let payloadSize = 0;
        for (let i = 0; i < segCount; i++) payloadSize += segTable[i];

        const total = segTableEnd + payloadSize;
        if (buf.length < total) return null; // payload not fully buffered yet

        const header = buf.subarray(0, OGG_PAGE_HEADER_SIZE);
        const payload = buf.subarray(segTableEnd, total);
        // Advance past this page.
        this.pending = buf.subarray(total);
        return { header, segTable, payload, total };
    }

    /**
     * Reassemble the packets carried by one page and append the audio packets to `out`.
     *
     * @param page A page as returned by `tryReadPage`.
     * @param out  Receives one `OpusPacket` per audio packet completed on this page, in order.
     */
    private parsePage(
        page: { header: Uint8Array; segTable: Uint8Array; payload: Uint8Array; total: number },
        out: OpusPacket[]
    ): void {
        const { header, segTable, payload } = page;
        const headerType = header[HEADER_TYPE_OFFSET];
        const continued = (headerType & CONTINUATION_FLAG) !== 0;
        const isEos = (headerType & 0x04) !== 0;
        const granule = readUint64LE(header, GRANULE_OFFSET);

        // A granule of 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page". The raw
        // bytes are tested instead of the parsed number: values that large are beyond 2^53, where
        // several distinct 64-bit values round to the same double, so a numeric comparison could
        // not single out the sentinel.
        let hasGranule = false;
        for (let i = 0; i < 8; i++) {
            if (header[GRANULE_OFFSET + i] !== 0xff) {
                hasGranule = true;
                break;
            }
        }

        // If this page does NOT begin with a continuation, any half-built packet from a prior page
        // is orphaned (should not happen in a well-formed stream, but never carry garbage forward).
        if (!continued) this.partialPacket = [];

        // Conversely, a continued page with nothing buffered (typically the first page after a
        // seek, when that page begins mid-packet) opens with the TAIL of a packet whose head was
        // never seen. That fragment is not a decodable packet: skip it up to the first boundary.
        let skippingOrphan = continued && this.partialPacket.length === 0;

        // Walk the segment table, reassembling packets. A packet ends at the first segment < 255.
        const completedPackets: Uint8Array[] = [];
        let segStart = 0;
        let cursor = 0;
        for (let i = 0; i < segTable.length; i++) {
            const len = segTable[i];
            cursor += len;
            if (len < 255) {
                // Packet boundary: segments [segStart, cursor) form (the tail of) a packet.
                if (skippingOrphan) {
                    skippingOrphan = false;
                } else {
                    const slice = payload.subarray(segStart, cursor);
                    if (this.partialPacket.length > 0) {
                        this.partialPacket.push(slice);
                        completedPackets.push(this.flattenPartial());
                        this.partialPacket = [];
                    } else {
                        completedPackets.push(slice);
                    }
                }
                segStart = cursor;
            }
            // len === 255 on the last segment -> packet spans into the next page (handled below).
        }

        // Any trailing 255-run that did not terminate is a packet continuing into the next page.
        // While still inside an orphaned fragment there is nothing worth carrying: the next page
        // will again be continued with nothing buffered, and the skip resumes there.
        if (segStart < cursor && !skippingOrphan) {
            this.partialPacket.push(payload.subarray(segStart, cursor));
        }

        // Classify completed packets: OpusHead then OpusTags are setup packets and are skipped;
        // everything else is audio. The page granule is attached only to the LAST completing
        // packet of a granule-bearing page.
        for (let p = 0; p < completedPackets.length; p++) {
            const pkt = completedPackets[p];
            if (this.setupPacketsSeen < 2) {
                // Only count packets that actually carry the expected setup signature.
                if (this.setupPacketsSeen === 0 && startsWith(pkt, OPUS_HEAD_SIG)) {
                    this.setupPacketsSeen = 1;
                    continue;
                }
                if (this.setupPacketsSeen === 1 && startsWith(pkt, OPUS_TAGS_SIG)) {
                    this.setupPacketsSeen = 2;
                    continue;
                }
                // Not a recognised setup packet while one was still expected — treat as audio.
            }
            const isLastCompleting = p === completedPackets.length - 1;
            out.push({
                data: pkt,
                pageGranule: hasGranule && isLastCompleting ? granule : null,
                isLastPage: isEos
            });
        }
    }

    /**
     * Join the buffered pieces of a page-spanning packet into one array. A single piece is
     * returned as-is (no copy); several pieces are copied into a new array.
     */
    private flattenPartial(): Uint8Array {
        if (this.partialPacket.length === 1) return this.partialPacket[0];
        let len = 0;
        for (const s of this.partialPacket) len += s.length;
        const out = new Uint8Array(len);
        let o = 0;
        for (const s of this.partialPacket) {
            out.set(s, o);
            o += s.length;
        }
        return out;
    }

    /**
     * Find the first "OggS" capture pattern in `buf` at or after index `from`.
     *
     * @returns The index of the pattern's first byte, or -1 if it does not occur.
     */
    private findCapture(buf: Uint8Array, from: number): number {
        for (let i = from; i + 4 <= buf.length; i++) {
            if (buf[i] === OGG_CAPTURE[0] && buf[i + 1] === OGG_CAPTURE[1] &&
                buf[i + 2] === OGG_CAPTURE[2] && buf[i + 3] === OGG_CAPTURE[3]) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Concatenate two byte arrays. When either side is empty the other is returned unchanged
     * (same object, no copy); otherwise a new array is allocated.
     */
    private concat(a: Uint8Array, b: Uint8Array): Uint8Array {
        if (a.length === 0) return b;
        if (b.length === 0) return a;
        const out = new Uint8Array(a.length + b.length);
        out.set(a, 0);
        out.set(b, a.length);
        return out;
    }
}
/**
 * Pull the OpusHead identification-header packet out of a run of Ogg setup pages.
 *
 * The input is expected to be whole Ogg pages laid end to end. Pages are visited in order; the
 * payload of the first page whose payload begins with the "OpusHead" signature is returned in full
 * (the OpusHead packet is taken to be that page's entire payload).
 *
 * @param setupHeaderBytes Bytes of the Ogg pages wrapping the setup packets.
 * @returns A view into `setupHeaderBytes` covering the OpusHead payload, or null when a page does
 *          not start with "OggS", a page is truncated, or no OpusHead payload is found.
 */
export function extractOpusHead(setupHeaderBytes: Uint8Array): Uint8Array | null {
    const end = setupHeaderBytes.length;
    let pageStart = 0;

    while (pageStart + OGG_PAGE_HEADER_SIZE <= end) {
        // Every page must open with the capture pattern; no resync is attempted here.
        if (!startsWith(setupHeaderBytes.subarray(pageStart), OGG_CAPTURE)) return null;

        // The segment table sits right after the fixed header; its length is the segment count.
        const lacingStart = pageStart + OGG_PAGE_HEADER_SIZE;
        const bodyStart = lacingStart + setupHeaderBytes[pageStart + SEGMENT_COUNT_OFFSET];
        if (bodyStart > end) return null;

        // Payload length is the sum of all lacing values.
        let bodyEnd = bodyStart;
        for (let i = lacingStart; i < bodyStart; i++) bodyEnd += setupHeaderBytes[i];
        if (bodyEnd > end) return null;

        const body = setupHeaderBytes.subarray(bodyStart, bodyEnd);
        if (startsWith(body, OPUS_HEAD_SIG)) return body;

        pageStart = bodyEnd;
    }

    return null;
}
/**
 * Read the channel count from an OpusHead packet.
 *
 * The count is the byte at index 9 (after the 8-byte magic and the version byte, per RFC 7845 §5.1).
 *
 * @param opusHead OpusHead packet bytes.
 * @returns The byte at index 9, or a nominal 2 when the packet is shorter than 10 bytes.
 */
export function opusHeadChannelCount(opusHead: Uint8Array): number {
    return opusHead.length >= 10 ? opusHead[9] : 2;
}