5a75da1769
Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups.
299 lines
14 KiB
TypeScript
299 lines
14 KiB
TypeScript
/**
 * OggDemuxer - streaming Ogg-page -> Opus-packet demuxer for the WebCodecs decode path.
 *
 * Ogg Opus is a containerized, paged format. To feed a WebCodecs `AudioDecoder` we must extract the
 * individual Opus *packets* from the Ogg container — the decoder takes packets (as `EncodedAudioChunk`s),
 * not raw container bytes. This module is the client-side analogue of the C# `OggOpusParser`: it reads
 * the page structure directly (the "OggS" capture pattern + the 27-byte page header + segment table) and
 * reassembles packets across the lacing, tracking the granule position that gives each packet its time.
 *
 * It is deliberately *streaming*: `push(bytes)` accepts arbitrary network chunks (a packet, a page, or a
 * fraction of either) and returns whatever WHOLE packets have become available, holding partial state
 * across calls. This matches how `StreamAudioWithEarlyPlayback` feeds bytes in adaptive 16–64 KB chunks.
 *
 * Lacing rules (RFC 3533 §6): a page's segment table lists N segment lengths (0–255). A packet is the
 * concatenation of consecutive segments up to and including the first segment whose length is < 255. A
 * segment of exactly 255 means "this packet continues into the next segment" — and if it is the page's
 * LAST segment, the packet continues into the next page (the next page's header-type has the
 * continuation flag set). The granule position on a page is the end-granule of the LAST packet that
 * *completes* on that page.
 *
 * The two leading setup packets (OpusHead, OpusTags) are NOT audio and are skipped — they configure the
 * decoder (the sidecar carries them as the codec description) and are never decoded as audio packets.
 */

// --- Ogg page header layout (RFC 3533 §6) -------------------------------------------------------

/** Capture pattern that opens every Ogg page: the ASCII bytes "OggS". */
const OGG_CAPTURE = [0x4f, 0x67, 0x67, 0x53]; // "OggS"
/** Size of the fixed part of a page header; the segment table follows immediately after it. */
const OGG_PAGE_HEADER_SIZE = 27;
const GRANULE_OFFSET = 6; // 64-bit granule position within the page header
const HEADER_TYPE_OFFSET = 5; // bit 0x01 = continued packet, 0x02 = BOS, 0x04 = EOS
const SEGMENT_COUNT_OFFSET = 26; // number of segment-table entries
/** header-type bit: the page's first segment run continues a packet begun on the previous page. */
const CONTINUATION_FLAG = 0x01;

// --- Opus setup-packet signatures (the first 8 bytes of each setup packet) ----------------------

const OPUS_HEAD_SIG = [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64]; // "OpusHead"
const OPUS_TAGS_SIG = [0x4f, 0x70, 0x75, 0x73, 0x54, 0x61, 0x67, 0x73]; // "OpusTags"

/** A demuxed Opus audio packet plus the timing context needed to schedule and trim it. */
export interface OpusPacket {
  /** Raw Opus packet bytes, exactly as carried in the Ogg lacing (fed straight to the AudioDecoder). */
  data: Uint8Array;
  /**
   * The granule position of the page this packet completed on, as a 48 kHz sample count;
   * presentation time = (granule - preSkip) / 48000.
   *
   * Only the LAST packet completing on a page carries the value, because the page granule is the
   * end position of that packet. It is null for every earlier packet on the same page, and for all
   * packets of a page whose granule field is the "no packet completes here" sentinel (all 0xFF).
   */
  pageGranule: number | null;
  /** True when this packet completed on the stream's final (EOS) page — drives end-trim. */
  isLastPage: boolean;
}

/**
 * Read a little-endian unsigned 64-bit integer starting at `offset` and return it as a JS number.
 *
 * The value is assembled from two 32-bit halves, so it is exact for anything up to 2^53 — far
 * beyond any real granule. Larger values are rounded to the nearest representable double (the
 * all-0xFF value comes back as 2^64), so callers that need to recognise such a value must compare
 * the raw bytes instead of the returned number.
 *
 * @param buf    Source bytes; must hold at least `offset + 8` bytes.
 * @param offset Index of the least-significant byte.
 * @returns The decoded value.
 */
function readUint64LE(buf: Uint8Array, offset: number): number {
  // Multiplication by powers of two (not bit shifts) keeps each half unsigned and free of the
  // 32-bit signed wrap-around that `<<` would introduce for a top byte >= 0x80.
  const readHalf = (start: number): number => {
    let half = 0;
    for (let byteIndex = 0; byteIndex < 4; byteIndex++) {
      half += buf[start + byteIndex] * 2 ** (8 * byteIndex);
    }
    return half;
  };

  const low = readHalf(offset);
  const high = readHalf(offset + 4);
  return high * 0x100000000 + low;
}

/**
 * Report whether `buf` begins with the byte sequence `sig`.
 *
 * @param buf Bytes to inspect.
 * @param sig Expected leading bytes.
 * @returns True when `buf` is at least as long as `sig` and its first `sig.length` bytes match;
 *          an empty `sig` therefore always matches.
 */
function startsWith(buf: Uint8Array, sig: number[]): boolean {
  if (buf.length < sig.length) return false;
  return sig.every((expected, index) => buf[index] === expected);
}

/** One complete Ogg page sliced off the pending buffer. All byte fields are views, not copies. */
interface OggPage {
  /** The fixed 27-byte page header. */
  header: Uint8Array;
  /** The segment (lacing) table that follows the header. */
  segTable: Uint8Array;
  /** The page body: the concatenation of all segments listed in `segTable`. */
  payload: Uint8Array;
  /** Total page size in bytes (header + segment table + payload). */
  total: number;
}

/** header-type bit marking the last page of the logical stream (RFC 3533 §6). */
const EOS_FLAG = 0x04;

/**
 * Streaming Ogg demuxer: accepts arbitrary byte chunks via `push()` and returns the whole Opus
 * audio packets that have become available, carrying partial pages and partial packets across calls.
 */
export class OggDemuxer {
  // Unconsumed raw bytes carried across push() calls (a page may straddle a network-chunk boundary).
  private pending: Uint8Array = new Uint8Array(0);
  // Bytes of a packet that spans pages (255-length last segment + continuation flag next page).
  private partialPacket: Uint8Array[] = [];
  // Once both setup packets are seen, every subsequent packet is audio.
  private setupPacketsSeen = 0;
  // True while discarding the tail of a packet whose beginning was never received (a page arrived
  // with the continuation flag set but nothing was buffered to continue).
  private skippingOrphan = false;

  /**
   * Feed raw stream bytes (any size). Returns all WHOLE Opus AUDIO packets that became decodable,
   * in order. Setup packets (OpusHead/OpusTags) are consumed and skipped. Incomplete trailing bytes
   * are retained for the next push.
   *
   * Packets that lie within a single page are returned as views (`subarray`) into the buffered
   * bytes, and when nothing is buffered the buffer is the `bytes` argument itself — so the caller
   * must not mutate or reuse `bytes` while returned packets are still in use.
   *
   * @param bytes The next chunk of the Ogg byte stream.
   * @returns The audio packets completed by this chunk (possibly none).
   */
  push(bytes: Uint8Array): OpusPacket[] {
    this.pending = this.concat(this.pending, bytes);
    return this.drainPages();
  }

  /**
   * Reset to a fresh stream. Used on seek/range-continuation: the new 206 body begins on a page
   * boundary, so all partial-packet and pending state must be dropped. setupPacketsSeen is reset to
   * 2 (already configured) for a continuation — a mid-stream slice carries no setup pages, only audio
   * pages — so the demuxer treats the first page's packets as audio immediately.
   *
   * @param isContinuation True when the next bytes are a mid-stream slice without setup pages.
   */
  reset(isContinuation: boolean): void {
    this.pending = new Uint8Array(0);
    this.partialPacket = [];
    this.skippingOrphan = false;
    this.setupPacketsSeen = isContinuation ? 2 : 0;
  }

  /** Parse every page that is fully buffered and collect the audio packets they complete. */
  private drainPages(): OpusPacket[] {
    const packets: OpusPacket[] = [];
    for (let page = this.tryReadPage(); page !== null; page = this.tryReadPage()) {
      this.parsePage(page, packets);
    }
    return packets;
  }

  /**
   * Try to slice one complete Ogg page off the front of `pending`. Returns null when a whole page
   * is not yet buffered (`pending` is kept, apart from any leading non-page bytes that resync
   * discards). Resynchronises by scanning for "OggS" if `pending` does not start on a page boundary
   * (defensive — the encoder writes contiguous pages, but a continuation stream could in theory
   * begin mid-garbage; the seek offset is always a page start).
   */
  private tryReadPage(): OggPage | null {
    let buf = this.pending;
    if (buf.length < OGG_PAGE_HEADER_SIZE) return null;

    // Resync: ensure we are positioned at a capture pattern.
    if (!startsWith(buf, OGG_CAPTURE)) {
      const sync = this.findCapture(buf, 0);
      if (sync < 0) {
        // No capture pattern at all — keep only the last 3 bytes (a capture could straddle).
        this.pending = buf.subarray(Math.max(0, buf.length - 3));
        return null;
      }
      buf = buf.subarray(sync);
      this.pending = buf;
      if (buf.length < OGG_PAGE_HEADER_SIZE) return null; // header not fully buffered after resync
    }

    const segCount = buf[SEGMENT_COUNT_OFFSET];
    const segTableEnd = OGG_PAGE_HEADER_SIZE + segCount;
    if (buf.length < segTableEnd) return null; // segment table not fully buffered yet

    const segTable = buf.subarray(OGG_PAGE_HEADER_SIZE, segTableEnd);
    let payloadSize = 0;
    for (const segLen of segTable) payloadSize += segLen;

    const total = segTableEnd + payloadSize;
    if (buf.length < total) return null; // payload not fully buffered yet

    const header = buf.subarray(0, OGG_PAGE_HEADER_SIZE);
    const payload = buf.subarray(segTableEnd, total);

    // Advance past this page.
    this.pending = buf.subarray(total);
    return { header, segTable, payload, total };
  }

  /**
   * Reassemble the packets carried by one page and append the audio ones to `out`.
   *
   * @param page A complete page as returned by `tryReadPage`.
   * @param out  Receives the audio packets that complete on this page, in stream order.
   */
  private parsePage(page: OggPage, out: OpusPacket[]): void {
    const { header, segTable, payload } = page;
    const headerType = header[HEADER_TYPE_OFFSET];
    const continued = (headerType & CONTINUATION_FLAG) !== 0;
    const isEos = (headerType & EOS_FLAG) !== 0;
    const granule = readUint64LE(header, GRANULE_OFFSET);

    // 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page" — no usable timestamp.
    // We check the raw bytes rather than the parsed number: the full 64-bit sentinel exceeds 2^53
    // and cannot be represented exactly as an IEEE-754 double. The byte check is exact.
    const granuleBytes = header.subarray(GRANULE_OFFSET, GRANULE_OFFSET + 8);
    const hasGranule = !granuleBytes.every((b) => b === 0xff);

    if (!continued) {
      // A fresh packet starts this page: any half-built packet from a prior page is orphaned
      // (should not happen in a well-formed stream, but never carry garbage forward).
      this.partialPacket = [];
      this.skippingOrphan = false;
    } else if (this.partialPacket.length === 0) {
      // The page continues a packet whose beginning we never buffered (first page after a
      // seek/reset or a resync). Its leading segment run is only the TAIL of that packet and
      // must be discarded rather than handed to the decoder as if it were whole.
      this.skippingOrphan = true;
    }

    // Walk the segment table, reassembling packets. A packet ends at the first segment < 255.
    const completedPackets: Uint8Array[] = [];
    let segStart = 0;
    let cursor = 0;
    for (const len of segTable) {
      cursor += len;
      if (len === 255) continue; // packet continues into the next segment (or the next page)

      // Packet boundary: payload bytes [segStart, cursor) form (the tail of) a packet.
      const slice = payload.subarray(segStart, cursor);
      if (this.skippingOrphan) {
        // End of the headless packet: drop it and resume normal parsing with the next one.
        this.skippingOrphan = false;
      } else if (this.partialPacket.length > 0) {
        this.partialPacket.push(slice);
        completedPackets.push(this.flattenPartial());
        this.partialPacket = [];
      } else {
        completedPackets.push(slice);
      }
      segStart = cursor;
    }

    // Any trailing 255-run that did not terminate is a packet continuing into the next page.
    // While skipping an orphan the run belongs to the headless packet, so it is not buffered and
    // the skip carries over to the next (continued) page.
    if (segStart < cursor && !this.skippingOrphan) {
      this.partialPacket.push(payload.subarray(segStart, cursor));
    }

    // Classify completed packets: the first two whole packets in the whole stream are the setup
    // packets (OpusHead, OpusTags) and are skipped. Everything after is audio. The page granule is
    // attached to the LAST completing audio packet on a granule-bearing page (the granule is that
    // page's end-granule per RFC 7845).
    const lastIndex = completedPackets.length - 1;
    completedPackets.forEach((pkt, index) => {
      if (this.setupPacketsSeen < 2) {
        // Only count packets that are actually the Opus setup headers, in their required order.
        if (this.setupPacketsSeen === 0 && startsWith(pkt, OPUS_HEAD_SIG)) {
          this.setupPacketsSeen = 1;
          return;
        }
        if (this.setupPacketsSeen === 1 && startsWith(pkt, OPUS_TAGS_SIG)) {
          this.setupPacketsSeen = 2;
          return;
        }
        // Not a recognised setup packet while we still expected one — treat as audio (a
        // continuation slice that began mid-stream). Fall through.
      }

      out.push({
        data: pkt,
        pageGranule: hasGranule && index === lastIndex ? granule : null,
        isLastPage: isEos
      });
    });
  }

  /** Join the buffered fragments of a page-spanning packet into one contiguous array. */
  private flattenPartial(): Uint8Array {
    if (this.partialPacket.length === 1) return this.partialPacket[0];

    const totalLength = this.partialPacket.reduce((sum, part) => sum + part.length, 0);
    const joined = new Uint8Array(totalLength);
    let writeAt = 0;
    for (const part of this.partialPacket) {
      joined.set(part, writeAt);
      writeAt += part.length;
    }
    return joined;
  }

  /** Index of the first "OggS" capture pattern at or after `from`, or -1 if there is none. */
  private findCapture(buf: Uint8Array, from: number): number {
    for (let i = from; i + OGG_CAPTURE.length <= buf.length; i++) {
      if (
        buf[i] === OGG_CAPTURE[0] &&
        buf[i + 1] === OGG_CAPTURE[1] &&
        buf[i + 2] === OGG_CAPTURE[2] &&
        buf[i + 3] === OGG_CAPTURE[3]
      ) {
        return i;
      }
    }
    return -1;
  }

  /** Concatenate two byte arrays; returns the non-empty input itself when the other is empty. */
  private concat(a: Uint8Array, b: Uint8Array): Uint8Array {
    if (a.length === 0) return b;
    if (b.length === 0) return a;
    const joined = new Uint8Array(a.length + b.length);
    joined.set(a, 0);
    joined.set(b, a.length);
    return joined;
  }
}

/**
 * Extract the raw OpusHead identification-header *packet* from the sidecar's setup-header bytes (which
 * are the verbatim Ogg PAGES wrapping OpusHead + OpusTags). WebCodecs' `AudioDecoderConfig.description`
 * for Opus is the OpusHead packet (RFC 7845 §5.1), not the Ogg page — so we demux the setup pages and
 * return the first packet's bytes.
 *
 * @param setupHeaderBytes Contiguous Ogg pages, starting on a page boundary.
 * @returns A view onto the OpusHead packet, or null if the bytes do not start on an "OggS" capture
 *          pattern, a page is truncated, the packet does not finish on its page, or no page begins
 *          with the OpusHead signature.
 */
export function extractOpusHead(setupHeaderBytes: Uint8Array): Uint8Array | null {
  // Walk pages manually (the setup region is small — at most two pages) and return the first packet
  // that starts with the OpusHead signature.
  let pageStart = 0;
  while (pageStart + OGG_PAGE_HEADER_SIZE <= setupHeaderBytes.length) {
    if (!startsWith(setupHeaderBytes.subarray(pageStart), OGG_CAPTURE)) return null;

    const segCount = setupHeaderBytes[pageStart + SEGMENT_COUNT_OFFSET];
    const payloadStart = pageStart + OGG_PAGE_HEADER_SIZE + segCount;
    if (payloadStart > setupHeaderBytes.length) return null; // segment table truncated

    // One pass over the lacing values gives both the page payload size and the length of the
    // first packet (it ends at the first segment shorter than 255 bytes).
    const segTable = setupHeaderBytes.subarray(payloadStart - segCount, payloadStart);
    let payloadSize = 0;
    let firstPacketSize = -1;
    for (const segLen of segTable) {
      payloadSize += segLen;
      if (firstPacketSize < 0 && segLen < 255) firstPacketSize = payloadSize;
    }

    const payloadEnd = payloadStart + payloadSize;
    if (payloadEnd > setupHeaderBytes.length) return null; // payload truncated

    const payload = setupHeaderBytes.subarray(payloadStart, payloadEnd);
    if (startsWith(payload, OPUS_HEAD_SIG)) {
      // Return only the OpusHead packet itself, not whatever else the page might carry. On a
      // conforming stream the ID header is alone on its page, so this is the whole payload.
      // A packet that does not terminate on this page is incomplete and cannot be used.
      return firstPacketSize < 0 ? null : payload.subarray(0, firstPacketSize);
    }
    pageStart = payloadEnd;
  }
  return null;
}

/**
 * Channel count from an OpusHead packet (RFC 7845 §5.1: byte 9, after the 8-byte magic + version).
 *
 * @param opusHead The OpusHead packet bytes.
 * @returns The channel count, or a nominal 2 when the header is too short to contain the field or
 *          the field is 0 (a value RFC 7845 forbids, and one no decoder can be configured with).
 */
export function opusHeadChannelCount(opusHead: Uint8Array): number {
  const NOMINAL_CHANNELS = 2; // safe nominal
  if (opusHead.length < 10) return NOMINAL_CHANNELS;
  const channels = opusHead[9];
  return channels === 0 ? NOMINAL_CHANNELS : channels;
}