Files
deepdrft/DeepDrftPublic/Interop/audio/OpusFormatDecoder.ts
T

170 lines
8.4 KiB
TypeScript

/**
* OpusFormatDecoder - Ogg-Opus implementation of IFormatDecoder.
*
* Ogg Opus is a containerized, paged format — NOT raw-frame-sliceable the way WAV PCM is. Two
* things make a mid-stream byte slice decodable: (1) it must begin on an Ogg page boundary, and
* (2) the OpusHead/OpusTags setup pages must be prepended (analogous to FLAC's STREAMINFO carry).
* This decoder owns both, plus VBR-safe accurate seeking.
*
* Where the metadata comes from is the genuinely new part. WAV/MP3/FLAC parse everything out of
* the byte stream. Opus is VBR and container-paged, so a byteRate seek would be inaccurate; instead
* the seek transfer function (granule->byte) and the setup bytes are precomputed at transcode time
* (wave 18.1) and delivered as a one-time sidecar fetch (wave 18.5). The injection seam is
* `setSidecar(OpusSeekData)` — call it with the parsed sidecar BEFORE the stream is initialized so
* `tryParseHeader` can build FormatInfo from it. Without a sidecar the decoder cannot stream Opus
* (returns null from tryParseHeader); 18.5 guarantees the fetch precedes stream init.
*
* - getAlignedSegmentSize aligns to Ogg page boundaries by scanning for the "OggS" capture
* pattern (the Ogg analogue of FLAC's frame-sync scan; the interface passes rawData for this).
* - wrapSegment prepends the cached OpusHead/OpusTags setup bytes so any mid-stream page run is
* independently decodable.
* - calculateByteOffset binary-searches the precomputed index for the largest entry with
* presentation-time <= t and returns its exact page-start byte offset — NOT interpolation,
* NOT byteRate math (§3.4a A/C; C5 accurate seek).
*/
import { FormatInfo, IFormatDecoder } from './IFormatDecoder.js';
import { OpusSeekData, OPUS_SAMPLE_RATE, presentationTimeSeconds } from './OpusSidecar.js';
// "OggS" — every Ogg page begins with this 4-byte capture pattern.
const OGG_CAPTURE = [0x4f, 0x67, 0x67, 0x53]; // 'O' 'g' 'g' 'S'

/**
 * Ogg-Opus implementation of IFormatDecoder.
 *
 * Stream metadata is not parsed from the byte stream: it comes from an `OpusSeekData` sidecar
 * injected through `setSidecar`. Segments are cut on Ogg page starts (found by scanning for the
 * "OggS" capture pattern), each segment is wrapped with the sidecar's setup bytes, and seeks are
 * resolved through the sidecar's precomputed seek index.
 */
export class OpusFormatDecoder implements IFormatDecoder {
    // The parsed sidecar: setup bytes + seek index + preSkip + totals. Stays null until
    // setSidecar is called; tryParseHeader returns null while it is unset.
    private sidecar: OpusSeekData | null = null;

    /**
     * Inject the parsed sidecar (setup header + seek index) for this stream.
     *
     * Must be called before `tryParseHeader` can succeed. Keeping the fetch outside the decoder
     * means the decoder itself performs no I/O and can be exercised with synthetic bytes.
     *
     * @param sidecar Parsed sidecar data; retained by reference, not copied.
     */
    setSidecar(sidecar: OpusSeekData): void {
        this.sidecar = sidecar;
    }

    /**
     * Build the stream's FormatInfo from the injected sidecar.
     *
     * Both parameters are ignored: nothing is read from the stream bytes.
     *
     * @returns FormatInfo derived from the sidecar, or null when no sidecar has been injected yet.
     */
    tryParseHeader(_chunks: Uint8Array[], _totalSize: number): FormatInfo | null {
        const sidecar = this.sidecar;
        if (!sidecar) return null;

        // Audio data is taken to begin immediately after the setup bytes, i.e. the initial
        // full-file stream is laid out as [setup pages][audio pages].
        const audioDataOffset = sidecar.setupHeaderBytes.length;

        return {
            // Opus always decodes at 48 kHz regardless of the source rate (RFC 7845).
            sampleRate: OPUS_SAMPLE_RATE,
            // NOTE(review): hard-coded nominal value; the real channel count lives in OpusHead
            // inside the setup bytes and is not parsed here.
            channels: 2,
            bitsPerSample: 16,
            byteRate: 0, // VBR + paged; seeking uses the index, never byteRate.
            blockAlign: 0, // No fixed alignment; segments align to Ogg page starts via OggS scan.
            // A non-positive (or NaN) duration is reported as "unknown".
            totalDuration: sidecar.totalDurationSeconds > 0 ? sidecar.totalDurationSeconds : null,
            audioDataOffset,
            seekData: sidecar
        };
    }

    /**
     * Decide how many of the buffered bytes to emit as the next segment.
     *
     * @param info Unused by this decoder.
     * @param availableBytes Number of buffered bytes not yet emitted.
     * @param requestedSize Upper bound on the segment size.
     * @param streamComplete True when no further bytes will arrive.
     * @param rawData Optional view of the buffered bytes, used to locate Ogg page starts.
     * @returns The segment size in bytes, or 0 to signal "wait for more data".
     */
    getAlignedSegmentSize(
        info: FormatInfo,
        availableBytes: number,
        requestedSize: number,
        streamComplete: boolean,
        rawData?: Uint8Array
    ): number {
        if (availableBytes === 0) return 0;

        const candidate = Math.min(requestedSize, availableBytes);

        if (!rawData || rawData.length === 0) {
            // No scan data — conservative threshold to avoid tiny unusable segments.
            if (!streamComplete && availableBytes < 16384) return 0;
            return candidate;
        }

        // Scan backward from the candidate boundary for the start of the last Ogg page. Cutting on
        // a page start keeps the next segment Ogg-sync-aligned and the current one a whole page run.
        const boundary = OpusFormatDecoder.findLastOggPage(rawData, candidate);
        if (boundary <= 0) {
            // NOTE(review): if requestedSize is smaller than one Ogg page, this returns 0 on every
            // call while the stream is open — confirm callers always request more than a page.
            if (streamComplete) return candidate; // flush remaining bytes (stream done)
            return 0; // wait for more data — no page boundary inside the window yet
        }
        return boundary;
    }

    /**
     * Scan backward from `maxBytes` in `rawData` for the start of the last Ogg page.
     *
     * Offset 0 is deliberately never returned as a match: a cut at 0 would produce an empty
     * segment, so the caller treats 0 as "no boundary found".
     *
     * @returns Byte offset of the last page start strictly inside (0, maxBytes - 4], or 0 if none.
     */
    private static findLastOggPage(rawData: Uint8Array, maxBytes: number): number {
        const limit = Math.min(maxBytes, rawData.length);
        for (let i = limit - 4; i > 0; i--) {
            if (OpusFormatDecoder.isOggPageStart(rawData, i)) {
                return i;
            }
        }
        return 0;
    }

    /**
     * Test whether an Ogg page header starts at `offset`.
     *
     * The four "OggS" bytes alone can also occur inside compressed packet payload, which would
     * cut a segment mid-page. To reject such false positives the byte after the capture pattern
     * (stream_structure_version, which RFC 3533 defines as 0) is checked too whenever it is
     * present in `rawData`. If the buffer ends right after the capture pattern the match is
     * accepted, since there is nothing further to validate against.
     */
    private static isOggPageStart(rawData: Uint8Array, offset: number): boolean {
        for (let k = 0; k < OGG_CAPTURE.length; k++) {
            if (rawData[offset + k] !== OGG_CAPTURE[k]) return false;
        }
        // Out-of-range typed-array reads yield undefined, which doubles as the bounds check.
        const version = rawData[offset + OGG_CAPTURE.length];
        if (version !== undefined && version !== 0) return false;
        return true;
    }

    /**
     * Make a raw page run independently decodable by prepending the sidecar's setup bytes.
     *
     * @param info FormatInfo whose `seekData` carries the Opus sidecar.
     * @param rawBytes The page run to wrap.
     * @returns A new array `[setup bytes][rawBytes]`, or `rawBytes` itself (same reference) when
     *          no setup bytes are available.
     */
    wrapSegment(info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
        const sidecar = OpusFormatDecoder.opusSeekData(info);
        const setupBytes = sidecar?.setupHeaderBytes;
        if (!setupBytes || setupBytes.length === 0) {
            // Defensive: without setup bytes a mid-stream page run is undecodable. tryParseHeader
            // always attaches the sidecar on success, so this path should not occur in practice.
            return rawBytes;
        }

        // Prepend OpusHead/OpusTags so the page run is self-contained for decoding.
        const result = new Uint8Array(setupBytes.length + rawBytes.length);
        result.set(setupBytes, 0);
        result.set(rawBytes, setupBytes.length);
        return result;
    }

    /**
     * Map a seek target in seconds to the byte offset to resume streaming from.
     *
     * Binary-searches the sidecar index for the last entry whose presentation time is
     * <= `positionSeconds` and returns that entry's `byteOffset` unchanged. Targets earlier than
     * the first entry (and NaN targets) resolve to the first entry. The search relies on the
     * index being ordered by ascending presentation time.
     *
     * @returns The chosen entry's byte offset, or `info.audioDataOffset` when there is no index.
     */
    calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
        const sidecar = OpusFormatDecoder.opusSeekData(info);
        if (!sidecar || sidecar.points.length === 0) {
            // No index: degrade to start of audio (seek restarts).
            return info.audioDataOffset;
        }

        const points = sidecar.points;
        const preSkip = sidecar.preSkip;

        // `best` starts at 0 so a target before every entry still lands on the first one.
        let lo = 0, hi = points.length - 1, best = 0;
        while (lo <= hi) {
            const mid = (lo + hi) >> 1;
            const t = presentationTimeSeconds(points[mid].granulePosition, preSkip);
            if (t <= positionSeconds) {
                best = mid;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }

        // byteOffset is used as-is: no header length is added to it.
        return points[best].byteOffset;
    }

    /** Return `info.seekData` when it is tagged as an Opus sidecar, otherwise null. */
    private static opusSeekData(info: FormatInfo): OpusSeekData | null {
        return info.seekData?.kind === 'opus-sidecar' ? info.seekData : null;
    }
}