/**
 * OpusFormatDecoder - Ogg-Opus implementation of IFormatDecoder.
 *
 * Ogg Opus is a containerized, paged format — NOT raw-frame-sliceable the way WAV PCM is. Two
 * things make a mid-stream byte slice decodable: (1) it must begin on an Ogg page boundary, and
 * (2) the OpusHead/OpusTags setup pages must be prepended (analogous to FLAC's STREAMINFO carry).
 * This decoder owns both, plus VBR-safe accurate seeking.
 *
 * Where the metadata comes from is the genuinely new part. WAV/MP3/FLAC parse everything out of
 * the byte stream. Opus is VBR and container-paged, so a byteRate seek would be inaccurate; instead
 * the seek transfer function (granule->byte) and the setup bytes are precomputed at transcode time
 * (wave 18.1) and delivered as a one-time sidecar fetch (wave 18.5). The injection seam is
 * `setSidecar(OpusSeekData)` — call it with the parsed sidecar BEFORE the stream is initialized so
 * `tryParseHeader` can build FormatInfo from it. Without a sidecar the decoder cannot stream Opus
 * (returns null from tryParseHeader); 18.5 guarantees the fetch precedes stream init.
 *
 * - getAlignedSegmentSize aligns to Ogg page boundaries by scanning for the "OggS" capture
 *   pattern (the Ogg analogue of FLAC's frame-sync scan; the interface passes rawData for this).
 * - wrapSegment prepends the cached OpusHead/OpusTags setup bytes so any mid-stream page run is
 *   independently decodable.
 * - calculateByteOffset binary-searches the precomputed index for the largest entry with
 *   presentation-time <= t and returns its exact page-start byte offset — NOT interpolation,
 *   NOT byteRate math (§3.4a A/C; C5 accurate seek).
 */

import { FormatInfo, IFormatDecoder } from './IFormatDecoder.js';
import { OpusSeekData, OPUS_SAMPLE_RATE, presentationTimeSeconds } from './OpusSidecar.js';

// "OggS" — every Ogg page begins with this 4-byte capture pattern. Declared readonly so the
// shared pattern cannot be mutated by accident.
const OGG_CAPTURE: readonly number[] = [0x4f, 0x67, 0x67, 0x53]; // 'O' 'g' 'g' 'S'

export class OpusFormatDecoder implements IFormatDecoder {
    // The parsed sidecar: setup bytes + seek index + preSkip + totals. Injected by wave 18.5 via
    // setSidecar before stream init. Held for the stream's lifetime (the format does not change
    // across a seek/continuation), mirroring how FlacFormatDecoder retains streamInfoBytes.
    private sidecar: OpusSeekData | null = null;

    /**
     * Inject the parsed sidecar (setup header + seek index) for this stream. Wave 18.5 calls this
     * after its one-time sidecar fetch + parseSidecar, before initializeStreaming. This is the seam
     * that keeps the HTTP fetch out of the decoder: the decoder is pure and unit-testable against
     * synthetic bytes, and 18.5 wires the real transport.
     *
     * @param sidecar Parsed sidecar to retain; replaces any previously injected one.
     */
    setSidecar(sidecar: OpusSeekData): void {
        this.sidecar = sidecar;
    }

    /**
     * Build the stream's FormatInfo from the injected sidecar. The stream bytes themselves are
     * never inspected.
     *
     * @param _chunks Not read by this implementation.
     * @param _totalSize Not read by this implementation.
     * @returns FormatInfo backed by the sidecar, or null when no sidecar has been injected yet.
     */
    tryParseHeader(_chunks: Uint8Array[], _totalSize: number): FormatInfo | null {
        // Opus metadata is NOT parsed from the stream — it comes from the injected sidecar. Without
        // it we cannot stream Opus; return null so StreamDecoder waits, and 18.5's contract (fetch +
        // setSidecar before stream init) prevents that null from persisting.
        const sidecar = this.sidecar;
        if (!sidecar) return null;

        // For the initial full-file stream the server emits [setup pages][audio pages], and the
        // sidecar's setup bytes are exactly those leading pages — so audio data begins right after
        // them. This is the file-absolute offset of the first audio page (== the first index point's
        // byteOffset by construction).
        const audioDataOffset = sidecar.setupHeaderBytes.length;

        return {
            // Opus always decodes at 48 kHz regardless of the source rate (RFC 7845).
            sampleRate: OPUS_SAMPLE_RATE,
            // Channel count is encoded in OpusHead; the decoder reads it from the prepended setup
            // bytes at decode time. FormatInfo.channels is display-only here — 2 is the safe nominal.
            channels: 2,
            bitsPerSample: 16,
            byteRate: 0, // VBR + paged; seeking uses the index, never byteRate.
            blockAlign: 0, // No fixed alignment; segments align to Ogg page starts via OggS scan.
            // A non-positive total is reported as "unknown" (null) rather than as a zero duration.
            totalDuration: sidecar.totalDurationSeconds > 0 ? sidecar.totalDurationSeconds : null,
            audioDataOffset,
            seekData: sidecar
        };
    }

    /**
     * Decide how many of the buffered bytes to hand off as the next segment, cutting on an Ogg
     * page start whenever scan data is available.
     *
     * @param info Not read by this implementation.
     * @param availableBytes Number of bytes currently buffered.
     * @param requestedSize Upper bound on the segment size the caller wants.
     * @param streamComplete True once no more bytes will arrive; allows flushing a tail that has
     *        no interior page boundary.
     * @param rawData Buffered bytes to scan for "OggS"; when absent or empty a size-only
     *        heuristic is used instead.
     * @returns Segment length in bytes, or 0 to signal "wait for more data".
     */
    getAlignedSegmentSize(
        info: FormatInfo,
        availableBytes: number,
        requestedSize: number,
        streamComplete: boolean,
        rawData?: Uint8Array
    ): number {
        // Nothing buffered, or a non-positive request: there is no valid segment to emit. Guarding
        // requestedSize here keeps a negative request from leaking out as a negative length.
        if (availableBytes <= 0 || requestedSize <= 0) return 0;
        const candidate = Math.min(requestedSize, availableBytes);

        if (!rawData || rawData.length === 0) {
            // No scan data — conservative threshold to avoid tiny unusable segments (mirrors FLAC).
            if (!streamComplete && availableBytes < 16384) return 0;
            return candidate;
        }

        // Scan backward from the candidate boundary for the start of the last Ogg page. Cutting on a
        // page start keeps the next segment Ogg-sync-aligned and the current one a whole page run.
        const boundary = OpusFormatDecoder.findLastOggPage(rawData, candidate);
        if (boundary <= 0) {
            if (streamComplete) return candidate; // flush remaining bytes (stream done)
            return 0; // wait for more data — no full page boundary yet
        }
        return boundary;
    }

    /**
     * Scan backward from `maxBytes` in `rawData` for the start of the last "OggS" capture pattern.
     * Returns that byte offset (the page start), or 0 if none is found (caller waits for more data).
     * Skips offset 0 itself: a segment that is only "everything up to the very first page" carries
     * no page and should wait, matching the FLAC frame-scan's `> 0` discipline.
     *
     * @param rawData Bytes to scan.
     * @param maxBytes Exclusive upper bound of the scan window (clamped to `rawData.length`).
     * @returns Offset of the last capture pattern that lies fully inside the window, or 0.
     */
    private static findLastOggPage(rawData: Uint8Array, maxBytes: number): number {
        const limit = Math.min(maxBytes, rawData.length);
        // Start at limit - 4 so all four pattern bytes fall inside the window.
        for (let i = limit - 4; i > 0; i--) {
            if (rawData[i] === OGG_CAPTURE[0] &&
                rawData[i + 1] === OGG_CAPTURE[1] &&
                rawData[i + 2] === OGG_CAPTURE[2] &&
                rawData[i + 3] === OGG_CAPTURE[3]) {
                return i;
            }
        }
        return 0;
    }

    /**
     * Make a raw page run self-contained by prepending the sidecar's setup bytes.
     *
     * @param info FormatInfo whose `seekData` carries the Opus sidecar.
     * @param rawBytes Page run to wrap; not modified.
     * @returns A new array of setup bytes followed by `rawBytes`, or `rawBytes` itself when no
     *          setup bytes are available.
     */
    wrapSegment(info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
        const sidecar = OpusFormatDecoder.opusSeekData(info);
        const setupBytes = sidecar?.setupHeaderBytes;
        if (!setupBytes || setupBytes.length === 0) {
            // Defensive: without setup bytes a mid-stream page run is undecodable. tryParseHeader
            // always populates the sidecar on success, so this path should not occur in practice.
            return rawBytes;
        }

        // Prepend OpusHead/OpusTags so the page run is self-contained for decodeAudioData.
        const result = new Uint8Array(setupBytes.length + rawBytes.length);
        result.set(setupBytes, 0);
        result.set(rawBytes, setupBytes.length);
        return result;
    }

    /**
     * Map a playback position to the byte offset of the index entry at or just before it.
     *
     * @param info FormatInfo whose `seekData` carries the Opus sidecar index.
     * @param positionSeconds Target presentation time in seconds.
     * @returns The `byteOffset` of the last index point whose presentation time is
     *          <= `positionSeconds` (the first point if none qualifies), or
     *          `info.audioDataOffset` when there is no usable index.
     */
    calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
        const sidecar = OpusFormatDecoder.opusSeekData(info);
        if (!sidecar || sidecar.points.length === 0) {
            // No index: degrade to start of audio (seek restarts) — same graceful fallback as FLAC.
            return info.audioDataOffset;
        }

        const points = sidecar.points;
        const preSkip = sidecar.preSkip;

        // Binary search for the largest entry whose presentation time is <= target. Presentation
        // time = max(0, (granule - preSkip) / 48000), matching 18.1's RFC 7845 math exactly.
        // `best` starts at 0 so a target before the first entry resolves to the first entry.
        let lo = 0, hi = points.length - 1, best = 0;
        while (lo <= hi) {
            const mid = (lo + hi) >> 1;
            const t = presentationTimeSeconds(points[mid].granulePosition, preSkip);
            if (t <= positionSeconds) {
                best = mid;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }

        // byteOffset is already a file-absolute page-start offset in the Opus file — no header math
        // to add (unlike FLAC's audio-relative stream_offset). Return it directly.
        return points[best].byteOffset;
    }

    /**
     * Narrow `info.seekData` to the Opus sidecar.
     *
     * @returns The sidecar when `seekData.kind` is 'opus-sidecar', otherwise null.
     */
    private static opusSeekData(info: FormatInfo): OpusSeekData | null {
        return info.seekData?.kind === 'opus-sidecar' ? info.seekData : null;
    }
}