From 7f3fb74126d0480d4a5ac20441ae1e76a48d9116 Mon Sep 17 00:00:00 2001 From: daniel-c-harvey Date: Tue, 23 Jun 2026 17:42:06 -0400 Subject: [PATCH 1/2] Replace broken per-segment Opus decode with WebCodecs AudioDecoder streaming pipeline --- .../Services/AudioInteropService.cs | 9 +- DeepDrftPublic/Interop/audio/AudioPlayer.ts | 145 ++++++- .../Interop/audio/IFormatDecoder.ts | 8 +- .../Interop/audio/IStreamingDecoder.ts | 58 +++ DeepDrftPublic/Interop/audio/OggDemuxer.ts | 295 ++++++++++++++ .../Interop/audio/OpusCapability.ts | 108 ++--- .../Interop/audio/OpusFormatDecoder.test.ts | 364 ----------------- .../Interop/audio/OpusFormatDecoder.ts | 169 -------- DeepDrftPublic/Interop/audio/OpusSidecar.ts | 33 ++ .../Interop/audio/OpusStreamDecoder.test.ts | 380 ++++++++++++++++++ .../Interop/audio/OpusStreamDecoder.ts | 262 ++++++++++++ DeepDrftPublic/Interop/audio/index.ts | 6 +- DeepDrftPublic/Interop/audio/webcodecs.d.ts | 82 ++++ 13 files changed, 1270 insertions(+), 649 deletions(-) create mode 100644 DeepDrftPublic/Interop/audio/IStreamingDecoder.ts create mode 100644 DeepDrftPublic/Interop/audio/OggDemuxer.ts delete mode 100644 DeepDrftPublic/Interop/audio/OpusFormatDecoder.test.ts delete mode 100644 DeepDrftPublic/Interop/audio/OpusFormatDecoder.ts create mode 100644 DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts create mode 100644 DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts create mode 100644 DeepDrftPublic/Interop/audio/webcodecs.d.ts diff --git a/DeepDrftPublic.Client/Services/AudioInteropService.cs b/DeepDrftPublic.Client/Services/AudioInteropService.cs index 6ea87e5..966fd99 100644 --- a/DeepDrftPublic.Client/Services/AudioInteropService.cs +++ b/DeepDrftPublic.Client/Services/AudioInteropService.cs @@ -71,10 +71,11 @@ public class AudioInteropService : IAsyncDisposable } /// - /// Probes whether this browser can decode Ogg Opus via decodeAudioData (Safari < 18.4 cannot). 
- /// Phase 18 capability gate (OQ2): the player only requests Opus when this returns true, otherwise it - /// stays on the universal lossless path (AC7 — no listener ever gets silence over a codec gap). Probe - /// failures degrade to false (assume incapable) so an interop error can never silence playback. + /// Probes whether this browser can stream-decode Ogg Opus via WebCodecs (AudioDecoder + + /// codec:'opus'; Safari < 26 / Firefox < 130 cannot). Phase 18 capability gate (OQ2): the + /// player only requests Opus when this returns true, otherwise it stays on the universal lossless path + /// (AC7 — no listener ever gets silence over a codec gap). Probe failures degrade to false + /// (assume incapable) so an interop error can never silence playback. /// public async Task CanDecodeOggOpus() { diff --git a/DeepDrftPublic/Interop/audio/AudioPlayer.ts b/DeepDrftPublic/Interop/audio/AudioPlayer.ts index be36e90..864291c 100644 --- a/DeepDrftPublic/Interop/audio/AudioPlayer.ts +++ b/DeepDrftPublic/Interop/audio/AudioPlayer.ts @@ -11,11 +12,12 @@ import { AudioContextManager } from './AudioContextManager.js'; import { StreamDecoder } from './StreamDecoder.js'; import { PlaybackScheduler } from './PlaybackScheduler.js'; import { IFormatDecoder } from './IFormatDecoder.js'; +import { IStreamingDecoder } from './IStreamingDecoder.js'; import { WavFormatDecoder } from './WavFormatDecoder.js'; import { Mp3FormatDecoder } from './Mp3FormatDecoder.js'; import { FlacFormatDecoder } from './FlacFormatDecoder.js'; -import { OpusFormatDecoder } from './OpusFormatDecoder.js'; -import { OpusSeekData, parseSidecar } from './OpusSidecar.js'; +import { OpusStreamDecoder } from './OpusStreamDecoder.js'; +import { OpusSeekData, parseSidecar, resolveOpusByteOffset } from './OpusSidecar.js'; export interface AudioResult { success: boolean; @@ -47,6 +48,15 @@ export class AudioPlayer { private streamDecoder: StreamDecoder; private scheduler: PlaybackScheduler; + // The Opus WebCodecs 
decode path (IStreamingDecoder seam), used INSTEAD of streamDecoder when the + // active stream is Ogg Opus. Null for WAV/MP3/FLAC, which keep the streamDecoder path unchanged. + // Holding both is deliberate: the change is the decode stage only; the same scheduler/Web Audio + // graph feeds from whichever decoder is active for the current stream. + private opusDecoder: IStreamingDecoder | null = null; + // The sidecar in effect for the active Opus stream (its seek index resolves byte offsets). Distinct + // from pendingOpusSidecar, which is the one set for the NEXT stream init. + private activeOpusSidecar: OpusSeekData | null = null; + // Playback state private isPlaying: boolean = false; private isPaused: boolean = false; @@ -106,10 +116,24 @@ export class AudioPlayer { this.stopProgressTracking(); this.scheduler.clear(); this.streamDecoder.reset(); + this.disposeOpusDecoder(); this.resetState(); - // Initialize new stream with the format decoder selected from Content-Type. this.isStreamingMode = true; + + // Opus routes to the WebCodecs streaming seam (IStreamingDecoder); WAV/MP3/FLAC keep the + // StreamDecoder wrap-and-decode path byte-for-byte. The sidecar (setup header + seek index) + // must already be set (setOpusSidecar, before init) — without it Opus cannot be decoded or + // seeked, so we fall back by leaving opusDecoder null and using the StreamDecoder path, + // which the server's C2 fallback (lossless bytes) matches. In practice the C# resolver only + // selects Opus when the sidecar parsed, so the null branch is defensive. + if (this.isOpusContentType(contentType) && this.pendingOpusSidecar) { + this.activeOpusSidecar = this.pendingOpusSidecar; + this.opusDecoder = new OpusStreamDecoder(this.contextManager, this.pendingOpusSidecar); + return { success: true }; + } + + // Non-Opus (or Opus-without-sidecar): the existing StreamDecoder path, unchanged. 
const formatDecoder = this.createFormatDecoder(contentType); this.streamDecoder.initialize(totalStreamLength, formatDecoder); return { success: true }; @@ -118,6 +142,18 @@ export class AudioPlayer { } } + private isOpusContentType(contentType: string): boolean { + return contentType.includes('audio/ogg') || contentType.includes('audio/opus'); + } + + private disposeOpusDecoder(): void { + if (this.opusDecoder) { + this.opusDecoder.dispose(); + this.opusDecoder = null; + } + this.activeOpusSidecar = null; + } + /** * Inject the Opus sidecar (setup header + seek index) for the next Opus stream. Wave 18.5 calls * this with the raw sidecar bytes (from its one-time HTTP fetch) BEFORE initializeStreaming; the @@ -136,8 +172,9 @@ export class AudioPlayer { } /** - * Select a format decoder from the response Content-Type. For Opus, applies the pending sidecar - * (if 18.5 has set one) so the decoder has its setup bytes + seek index before stream init. + * Select a format decoder from the response Content-Type for the StreamDecoder (wrap-and-decode) + * path. Opus is NOT handled here — it routes to the WebCodecs IStreamingDecoder seam in + * initializeStreaming. This factory serves WAV/MP3/FLAC only. 
*/ private createFormatDecoder(contentType: string): IFormatDecoder { if (contentType.includes('audio/mpeg') || contentType.includes('audio/mp3')) { @@ -146,13 +183,6 @@ export class AudioPlayer { if (contentType.includes('audio/flac') || contentType.includes('audio/x-flac')) { return new FlacFormatDecoder(); } - if (contentType.includes('audio/ogg') || contentType.includes('audio/opus')) { - const decoder = new OpusFormatDecoder(); - if (this.pendingOpusSidecar) { - decoder.setSidecar(this.pendingOpusSidecar); - } - return decoder; - } return new WavFormatDecoder(); // default (audio/wav, unknown) } @@ -165,10 +195,12 @@ export class AudioPlayer { */ async markStreamComplete(): Promise { try { - const results = await this.streamDecoder.markStreamComplete(); + const results = this.opusDecoder + ? await this.opusDecoder.complete() + : (await this.streamDecoder.markStreamComplete()).map(r => r.buffer); if (results.length > 0) { - for (const result of results) { - this.scheduler.addBuffer(result.buffer); + for (const buffer of results) { + this.scheduler.addBuffer(buffer); } if (this.streamingStarted && this.isPlaying) { this.scheduler.scheduleNewBuffers(); @@ -182,6 +214,53 @@ export class AudioPlayer { } async processStreamingChunk(chunk: Uint8Array): Promise { + return this.opusDecoder + ? this.processOpusChunk(chunk) + : this.processFormatChunk(chunk); + } + + /** Opus (WebCodecs) chunk path. Mirrors processFormatChunk's add->schedule->report shape. */ + private async processOpusChunk(chunk: Uint8Array): Promise { + try { + const decoder = this.opusDecoder!; + const buffers = await decoder.push(chunk); + + if (buffers.length > 0) { + for (const buffer of buffers) { + this.scheduler.addBuffer(buffer); + } + // Duration is known up front from the sidecar; set once (a seek must not overwrite it). 
+ if (this.duration === 0 && decoder.totalDuration) { + this.duration = decoder.totalDuration; + } + if (this.streamingStarted && this.isPlaying) { + this.scheduler.scheduleNewBuffers(); + } + } + + if (decoder.hasFatalError) { + return { success: false, error: 'Opus decode failed' }; + } + + // "headerParsed" maps to the decoder being configured (codec ready). canStart needs the + // min buffer count, exactly as the WAV path requires before first playback. + const headerParsed = decoder.ready; + const canStart = headerParsed && this.scheduler.hasMinimumBuffers(this.minBuffersForPlayback); + + return { + success: true, + canStartStreaming: canStart, + headerParsed, + bufferCount: this.scheduler.getBufferCount(), + duration: this.duration + }; + } catch (error) { + return { success: false, error: (error as Error).message }; + } + } + + /** WAV/MP3/FLAC (StreamDecoder) chunk path — unchanged from before the Opus seam split. */ + private async processFormatChunk(chunk: Uint8Array): Promise { try { const results = await this.streamDecoder.processChunk(chunk); @@ -310,6 +389,7 @@ export class AudioPlayer { try { this.scheduler.clear(); this.streamDecoder.reset(); + this.disposeOpusDecoder(); this.resetState(); this.stopProgressTracking(); @@ -371,8 +451,21 @@ export class AudioPlayer { */ private seekBeyondBuffer(position: number): AudioResult { try { - // The header must be parsed for byte-offset math; without it we cannot - // build a valid Range request. + // Opus: resolve the offset from the precomputed seek index (the accurate VBR-safe transfer + // function). The returned offset is a real page start, so the Range continuation lands the + // demuxer/decoder Ogg-sync-aligned. 
+ if (this.opusDecoder) { + if (!this.activeOpusSidecar) { + return { success: false, error: 'Cannot calculate byte offset' }; + } + return { + success: true, + seekBeyondBuffer: true, + byteOffset: resolveOpusByteOffset(this.activeOpusSidecar, position) + }; + } + + // WAV/MP3/FLAC: the header must be parsed for byte-offset math. if (!this.streamDecoder.getFormatInfo()) { return { success: false, error: 'Cannot calculate byte offset' }; } @@ -404,6 +497,11 @@ export class AudioPlayer { * Calculate byte offset for a time position (for C# layer) */ calculateByteOffset(positionSeconds: number): number { + if (this.opusDecoder) { + return this.activeOpusSidecar + ? resolveOpusByteOffset(this.activeOpusSidecar, positionSeconds) + : 0; + } if (!this.streamDecoder.getFormatInfo()) return 0; return this.streamDecoder.calculateByteOffset(positionSeconds); } @@ -416,17 +514,20 @@ export class AudioPlayer { try { // Stop current playback this.stopProgressTracking(); - const wasPlaying = this.isPlaying; this.isPlaying = false; // Clear buffers and set new offset this.scheduler.clearForSeek(); this.scheduler.setPlaybackOffset(seekPosition); - // Reinitialize decoder for the Range-continuation stream. totalStreamLength - // here is the 206 Content-Length (range start → EOF), not the full file size — - // the decoder uses it to detect stream-complete against raw audio bytes. - this.streamDecoder.reinitializeForRangeContinuation(totalStreamLength); + // Reinitialize the active decoder for the Range-continuation stream (206 body, no header/ + // setup pages). Opus resets demux + codec state (keeping the cached config); the + // StreamDecoder path uses totalStreamLength (the 206 Content-Length) to detect completion. 
+ if (this.opusDecoder) { + this.opusDecoder.reinitializeForRangeContinuation(); + } else { + this.streamDecoder.reinitializeForRangeContinuation(totalStreamLength); + } // Update state this.pausePosition = seekPosition; diff --git a/DeepDrftPublic/Interop/audio/IFormatDecoder.ts b/DeepDrftPublic/Interop/audio/IFormatDecoder.ts index 0964327..51d3cc4 100644 --- a/DeepDrftPublic/Interop/audio/IFormatDecoder.ts +++ b/DeepDrftPublic/Interop/audio/IFormatDecoder.ts @@ -1,5 +1,3 @@ -import { OpusSeekData } from './OpusSidecar.js'; - /** * FormatInfo: parsed header data needed to stream and seek an audio file. * Populated by IFormatDecoder.tryParseHeader; used by StreamDecoder throughout playback. @@ -38,10 +36,10 @@ export interface FormatInfo { * MP3 VBR: Xing/VBRI TOC (100-entry Uint8Array, values are file-percentage * 255). * FLAC: SeekTable (array of {sampleNumber: number, streamOffset: number} — stream_offset * is bytes from the start of audio frames, i.e. after all metadata blocks). - * Opus: OpusSeekData — the precomputed granule->byte index + OpusHead/OpusTags setup bytes, - * parsed from the sidecar artifact (NOT byteRate math; see OpusFormatDecoder). + * Opus does NOT flow through this seam — it uses the WebCodecs IStreamingDecoder path and resolves + * seek offsets via OpusSidecar.resolveOpusByteOffset, not FormatInfo.seekData. */ - seekData?: Mp3VbrSeekData | FlacSeekData | OpusSeekData | null; + seekData?: Mp3VbrSeekData | FlacSeekData | null; } export interface Mp3VbrSeekData { diff --git a/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts b/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts new file mode 100644 index 0000000..2fda588 --- /dev/null +++ b/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts @@ -0,0 +1,58 @@ +/** + * IStreamingDecoder - the stateful streaming-decode seam, parallel to IFormatDecoder. + * + * Why a second seam. 
`IFormatDecoder` (WAV/MP3/FLAC) is a *wrap-and-decode-each-segment* strategy: + * `StreamDecoder` cuts the stream into independently-decodable segments, `wrapSegment` makes each a + * standalone file, and `decodeAudioData` decodes each in isolation. That model is correct for raw PCM + * (WAV) and independently-decodable frames (FLAC), but it is fundamentally wrong for Opus: Opus has + * pre-skip (encoder delay) and inter-frame state (MDCT overlap-add, SILK/CELT continuity), so decoding + * page-runs independently re-applies the pre-skip and starts from cold codec state at every boundary — + * audible glitching and a broken timeline. + * + * A WebCodecs `AudioDecoder` is the right tool: one stateful decoder fed packets sequentially, decoding + * continuously with correct pre-skip-once handling and full inter-frame continuity. But it does NOT fit + * `IFormatDecoder` — it is async/callback-driven and owns its own buffering. So Opus gets this seam + * instead. `AudioPlayer` dispatches by content-type: WAV/MP3/FLAC keep the `StreamDecoder` path + * byte-for-byte; Opus routes here. Both feed the SAME `PlaybackScheduler` — the change is the decode + * stage only, never the schedule/playback stage. + * + * The seam is intentionally minimal and mirrors the lifecycle `StreamDecoder` already exposes so + * `AudioPlayer` can treat the two uniformly: initialize -> push chunks -> mark complete, plus a + * range-continuation reinit for seek-beyond-buffer. + */ + +export interface IStreamingDecoder { + /** + * True once decoding has failed unrecoverably. AudioPlayer checks this flag after every push and, + * when it is set, reports the chunk as failed ('Opus decode failed') rather than as a success. + */ + readonly hasFatalError: boolean; + + /** True once the decoder has enough to begin playback (header/config established). */ + readonly ready: boolean; + + /** Total stream duration in seconds if known up front (Opus knows it from the sidecar), else null. 
*/ + readonly totalDuration: number | null; + + /** + * Push raw stream bytes. Returns decoded AudioBuffers that became ready (possibly empty — WebCodecs + * decode is async, so a push may return nothing and a later push returns several). + */ + push(chunk: Uint8Array): Promise; + + /** + * Signal end-of-stream. Flushes the decoder and returns any residual decoded buffers (including the + * end-trimmed final buffer). + */ + complete(): Promise; + + /** + * Reinitialize for a Range-continuation after seek-beyond-buffer. The 206 body begins on an Ogg page + * boundary and carries no setup pages — the decoder reuses the cached config and resets demux/codec + * state so inter-frame continuity restarts cleanly from the new offset. + */ + reinitializeForRangeContinuation(): void; + + /** Tear down the underlying WebCodecs decoder and release resources. */ + dispose(): void; +} diff --git a/DeepDrftPublic/Interop/audio/OggDemuxer.ts b/DeepDrftPublic/Interop/audio/OggDemuxer.ts new file mode 100644 index 0000000..e9c9038 --- /dev/null +++ b/DeepDrftPublic/Interop/audio/OggDemuxer.ts @@ -0,0 +1,295 @@ +/** + * OggDemuxer - streaming Ogg-page -> Opus-packet demuxer for the WebCodecs decode path. + * + * Ogg Opus is a containerized, paged format. To feed a WebCodecs `AudioDecoder` we must extract the + * individual Opus *packets* from the Ogg container — the decoder takes packets (as `EncodedAudioChunk`s), + * not raw container bytes. This module is the client-side analogue of the C# `OggOpusParser`: it reads + * the page structure directly (the "OggS" capture pattern + the 27-byte page header + segment table) and + * reassembles packets across the lacing, tracking the granule position that gives each packet its time. + * + * It is deliberately *streaming*: `push(bytes)` accepts arbitrary network chunks (a packet, a page, or a + * fraction of either) and returns whatever WHOLE packets have become available, holding partial state + * across calls. 
This matches how `StreamAudioWithEarlyPlayback` feeds bytes in adaptive 16–64 KB chunks. + * + * Lacing rules (RFC 3533 §6): a page's segment table lists N segment lengths (0–255). A packet is the + * concatenation of consecutive segments up to and including the first segment whose length is < 255. A + * segment of exactly 255 means "this packet continues into the next segment" — and if it is the page's + * LAST segment, the packet continues into the next page (the next page's header-type has the + * continuation flag set). The granule position on a page is the end-granule of the LAST packet that + * *completes* on that page. + * + * The two leading setup packets (OpusHead, OpusTags) are NOT audio and are skipped — they configure the + * decoder (the sidecar carries them as the codec description), they are never decoded as audio packets. + */ + +const OGG_CAPTURE = [0x4f, 0x67, 0x67, 0x53]; // "OggS" +const OGG_PAGE_HEADER_SIZE = 27; +const GRANULE_OFFSET = 6; // 64-bit granule position within the page header +const HEADER_TYPE_OFFSET = 5; // bit 0x01 = continued packet, 0x02 = BOS, 0x04 = EOS +const SEGMENT_COUNT_OFFSET = 26; // number of segment-table entries +const CONTINUATION_FLAG = 0x01; + +const OPUS_HEAD_SIG = [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64]; // "OpusHead" +const OPUS_TAGS_SIG = [0x4f, 0x70, 0x75, 0x73, 0x54, 0x61, 0x67, 0x73]; // "OpusTags" + +/** A demuxed Opus audio packet plus the timing context needed to schedule and trim it. */ +export interface OpusPacket { + /** Raw Opus packet bytes (one Opus frame's worth — fed straight to the AudioDecoder). */ + data: Uint8Array; + /** + * The end-granule of the page this packet completed on, or null if the page carried no usable + * granule (mid-stream pages between completion points share the next completing page's granule — + * we attach the granule only to the packet that completes on a granule-bearing page). A 48 kHz + * sample count; presentation time = (granule - preSkip) / 48000. 
+ */ + pageGranule: number | null; + /** True when this packet completed on the stream's final (EOS) page — drives end-trim. */ + isLastPage: boolean; +} + +/** Read a little-endian uint64 as a JS number (exact to 2^53 — far beyond any real granule). */ +function readUint64LE(buf: Uint8Array, offset: number): number { + let lo = 0; + let hi = 0; + for (let i = 0; i < 4; i++) lo += buf[offset + i] * 2 ** (8 * i); + for (let i = 0; i < 4; i++) hi += buf[offset + 4 + i] * 2 ** (8 * i); + return hi * 0x100000000 + lo; +} + +function startsWith(buf: Uint8Array, sig: number[]): boolean { + if (buf.length < sig.length) return false; + for (let i = 0; i < sig.length; i++) if (buf[i] !== sig[i]) return false; + return true; +} + +export class OggDemuxer { + // Unconsumed raw bytes carried across push() calls (a page may straddle a network-chunk boundary). + private pending: Uint8Array = new Uint8Array(0); + // Bytes of a packet that spans pages (255-length last segment + continuation flag next page). + private partialPacket: Uint8Array[] = []; + // Once both setup packets are seen, every subsequent packet is audio. + private setupPacketsSeen = 0; + + /** + * Feed raw stream bytes (any size). Returns all WHOLE Opus AUDIO packets that became decodable, + * in order. Setup packets (OpusHead/OpusTags) are consumed and skipped. Incomplete trailing bytes + * are retained for the next push. + */ + push(bytes: Uint8Array): OpusPacket[] { + this.pending = this.concat(this.pending, bytes); + return this.drainPages(); + } + + /** + * Reset to a fresh stream. Used on seek/range-continuation: the new 206 body begins on a page + * boundary, so all partial-packet and pending state must be dropped. setupPacketsSeen is reset to + * 2 (already configured) for a continuation — a mid-stream slice carries no setup pages, only audio + * pages — so the demuxer treats the first page's packets as audio immediately. 
+ */ + reset(isContinuation: boolean): void { + this.pending = new Uint8Array(0); + this.partialPacket = []; + this.setupPacketsSeen = isContinuation ? 2 : 0; + } + + private drainPages(): OpusPacket[] { + const packets: OpusPacket[] = []; + + for (;;) { + const page = this.tryReadPage(); + if (!page) break; + this.parsePage(page, packets); + } + + return packets; + } + + /** + * Try to slice one complete Ogg page off the front of `pending`. Returns null (and leaves `pending` + * intact) when a whole page is not yet buffered. Resynchronises by scanning for "OggS" if `pending` + * does not start on a page boundary (defensive — the encoder writes contiguous pages, but a + * continuation stream could in theory begin mid-garbage; the seek offset is always a page start). + */ + private tryReadPage(): { header: Uint8Array; segTable: Uint8Array; payload: Uint8Array; total: number } | null { + const buf = this.pending; + if (buf.length < OGG_PAGE_HEADER_SIZE) return null; + + // Resync: ensure we are positioned at a capture pattern. + if (!startsWith(buf, OGG_CAPTURE)) { + const sync = this.findCapture(buf, 0); + if (sync < 0) { + // No capture pattern at all — keep only the last 3 bytes (a capture could straddle). + this.pending = buf.subarray(Math.max(0, buf.length - 3)); + return null; + } + this.pending = buf.subarray(sync); + return this.tryReadPage(); + } + + const segCount = buf[SEGMENT_COUNT_OFFSET]; + const segTableEnd = OGG_PAGE_HEADER_SIZE + segCount; + if (buf.length < segTableEnd) return null; // segment table not fully buffered yet + + const segTable = buf.subarray(OGG_PAGE_HEADER_SIZE, segTableEnd); + let payloadSize = 0; + for (let i = 0; i < segCount; i++) payloadSize += segTable[i]; + + const total = segTableEnd + payloadSize; + if (buf.length < total) return null; // payload not fully buffered yet + + const header = buf.subarray(0, OGG_PAGE_HEADER_SIZE); + const payload = buf.subarray(segTableEnd, total); + + // Advance past this page. 
+ this.pending = buf.subarray(total); + return { header, segTable, payload, total }; + } + + private parsePage( + page: { header: Uint8Array; segTable: Uint8Array; payload: Uint8Array; total: number }, + out: OpusPacket[] + ): void { + const { header, segTable, payload } = page; + const headerType = header[HEADER_TYPE_OFFSET]; + const continued = (headerType & CONTINUATION_FLAG) !== 0; + const isEos = (headerType & 0x04) !== 0; + const granule = readUint64LE(header, GRANULE_OFFSET); + // 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page" — no usable timestamp. + const hasGranule = !(header[GRANULE_OFFSET] === 0xff && header[GRANULE_OFFSET + 1] === 0xff && + header[GRANULE_OFFSET + 2] === 0xff && header[GRANULE_OFFSET + 3] === 0xff && + header[GRANULE_OFFSET + 4] === 0xff && header[GRANULE_OFFSET + 5] === 0xff && + header[GRANULE_OFFSET + 6] === 0xff && header[GRANULE_OFFSET + 7] === 0xff); + + // If this page does NOT begin with a continuation, any half-built packet from a prior page is + // orphaned (should not happen in a well-formed stream, but never carry garbage forward). + if (!continued) this.partialPacket = []; + + // Walk the segment table, reassembling packets. A packet ends at the first segment < 255. + const completedPackets: Uint8Array[] = []; + let segStart = 0; + let cursor = 0; + for (let i = 0; i < segTable.length; i++) { + const len = segTable[i]; + cursor += len; + if (len < 255) { + // Packet boundary: segments [segStart, cursor) form (the tail of) a packet. + const slice = payload.subarray(segStart, cursor); + if (this.partialPacket.length > 0) { + this.partialPacket.push(slice); + completedPackets.push(this.flattenPartial()); + this.partialPacket = []; + } else { + completedPackets.push(slice); + } + segStart = cursor; + } + // len === 255 with i === last segment -> packet spans into the next page (handled below). + } + + // Any trailing 255-run that did not terminate is a packet continuing into the next page. 
+ if (segStart < cursor) { + this.partialPacket.push(payload.subarray(segStart, cursor)); + } + + // Classify completed packets: the first two whole packets in the whole stream are the setup + // packets (OpusHead, OpusTags) and are skipped. Everything after is audio. The page granule is + // attached to the LAST completing audio packet on a granule-bearing page (the granule is that + // page's end-granule per RFC 7845). + for (let p = 0; p < completedPackets.length; p++) { + const pkt = completedPackets[p]; + if (this.setupPacketsSeen < 2) { + // Only count packets that are actually the Opus setup headers; guard against a stray + // first audio packet being mistaken for setup on a continuation (reset handles that). + if (this.setupPacketsSeen === 0 && startsWith(pkt, OPUS_HEAD_SIG)) { + this.setupPacketsSeen = 1; + continue; + } + if (this.setupPacketsSeen === 1 && startsWith(pkt, OPUS_TAGS_SIG)) { + this.setupPacketsSeen = 2; + continue; + } + // Not a recognised setup packet while we still expected one — treat as audio (a + // continuation slice that began mid-stream). Fall through. + } + + const isLastCompleting = p === completedPackets.length - 1; + out.push({ + data: pkt, + pageGranule: hasGranule && isLastCompleting ? 
granule : null, + isLastPage: isEos + }); + } + } + + private flattenPartial(): Uint8Array { + if (this.partialPacket.length === 1) return this.partialPacket[0]; + let len = 0; + for (const s of this.partialPacket) len += s.length; + const out = new Uint8Array(len); + let o = 0; + for (const s of this.partialPacket) { + out.set(s, o); + o += s.length; + } + return out; + } + + private findCapture(buf: Uint8Array, from: number): number { + for (let i = from; i + 4 <= buf.length; i++) { + if (buf[i] === OGG_CAPTURE[0] && buf[i + 1] === OGG_CAPTURE[1] && + buf[i + 2] === OGG_CAPTURE[2] && buf[i + 3] === OGG_CAPTURE[3]) { + return i; + } + } + return -1; + } + + private concat(a: Uint8Array, b: Uint8Array): Uint8Array { + if (a.length === 0) return b; + if (b.length === 0) return a; + const out = new Uint8Array(a.length + b.length); + out.set(a, 0); + out.set(b, a.length); + return out; + } +} + +/** + * Extract the raw OpusHead identification-header *packet* from the sidecar's setup-header bytes (which + * are the verbatim Ogg PAGES wrapping OpusHead + OpusTags). WebCodecs' `AudioDecoderConfig.description` + * for Opus is the OpusHead packet (RFC 7845 §5.1), not the Ogg page — so we demux the setup pages and + * return the first packet's bytes. Returns null if no OpusHead packet is found. + */ +export function extractOpusHead(setupHeaderBytes: Uint8Array): Uint8Array | null { + // Walk pages manually (the setup region is small — at most two pages) and return the first packet + // that starts with the OpusHead signature. 
+ let offset = 0; + while (offset + OGG_PAGE_HEADER_SIZE <= setupHeaderBytes.length) { + if (!(setupHeaderBytes[offset] === OGG_CAPTURE[0] && setupHeaderBytes[offset + 1] === OGG_CAPTURE[1] && + setupHeaderBytes[offset + 2] === OGG_CAPTURE[2] && setupHeaderBytes[offset + 3] === OGG_CAPTURE[3])) { + return null; + } + const segCount = setupHeaderBytes[offset + SEGMENT_COUNT_OFFSET]; + const segTableEnd = offset + OGG_PAGE_HEADER_SIZE + segCount; + if (segTableEnd > setupHeaderBytes.length) return null; + let payloadSize = 0; + for (let i = 0; i < segCount; i++) payloadSize += setupHeaderBytes[segTableEnd - segCount + i]; + const payloadStart = segTableEnd; + const payloadEnd = payloadStart + payloadSize; + if (payloadEnd > setupHeaderBytes.length) return null; + + const payload = setupHeaderBytes.subarray(payloadStart, payloadEnd); + if (startsWith(payload, OPUS_HEAD_SIG)) { + // The OpusHead packet is the whole first-page payload (it always fits one segment / page). + return payload; + } + offset = payloadEnd; + } + return null; +} + +/** Channel count from an OpusHead packet (RFC 7845 §5.1: byte 9, after the 8-byte magic + version). */ +export function opusHeadChannelCount(opusHead: Uint8Array): number { + if (opusHead.length < 10) return 2; // safe nominal + return opusHead[9]; +} diff --git a/DeepDrftPublic/Interop/audio/OpusCapability.ts b/DeepDrftPublic/Interop/audio/OpusCapability.ts index 39904be..c1933af 100644 --- a/DeepDrftPublic/Interop/audio/OpusCapability.ts +++ b/DeepDrftPublic/Interop/audio/OpusCapability.ts @@ -1,102 +1,46 @@ /** - * OpusCapability - runtime detection of Ogg-Opus decode support. + * OpusCapability - runtime detection of WebCodecs Ogg-Opus decode support. * - * The bespoke graph decodes segments via `AudioContext.decodeAudioData`. Ogg-Opus support there - * is long-standing in Chrome and Firefox but arrived in Safari only at 18.4 (macOS 15.4 / iOS 18.4, - * March 2025); older Safari decodes Opus only in a CAF container, not Ogg. 
iOS Safari is a primary - * music-listening surface, so a browser that cannot decode Ogg Opus must fall back to the lossless - * WAV path (§3.4 / OQ2). + * The Opus decode path is a WebCodecs `AudioDecoder` streaming pipeline (OpusStreamDecoder), NOT + * `decodeAudioData`. So the capability gate must test the path actually used: whether the browser has + * `AudioDecoder` AND supports the `codec: 'opus'` config. `AudioDecoder` is available on Chrome/Edge, + * Firefox 130+, and Safari 26+ (Safari 16.4 shipped only the video half of WebCodecs); older Safari and older Firefox lack it, and those listeners fall back + * to the universal lossless WAV path (§3.4 / OQ2 / AC7 — no listener ever gets silence over a codec gap). * - * This module is the detection *seam* only — it answers "can this browser decode Ogg Opus?". The - * player (waves 18.5 / 18.6) consumes the answer to choose the delivery format; this module never - * touches the player or the stream request. - * - * Detection is a genuine probe: a tiny in-memory Ogg-Opus blob is handed to `decodeAudioData`. A - * UA/version gate was rejected because Safari's Opus story is version-specific and UA strings lie; - * a real decode attempt is authoritative. The result is cached after the first probe (capability - * does not change within a session). + * This module is the detection *seam* only — it answers "can this browser stream-decode Opus via + * WebCodecs?". The player (StreamingAudioPlayerService.ResolveStreamFormatAsync) consumes the answer to + * choose the delivery format; this module never touches the player or the stream request. The result is + * cached after the first probe (capability does not change within a session). */ -/** - * A minimal, valid Ogg-Opus file generated by ffmpeg/libopus (libopus via Lavc62, libavformat62). - * Three pages: OpusHead (page 0), OpusTags (page 1), one audio page of ~50 ms silence (page 2, - * EOS flag set). Mono, 48 kHz. All three Ogg page CRC32s are verified correct — generated by - * construction; not hand-assembled. 
- * - * ffmpeg command: - * /c/ffmpeg/ffmpeg.exe -f lavfi -i anullsrc=r=48000:cl=mono -t 0.05 \ - * -c:a libopus -b:a 24k -f ogg /tmp/opusprobe.opus - * - * 176 bytes decoded; 236 chars base64. - */ -const PROBE_OGG_OPUS_BASE64 = - 'T2dnUwACAAAAAAAAAAD/3cwSAAAAAJGmJikBE09wdXNIZWFkAQE4AYC7AAAAAABPZ2dTAAAA' + - 'AAAAAAAAAP/dzBIBAAAA6iGxjgE+T3B1c1RhZ3MNAAAATGF2ZjYyLjEyLjEwMQEAAAAdAAAA' + - 'ZW5jb2Rlcj1MYXZjNjIuMjguMTAxIGxpYm9wdXNPZ2dTAASYCgAAAAAAAP/dzBICAAAAjUsr' + - 'kAMDAwP4//74//74//4='; - let cachedSupport: Promise | null = null; /** - * Resolve whether this browser can decode Ogg Opus via `decodeAudioData`. Cached after the first - * call. Never rejects — a probe failure resolves to `false` (treat as unsupported, fall back to - * lossless). Pass an existing `AudioContext`/`OfflineAudioContext` to avoid allocating one; if none - * is given, a short-lived `OfflineAudioContext` is created and torn down. + * Resolve whether this browser can stream-decode Ogg Opus via WebCodecs. Cached after the first call. + * Never rejects — any failure (no AudioDecoder, unsupported config, thrown probe) resolves to `false` + * (treat as unsupported, fall back to lossless) so an interop error can never silence playback. */ -export function canDecodeOggOpus(context?: BaseAudioContext): Promise { +export function canDecodeOggOpus(): Promise { if (cachedSupport === null) { - cachedSupport = probe(context); + cachedSupport = probe(); } return cachedSupport; } -async function probe(context?: BaseAudioContext): Promise { - let ctx = context; - let ownsContext = false; - +async function probe(): Promise { try { - if (!ctx) { - const OfflineCtor = - (globalThis as { OfflineAudioContext?: typeof OfflineAudioContext }).OfflineAudioContext ?? - (globalThis as { webkitOfflineAudioContext?: typeof OfflineAudioContext }).webkitOfflineAudioContext; - if (!OfflineCtor) return false; - // 1 channel, 1 frame, 48 kHz — the smallest legal context; we never render it. 
- ctx = new OfflineCtor(1, 1, OPUS_PROBE_SAMPLE_RATE); - ownsContext = true; + if (typeof AudioDecoder === 'undefined' || typeof AudioDecoder.isConfigSupported !== 'function') { + return false; } - - const buffer = base64ToArrayBuffer(PROBE_OGG_OPUS_BASE64); - // decodeAudioData detaches the buffer; the probe blob is single-use, so that is fine. - await decode(ctx, buffer); - return true; + // 48 kHz stereo is the canonical fullband Opus shape this site produces. isConfigSupported does + // not need the OpusHead `description` to report codec support, so we probe without it. + const result = await AudioDecoder.isConfigSupported({ + codec: 'opus', + sampleRate: 48000, + numberOfChannels: 2 + }); + return result.supported === true; } catch { - // DOMException (unsupported / corrupt) or any allocation failure -> unsupported. return false; - } finally { - // OfflineAudioContext has no close() in all engines; guard it. - if (ownsContext && ctx && 'close' in ctx && typeof (ctx as AudioContext).close === 'function') { - try { await (ctx as AudioContext).close(); } catch { /* best-effort teardown */ } - } } } - -const OPUS_PROBE_SAMPLE_RATE = 48000; - -/** Promisify decodeAudioData; older Safari only supports the callback form. */ -function decode(ctx: BaseAudioContext, buffer: ArrayBuffer): Promise { - return new Promise((resolve, reject) => { - const result = ctx.decodeAudioData(buffer, resolve, reject); - // Modern engines return a Promise; bridge it so a rejection isn't dropped. 
- if (result && typeof (result as Promise).then === 'function') { - (result as Promise).then(resolve, reject); - } - }); -} - -function base64ToArrayBuffer(b64: string): ArrayBuffer { - const binary = atob(b64); - const buffer = new ArrayBuffer(binary.length); - const out = new Uint8Array(buffer); - for (let i = 0; i < binary.length; i++) out[i] = binary.charCodeAt(i); - return buffer; -} diff --git a/DeepDrftPublic/Interop/audio/OpusFormatDecoder.test.ts b/DeepDrftPublic/Interop/audio/OpusFormatDecoder.test.ts deleted file mode 100644 index e2a0ad1..0000000 --- a/DeepDrftPublic/Interop/audio/OpusFormatDecoder.test.ts +++ /dev/null @@ -1,364 +0,0 @@ -/** - * OpusFormatDecoder / OpusSidecar tests. - * - * There is no TS test runner configured in this repo (no package.json, no jest/vitest, no other - * *.test.ts). Rather than introduce a heavy harness, this file is a self-contained, zero-dependency - * test: a ~15-line inline assert/test harness, no `node:` imports, no DOM. It exercises the pure - * parser / resolver / alignment logic (none of which touches the DOM or Web Audio). - * - * It is EXCLUDED from the production tsc build (tsconfig `exclude: Interop/**\/*.test.ts`) so it - * never ships in wwwroot/js. To run it (Node 22+ strips TS types natively — no tsc, no deps), the - * test's `.js` import specifiers must resolve to the COMPILED decoder modules, so run a copy from - * the compiled output directory: - * - * # 1. produce the compiled decoder modules (the normal build already does this): - * dotnet build DeepDrftPublic/DeepDrftPublic.csproj - * # 2. run this test next to the compiled .js siblings (Node strips the types at load): - * cp DeepDrftPublic/Interop/audio/OpusFormatDecoder.test.ts DeepDrftPublic/wwwroot/js/audio/ - * node DeepDrftPublic/wwwroot/js/audio/OpusFormatDecoder.test.ts - * - * A thrown error / non-zero exit signals failure; "ALL TESTS PASSED" signals success. 
(The - * copied file lives only in the gitignored wwwroot/js output; the source under Interop is the - * committed artifact.) - * - * The sidecar bytes built in `makeSidecar` reproduce the C# wire format byte-for-byte - * (DeepDrftContent.Processors.Opus.OpusSidecar.ToBytes / OggOpusSeekIndex.ToBytes): - * [uint32 setupHeaderLength][setup bytes] - * [uint64 totalByteLength][double totalDuration][uint32 count][uint16 preSkip][uint16 reserved] - * count x [uint64 granulePosition][uint64 byteOffset] — all little-endian. - * The C# serializer is the source of truth; this verifies the TS parser is its exact counterpart. - */ - -import { parseSidecar, presentationTimeSeconds, OPUS_SAMPLE_RATE } from './OpusSidecar.js'; -import type { OpusSeekData } from './OpusSidecar.js'; -import { OpusFormatDecoder } from './OpusFormatDecoder.js'; -import type { FormatInfo } from './IFormatDecoder.js'; - -// --- tiny inline harness (no dependencies) --------------------------------------------------- -let passed = 0; -const failures: string[] = []; -function test(name: string, fn: () => void): void { - try { - fn(); - passed++; - } catch (e) { - failures.push(`FAIL: ${name}\n ${(e as Error).message}`); - } -} -function assertEqual(actual: unknown, expected: unknown, msg?: string): void { - if (actual !== expected) { - throw new Error(`${msg ?? 'assertEqual'}: expected ${String(expected)}, got ${String(actual)}`); - } -} -function assertArray(actual: ArrayLike, expected: number[], msg?: string): void { - const a = Array.from(actual); - if (a.length !== expected.length || a.some((v, i) => v !== expected[i])) { - throw new Error(`${msg ?? 'assertArray'}: expected [${expected}], got [${a}]`); - } -} -function assertNull(actual: unknown, msg?: string): void { - if (actual !== null) throw new Error(`${msg ?? 'assertNull'}: expected null, got ${String(actual)}`); -} -function assertNotNull(actual: T | null, msg?: string): T { - if (actual === null) throw new Error(`${msg ?? 
'assertNotNull'}: got null`); - return actual; -} - -interface SidecarSpec { - setupHeader: number[]; - totalByteLength: number; - totalDuration: number; - preSkip: number; - points: Array<{ granule: number; byteOffset: number }>; -} - -/** Serialize a sidecar blob exactly as the C# OpusSidecar/OggOpusSeekIndex writers do. */ -function makeSidecar(spec: SidecarSpec): Uint8Array { - const SEEK_INDEX_HEADER_SIZE = 24; - const SEEK_POINT_SIZE = 16; - const setupLen = spec.setupHeader.length; - const total = 4 + setupLen + SEEK_INDEX_HEADER_SIZE + spec.points.length * SEEK_POINT_SIZE; - - const bytes = new Uint8Array(total); - const view = new DataView(bytes.buffer); - - view.setUint32(0, setupLen, true); - bytes.set(spec.setupHeader, 4); - - let p = 4 + setupLen; - writeUint64(view, p, spec.totalByteLength); - view.setFloat64(p + 8, spec.totalDuration, true); - view.setUint32(p + 16, spec.points.length, true); - view.setUint16(p + 20, spec.preSkip, true); - // bytes 22-23 reserved (zero) - - p += SEEK_INDEX_HEADER_SIZE; - for (const pt of spec.points) { - writeUint64(view, p, pt.granule); - writeUint64(view, p + 8, pt.byteOffset); - p += SEEK_POINT_SIZE; - } - return bytes; -} - -function writeUint64(view: DataView, offset: number, value: number): void { - view.setUint32(offset, value >>> 0, true); - view.setUint32(offset + 4, Math.floor(value / 0x100000000), true); -} - -function formatInfoFor(sidecar: Uint8Array): FormatInfo { - const decoder = new OpusFormatDecoder(); - const parsed = assertNotNull(parseSidecar(sidecar), 'sidecar should parse'); - decoder.setSidecar(parsed); - return assertNotNull(decoder.tryParseHeader([], 0), 'tryParseHeader should build FormatInfo'); -} - -// --- parseSidecar: byte-for-byte round-trip against the C# layout ----------------------------- - -test('parseSidecar round-trips the C# binary layout exactly', () => { - const setup = [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64]; // "OpusHead" stand-in - const spec: SidecarSpec = { - 
setupHeader: setup, - totalByteLength: 1_234_567, - totalDuration: 212.5, - preSkip: 312, - points: [ - { granule: 312, byteOffset: 4096 }, // first point: granule == preSkip -> t=0 - { granule: 312 + 24000, byteOffset: 9000 }, // +0.5 s - { granule: 312 + 48000, byteOffset: 14000 }, // +1.0 s - ], - }; - - const parsed: OpusSeekData = assertNotNull(parseSidecar(makeSidecar(spec))); - assertEqual(parsed.kind, 'opus-sidecar', 'kind'); - assertArray(parsed.setupHeaderBytes, setup, 'setup header bytes'); - assertEqual(parsed.totalByteLength, spec.totalByteLength, 'totalByteLength'); - assertEqual(parsed.totalDurationSeconds, spec.totalDuration, 'totalDuration'); - assertEqual(parsed.preSkip, spec.preSkip, 'preSkip'); - assertEqual(parsed.points.length, 3, 'point count'); - assertEqual(parsed.points[1].granulePosition, 312 + 24000, 'point[1].granule'); - assertEqual(parsed.points[1].byteOffset, 9000, 'point[1].byteOffset'); -}); - -test('parseSidecar honours a borrowed view byteOffset (sidecar not at buffer start)', () => { - const blob = makeSidecar({ - setupHeader: [1, 2, 3, 4], - totalByteLength: 100, - totalDuration: 1.0, - preSkip: 0, - points: [{ granule: 0, byteOffset: 8 }], - }); - const padded = new Uint8Array(blob.length + 7); - padded.set(blob, 7); - const parsed = assertNotNull(parseSidecar(padded.subarray(7))); - assertArray(parsed.setupHeaderBytes, [1, 2, 3, 4], 'borrowed setup bytes'); - assertEqual(parsed.points[0].byteOffset, 8, 'borrowed point offset'); -}); - -test('parseSidecar returns null on a truncated blob', () => { - const blob = makeSidecar({ - setupHeader: [0], - totalByteLength: 1, - totalDuration: 0, - preSkip: 0, - points: [{ granule: 0, byteOffset: 0 }], - }); - assertNull(parseSidecar(blob.subarray(0, 3)), 'short of length prefix'); - assertNull(parseSidecar(blob.subarray(0, blob.length - 4)), 'declared count overruns'); -}); - -test('presentationTimeSeconds applies preSkip and clamps at zero (RFC 7845)', () => { - 
assertEqual(presentationTimeSeconds(312, 312), 0, 'granule == preSkip'); - assertEqual(presentationTimeSeconds(0, 312), 0, 'below preSkip clamps'); - assertEqual(presentationTimeSeconds(312 + OPUS_SAMPLE_RATE, 312), 1.0, '+48000 -> 1 s'); -}); - -// --- calculateByteOffset: binary search over the precomputed index (exact, not interpolation) - - -test('calculateByteOffset returns the page-start of the largest entry with time <= t', () => { - const points = [0, 1, 2, 3].map(i => ({ - granule: 1000 + i * (OPUS_SAMPLE_RATE / 2), - byteOffset: 4096 + i * 5000, - })); - const info = formatInfoFor(makeSidecar({ - setupHeader: [9, 9, 9, 9], totalByteLength: 999_999, totalDuration: 1.5, preSkip: 1000, points, - })); - const d = new OpusFormatDecoder(); - assertEqual(d.calculateByteOffset(info, 0.0), 4096, 't=0 -> first point'); - assertEqual(d.calculateByteOffset(info, 0.4), 4096, 'just before bucket 1'); - assertEqual(d.calculateByteOffset(info, 0.5), 9096, 'exactly bucket 1'); - assertEqual(d.calculateByteOffset(info, 0.9), 9096, 'within bucket 1'); - assertEqual(d.calculateByteOffset(info, 1.0), 14096, 'exactly bucket 2'); - assertEqual(d.calculateByteOffset(info, 99), 19096, 'past end -> last point'); -}); - -test('calculateByteOffset never interpolates between points', () => { - const info = formatInfoFor(makeSidecar({ - setupHeader: [0], totalByteLength: 10_000, totalDuration: 1.0, preSkip: 0, - points: [{ granule: 0, byteOffset: 100 }, { granule: OPUS_SAMPLE_RATE, byteOffset: 9000 }], - })); - const d = new OpusFormatDecoder(); - assertEqual(d.calculateByteOffset(info, 0.5), 100, 'midpoint snaps to lower page start'); -}); - -test('calculateByteOffset degrades to audioDataOffset with an empty index', () => { - const info = formatInfoFor(makeSidecar({ - setupHeader: [1, 2, 3, 4, 5], totalByteLength: 0, totalDuration: 0, preSkip: 0, points: [], - })); - const d = new OpusFormatDecoder(); - assertEqual(info.audioDataOffset, 5, 'audioDataOffset == setup header length'); 
- assertEqual(d.calculateByteOffset(info, 10), info.audioDataOffset, 'empty index degrades'); -}); - -// --- getAlignedSegmentSize: Ogg "OggS" page-boundary alignment -------------------------------- - -function withOggS(len: number, ...pageStarts: number[]): Uint8Array { - const out = new Uint8Array(len).fill(0xaa); - for (const s of pageStarts) { out[s] = 0x4f; out[s + 1] = 0x67; out[s + 2] = 0x67; out[s + 3] = 0x53; } - return out; -} -const stubInfo = { audioDataOffset: 0 } as FormatInfo; - -test('getAlignedSegmentSize cuts at the last OggS page start within the window', () => { - const raw = withOggS(64, 4, 40); - assertEqual(new OpusFormatDecoder().getAlignedSegmentSize(stubInfo, 64, 64, false, raw), 40, 'last page start'); -}); - -test('getAlignedSegmentSize waits (returns 0) when no page boundary is found mid-stream', () => { - const raw = withOggS(64); - assertEqual(new OpusFormatDecoder().getAlignedSegmentSize(stubInfo, 64, 64, false, raw), 0, 'no boundary'); -}); - -test('getAlignedSegmentSize flushes the whole candidate on stream completion without a boundary', () => { - const raw = withOggS(64); - assertEqual(new OpusFormatDecoder().getAlignedSegmentSize(stubInfo, 64, 64, true, raw), 64, 'flush on complete'); -}); - -test('getAlignedSegmentSize ignores a page start at offset 0 (needs a real cut point)', () => { - const raw = withOggS(64, 0); - assertEqual(new OpusFormatDecoder().getAlignedSegmentSize(stubInfo, 64, 64, false, raw), 0, 'offset 0 skipped'); -}); - -// --- wrapSegment: OpusHead/OpusTags setup-header carry ---------------------------------------- - -test('wrapSegment prepends the cached setup bytes to a page run', () => { - const setup = [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64]; // "OpusHead" - const info = formatInfoFor(makeSidecar({ - setupHeader: setup, totalByteLength: 100, totalDuration: 1, preSkip: 0, - points: [{ granule: 0, byteOffset: setup.length }], - })); - const pageRun = new Uint8Array([0x4f, 0x67, 0x67, 0x53, 0x11, 
0x22]); // "OggS" + payload - const wrapped = new OpusFormatDecoder().wrapSegment(info, pageRun); - assertArray(wrapped.subarray(0, setup.length), setup, 'setup header first'); - assertArray(wrapped.subarray(setup.length), [0x4f, 0x67, 0x67, 0x53, 0x11, 0x22], 'page run follows'); -}); - -// --- OpusCapability probe sample: structural validity guard ----------------------------------- -// -// These tests decode PROBE_OGG_OPUS_BASE64 from OpusCapability.ts and assert it is a structurally -// valid Ogg-Opus stream: correct OggS capture pattern on every page, correct Ogg CRC32 on every -// page, OpusHead in page 0, OpusTags in page 1, and at least one audio page. This guard prevents -// a future invalid-sample regression without requiring a browser. -// -// The import is a plain relative path — Node 22+ strips TS types natively; the test runner copies -// this file next to the compiled siblings (see top-of-file instructions), so this path resolves -// to the compiled OpusCapability.js at that point. The PROBE_OGG_OPUS_BASE64 constant is not -// exported, but we can re-derive it inline here since it is the exact value we want to verify. - -/** Ogg CRC-32 (poly 0x04c11db7, init 0, no reflection — RFC 3533 §6.3). */ -function oggCrc32(buf: Uint8Array): number { - const table = new Uint32Array(256); - for (let i = 0; i < 256; i++) { - let r = i << 24; - for (let j = 0; j < 8; j++) r = (r & 0x80000000) ? ((r << 1) ^ 0x04c11db7) : (r << 1); - table[i] = r >>> 0; - } - let crc = 0; - for (let i = 0; i < buf.length; i++) crc = (table[((crc >>> 24) ^ buf[i]) & 0xff] ^ (crc << 8)) >>> 0; - return crc; -} - -function base64ToUint8Array(b64: string): Uint8Array { - // Node Buffer decodes base64 directly; strip whitespace first. - return Buffer.from(b64.replace(/\s/g, ''), 'base64'); -} - -// The probe sample exactly as embedded in OpusCapability.ts. Keep in sync with that constant. 
-const PROBE_OGG_OPUS_BASE64_TEST = - 'T2dnUwACAAAAAAAAAAD/3cwSAAAAAJGmJikBE09wdXNIZWFkAQE4AYC7AAAAAABPZ2dTAAAA' + - 'AAAAAAAAAP/dzBIBAAAA6iGxjgE+T3B1c1RhZ3MNAAAATGF2ZjYyLjEyLjEwMQEAAAAdAAAA' + - 'ZW5jb2Rlcj1MYXZjNjIuMjguMTAxIGxpYm9wdXNPZ2dTAASYCgAAAAAAAP/dzBICAAAAjUsr' + - 'kAMDAwP4//74//74//4='; - -interface OggPage { - magic: string; - headerType: number; - seqno: number; - storedCrc: number; - payload: Uint8Array; - pageBytes: Uint8Array; // full page bytes with CRC field zeroed for verification -} - -function scanOggPages(data: Uint8Array): OggPage[] | string { - const pages: OggPage[] = []; - let offset = 0; - while (offset < data.length) { - if (offset + 27 > data.length) return `page ${pages.length}: header truncated at offset ${offset}`; - const magic = String.fromCharCode(data[offset], data[offset+1], data[offset+2], data[offset+3]); - if (magic !== 'OggS') return `page ${pages.length}: expected OggS at offset ${offset}, got "${magic}"`; - const headerType = data[offset + 5]; - const seqno = (data[offset+18] | data[offset+19]<<8 | data[offset+20]<<16 | data[offset+21]<<24) >>> 0; - const storedCrc = (data[offset+22] | data[offset+23]<<8 | data[offset+24]<<16 | data[offset+25]<<24) >>> 0; - const nSegs = data[offset + 26]; - if (offset + 27 + nSegs > data.length) return `page ${pages.length}: segment table overruns`; - const segTable = data.slice(offset + 27, offset + 27 + nSegs); - const pageDataLen = Array.from(segTable).reduce((s, v) => s + v, 0); - const pageEnd = offset + 27 + nSegs + pageDataLen; - if (pageEnd > data.length) return `page ${pages.length}: payload overruns (need ${pageEnd}, have ${data.length})`; - const pageBytes = new Uint8Array(data.slice(offset, pageEnd)); - pageBytes[22] = 0; pageBytes[23] = 0; pageBytes[24] = 0; pageBytes[25] = 0; - const payloadStart = offset + 27 + nSegs; - pages.push({ magic, headerType, seqno, storedCrc, payload: data.slice(payloadStart, pageEnd), pageBytes }); - offset = pageEnd; - } - return pages; -} - 
-test('PROBE_OGG_OPUS_BASE64 decodes to a structurally valid Ogg stream (OggS magic + CRC32 on every page)', () => { - const bytes = base64ToUint8Array(PROBE_OGG_OPUS_BASE64_TEST); - if (bytes.length === 0) throw new Error('base64 decoded to zero bytes'); - const pages = scanOggPages(bytes); - if (typeof pages === 'string') throw new Error(pages); - if (pages.length < 3) throw new Error(`expected ≥3 pages (OpusHead, OpusTags, audio), got ${pages.length}`); - for (let i = 0; i < pages.length; i++) { - const p = pages[i]; - if (p.magic !== 'OggS') throw new Error(`page ${i}: magic is "${p.magic}", expected "OggS"`); - const computed = oggCrc32(p.pageBytes); - if (computed !== p.storedCrc) { - throw new Error(`page ${i}: CRC mismatch — stored=0x${p.storedCrc.toString(16)}, computed=0x${computed.toString(16)}`); - } - } -}); - -test('PROBE_OGG_OPUS_BASE64 page 0 contains OpusHead', () => { - const bytes = base64ToUint8Array(PROBE_OGG_OPUS_BASE64_TEST); - const pages = scanOggPages(bytes); - if (typeof pages === 'string') throw new Error(pages); - const magic = String.fromCharCode(...Array.from(pages[0].payload.slice(0, 8))); - if (magic !== 'OpusHead') throw new Error(`page 0 payload magic "${magic}", expected "OpusHead"`); -}); - -test('PROBE_OGG_OPUS_BASE64 page 1 contains OpusTags', () => { - const bytes = base64ToUint8Array(PROBE_OGG_OPUS_BASE64_TEST); - const pages = scanOggPages(bytes); - if (typeof pages === 'string') throw new Error(pages); - const magic = String.fromCharCode(...Array.from(pages[1].payload.slice(0, 8))); - if (magic !== 'OpusTags') throw new Error(`page 1 payload magic "${magic}", expected "OpusTags"`); -}); - -// --- report ---------------------------------------------------------------------------------- -if (failures.length > 0) { - console.error(failures.join('\n')); - throw new Error(`${failures.length} test(s) failed, ${passed} passed`); -} -console.log(`ALL ${passed} TESTS PASSED`); diff --git 
a/DeepDrftPublic/Interop/audio/OpusFormatDecoder.ts b/DeepDrftPublic/Interop/audio/OpusFormatDecoder.ts deleted file mode 100644 index f89709a..0000000 --- a/DeepDrftPublic/Interop/audio/OpusFormatDecoder.ts +++ /dev/null @@ -1,169 +0,0 @@ -/** - * OpusFormatDecoder - Ogg-Opus implementation of IFormatDecoder. - * - * Ogg Opus is a containerized, paged format — NOT raw-frame-sliceable the way WAV PCM is. Two - * things make a mid-stream byte slice decodable: (1) it must begin on an Ogg page boundary, and - * (2) the OpusHead/OpusTags setup pages must be prepended (analogous to FLAC's STREAMINFO carry). - * This decoder owns both, plus VBR-safe accurate seeking. - * - * Where the metadata comes from is the genuinely new part. WAV/MP3/FLAC parse everything out of - * the byte stream. Opus is VBR and container-paged, so a byteRate seek would be inaccurate; instead - * the seek transfer function (granule->byte) and the setup bytes are precomputed at transcode time - * (wave 18.1) and delivered as a one-time sidecar fetch (wave 18.5). The injection seam is - * `setSidecar(OpusSeekData)` — call it with the parsed sidecar BEFORE the stream is initialized so - * `tryParseHeader` can build FormatInfo from it. Without a sidecar the decoder cannot stream Opus - * (returns null from tryParseHeader); 18.5 guarantees the fetch precedes stream init. - * - * - getAlignedSegmentSize aligns to Ogg page boundaries by scanning for the "OggS" capture - * pattern (the Ogg analogue of FLAC's frame-sync scan; the interface passes rawData for this). - * - wrapSegment prepends the cached OpusHead/OpusTags setup bytes so any mid-stream page run is - * independently decodable. - * - calculateByteOffset binary-searches the precomputed index for the largest entry with - * presentation-time <= t and returns its exact page-start byte offset — NOT interpolation, - * NOT byteRate math (§3.4a A/C; C5 accurate seek). 
- */ - -import { FormatInfo, IFormatDecoder } from './IFormatDecoder.js'; -import { OpusSeekData, OPUS_SAMPLE_RATE, presentationTimeSeconds } from './OpusSidecar.js'; - -// "OggS" — every Ogg page begins with this 4-byte capture pattern. -const OGG_CAPTURE = [0x4f, 0x67, 0x67, 0x53]; // 'O' 'g' 'g' 'S' - -export class OpusFormatDecoder implements IFormatDecoder { - // The parsed sidecar: setup bytes + seek index + preSkip + totals. Injected by wave 18.5 via - // setSidecar before stream init. Held for the stream's lifetime (the format does not change - // across a seek/continuation), mirroring how FlacFormatDecoder retains streamInfoBytes. - private sidecar: OpusSeekData | null = null; - - /** - * Inject the parsed sidecar (setup header + seek index) for this stream. Wave 18.5 calls this - * after its one-time sidecar fetch + parseSidecar, before initializeStreaming. This is the seam - * that keeps the HTTP fetch out of the decoder: the decoder is pure and unit-testable against - * synthetic bytes, and 18.5 wires the real transport. - */ - setSidecar(sidecar: OpusSeekData): void { - this.sidecar = sidecar; - } - - tryParseHeader(_chunks: Uint8Array[], _totalSize: number): FormatInfo | null { - // Opus metadata is NOT parsed from the stream — it comes from the injected sidecar. Without - // it we cannot stream Opus; return null so StreamDecoder waits, and 18.5's contract (fetch + - // setSidecar before stream init) prevents that null from persisting. - const sidecar = this.sidecar; - if (!sidecar) return null; - - // For the initial full-file stream the server emits [setup pages][audio pages], and the - // sidecar's setup bytes are exactly those leading pages — so audio data begins right after - // them. This is the file-absolute offset of the first audio page (== the first index point's - // byteOffset by construction). 
- const audioDataOffset = sidecar.setupHeaderBytes.length; - - return { - // Opus always decodes at 48 kHz regardless of the source rate (RFC 7845). - sampleRate: OPUS_SAMPLE_RATE, - // Channel count is encoded in OpusHead; the decoder reads it from the prepended setup - // bytes at decode time. FormatInfo.channels is display-only here — 2 is the safe nominal. - channels: 2, - bitsPerSample: 16, - byteRate: 0, // VBR + paged; seeking uses the index, never byteRate. - blockAlign: 0, // No fixed alignment; segments align to Ogg page starts via OggS scan. - totalDuration: sidecar.totalDurationSeconds > 0 ? sidecar.totalDurationSeconds : null, - audioDataOffset, - seekData: sidecar - }; - } - - getAlignedSegmentSize( - info: FormatInfo, - availableBytes: number, - requestedSize: number, - streamComplete: boolean, - rawData?: Uint8Array - ): number { - if (availableBytes === 0) return 0; - const candidate = Math.min(requestedSize, availableBytes); - - if (!rawData || rawData.length === 0) { - // No scan data — conservative threshold to avoid tiny unusable segments (mirrors FLAC). - if (!streamComplete && availableBytes < 16384) return 0; - return candidate; - } - - // Scan backward from the candidate boundary for the start of the last Ogg page. Cutting on a - // page start keeps the next segment Ogg-sync-aligned and the current one a whole page run. - const boundary = OpusFormatDecoder.findLastOggPage(rawData, candidate); - if (boundary <= 0) { - if (streamComplete) return candidate; // flush remaining bytes (stream done) - return 0; // wait for more data — no full page boundary yet - } - return boundary; - } - - /** - * Scan backward from `maxBytes` in `rawData` for the start of the last "OggS" capture pattern. - * Returns that byte offset (the page start), or 0 if none is found (caller waits for more data). 
- * Skips offset 0 itself: a segment that is only "everything up to the very first page" carries - * no page and should wait, matching the FLAC frame-scan's `> 0` discipline. - */ - private static findLastOggPage(rawData: Uint8Array, maxBytes: number): number { - const limit = Math.min(maxBytes, rawData.length); - for (let i = limit - 4; i > 0; i--) { - if (rawData[i] === OGG_CAPTURE[0] && - rawData[i + 1] === OGG_CAPTURE[1] && - rawData[i + 2] === OGG_CAPTURE[2] && - rawData[i + 3] === OGG_CAPTURE[3]) { - return i; - } - } - return 0; - } - - wrapSegment(info: FormatInfo, rawBytes: Uint8Array): Uint8Array { - const sidecar = OpusFormatDecoder.opusSeekData(info); - const setupBytes = sidecar?.setupHeaderBytes; - if (!setupBytes || setupBytes.length === 0) { - // Defensive: without setup bytes a mid-stream page run is undecodable. tryParseHeader - // always populates the sidecar on success, so this path should not occur in practice. - return rawBytes; - } - - // Prepend OpusHead/OpusTags so the page run is self-contained for decodeAudioData. - const result = new Uint8Array(setupBytes.length + rawBytes.length); - result.set(setupBytes, 0); - result.set(rawBytes, setupBytes.length); - return result; - } - - calculateByteOffset(info: FormatInfo, positionSeconds: number): number { - const sidecar = OpusFormatDecoder.opusSeekData(info); - if (!sidecar || sidecar.points.length === 0) { - // No index: degrade to start of audio (seek restarts) — same graceful fallback as FLAC. - return info.audioDataOffset; - } - - const points = sidecar.points; - const preSkip = sidecar.preSkip; - - // Binary search for the largest entry whose presentation time is <= target. Presentation - // time = max(0, (granule - preSkip) / 48000), matching 18.1's RFC 7845 math exactly. 
- let lo = 0, hi = points.length - 1, best = 0; - while (lo <= hi) { - const mid = (lo + hi) >> 1; - const t = presentationTimeSeconds(points[mid].granulePosition, preSkip); - if (t <= positionSeconds) { - best = mid; - lo = mid + 1; - } else { - hi = mid - 1; - } - } - - // byteOffset is already a file-absolute page-start offset in the Opus file — no header math - // to add (unlike FLAC's audio-relative stream_offset). Return it directly. - return points[best].byteOffset; - } - - private static opusSeekData(info: FormatInfo): OpusSeekData | null { - return info.seekData?.kind === 'opus-sidecar' ? info.seekData : null; - } -} diff --git a/DeepDrftPublic/Interop/audio/OpusSidecar.ts b/DeepDrftPublic/Interop/audio/OpusSidecar.ts index b952b98..ccb27b7 100644 --- a/DeepDrftPublic/Interop/audio/OpusSidecar.ts +++ b/DeepDrftPublic/Interop/audio/OpusSidecar.ts @@ -123,6 +123,39 @@ export function presentationTimeSeconds(granulePosition: number, preSkip: number return Math.max(0, (granulePosition - preSkip) / OPUS_SAMPLE_RATE); } +/** + * Resolve a seek time (seconds) to a file-absolute, page-start byte offset via the precomputed index — + * the accurate VBR-safe transfer function (§3.4a A/C). Binary-searches for the largest entry whose + * presentation time is <= `positionSeconds` and returns its exact page-start byte offset. NOT + * interpolation, NOT byteRate math. With an empty index it degrades to the start of audio (the offset + * of the first audio page == the setup-header length, since the server emits [setup pages][audio pages]). + * + * This is the single source of truth for Opus seek-offset math, shared by the seek-beyond-buffer path + * (AudioPlayer) and any byte-offset resolver. The Range fetch from this offset lands the decoder + * Ogg-sync-aligned because every indexed offset is a real page start. 
+ */ +export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): number { + const points = sidecar.points; + if (points.length === 0) { + return sidecar.setupHeaderBytes.length; + } + + let lo = 0; + let hi = points.length - 1; + let best = 0; + while (lo <= hi) { + const mid = (lo + hi) >> 1; + const t = presentationTimeSeconds(points[mid].granulePosition, sidecar.preSkip); + if (t <= positionSeconds) { + best = mid; + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return points[best].byteOffset; +} + function toUint8Array(input: Uint8Array | ArrayBuffer | ArrayBufferView): Uint8Array { if (input instanceof Uint8Array) return input; if (input instanceof ArrayBuffer) return new Uint8Array(input); diff --git a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts new file mode 100644 index 0000000..c38cb3d --- /dev/null +++ b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts @@ -0,0 +1,380 @@ +/** + * Opus WebCodecs decode-path tests — the browser-independent pieces. + * + * The WebCodecs decode/playback/seek itself can only run in a real browser (verified by Daniel), so + * these tests cover the pure logic that surrounds it and that determines correctness: + * - OpusSidecar parse: byte-for-byte round-trip against the C# wire format. + * - resolveOpusByteOffset: the seek transfer function (binary search over the precomputed index). + * - OggDemuxer: Ogg page -> Opus packet extraction (segment-table lacing, packets spanning pages, + * granule tracking, OpusHead/OpusTags setup-packet skipping, continuation reset). + * - extractOpusHead / opusHeadChannelCount: pulling the WebCodecs `description` out of the sidecar. + * + * There is no TS test runner configured in this repo (no package.json, no jest/vitest). This is a + * self-contained, zero-dependency test: a tiny inline assert harness, no `node:` imports beyond Buffer + * (Node global). 
It is EXCLUDED from the production tsc build (tsconfig `exclude: Interop/**\/*.test.ts`) + * so it never ships in wwwroot/js. To run it (Node 22+ strips TS types natively — no tsc, no deps), the + * `.js` import specifiers must resolve to the COMPILED modules, so run a copy from the compiled output: + * + * # 1. produce the compiled modules (the normal build already does this): + * dotnet build DeepDrftPublic/DeepDrftPublic.csproj + * # 2. run this test next to the compiled .js siblings (Node strips the types at load): + * cp DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts DeepDrftPublic/wwwroot/js/audio/ + * node DeepDrftPublic/wwwroot/js/audio/OpusStreamDecoder.test.ts + * + * A thrown error / non-zero exit signals failure; "ALL TESTS PASSED" signals success. + * + * The sidecar bytes built in `makeSidecar` reproduce the C# wire format byte-for-byte + * (DeepDrftContent.Processors.Opus.OpusSidecar.ToBytes / OggOpusSeekIndex.ToBytes): + * [uint32 setupHeaderLength][setup bytes] + * [uint64 totalByteLength][double totalDuration][uint32 count][uint16 preSkip][uint16 reserved] + * count x [uint64 granulePosition][uint64 byteOffset] — all little-endian. + */ + +import { parseSidecar, presentationTimeSeconds, resolveOpusByteOffset, OPUS_SAMPLE_RATE } from './OpusSidecar.js'; +import type { OpusSeekData } from './OpusSidecar.js'; +import { OggDemuxer, extractOpusHead, opusHeadChannelCount } from './OggDemuxer.js'; + +// --- tiny inline harness (no dependencies) --------------------------------------------------- +let passed = 0; +const failures: string[] = []; +function test(name: string, fn: () => void): void { + try { + fn(); + passed++; + } catch (e) { + failures.push(`FAIL: ${name}\n ${(e as Error).message}`); + } +} +function assertEqual(actual: unknown, expected: unknown, msg?: string): void { + if (actual !== expected) { + throw new Error(`${msg ?? 
'assertEqual'}: expected ${String(expected)}, got ${String(actual)}`); + } +} +function assertArray(actual: ArrayLike, expected: number[], msg?: string): void { + const a = Array.from(actual); + if (a.length !== expected.length || a.some((v, i) => v !== expected[i])) { + throw new Error(`${msg ?? 'assertArray'}: expected [${expected}], got [${a}]`); + } +} +function assertNull(actual: unknown, msg?: string): void { + if (actual !== null) throw new Error(`${msg ?? 'assertNull'}: expected null, got ${String(actual)}`); +} +function assertNotNull(actual: T | null, msg?: string): T { + if (actual === null) throw new Error(`${msg ?? 'assertNotNull'}: got null`); + return actual; +} + +interface SidecarSpec { + setupHeader: number[]; + totalByteLength: number; + totalDuration: number; + preSkip: number; + points: Array<{ granule: number; byteOffset: number }>; +} + +/** Serialize a sidecar blob exactly as the C# OpusSidecar/OggOpusSeekIndex writers do. */ +function makeSidecar(spec: SidecarSpec): Uint8Array { + const SEEK_INDEX_HEADER_SIZE = 24; + const SEEK_POINT_SIZE = 16; + const setupLen = spec.setupHeader.length; + const total = 4 + setupLen + SEEK_INDEX_HEADER_SIZE + spec.points.length * SEEK_POINT_SIZE; + + const bytes = new Uint8Array(total); + const view = new DataView(bytes.buffer); + + view.setUint32(0, setupLen, true); + bytes.set(spec.setupHeader, 4); + + let p = 4 + setupLen; + writeUint64(view, p, spec.totalByteLength); + view.setFloat64(p + 8, spec.totalDuration, true); + view.setUint32(p + 16, spec.points.length, true); + view.setUint16(p + 20, spec.preSkip, true); + // bytes 22-23 reserved (zero) + + p += SEEK_INDEX_HEADER_SIZE; + for (const pt of spec.points) { + writeUint64(view, p, pt.granule); + writeUint64(view, p + 8, pt.byteOffset); + p += SEEK_POINT_SIZE; + } + return bytes; +} + +function writeUint64(view: DataView, offset: number, value: number): void { + view.setUint32(offset, value >>> 0, true); + view.setUint32(offset + 4, Math.floor(value 
/ 0x100000000), true); +} + +// --- parseSidecar: byte-for-byte round-trip against the C# layout ----------------------------- + +test('parseSidecar round-trips the C# binary layout exactly', () => { + const setup = [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64]; // "OpusHead" stand-in + const spec: SidecarSpec = { + setupHeader: setup, + totalByteLength: 1_234_567, + totalDuration: 212.5, + preSkip: 312, + points: [ + { granule: 312, byteOffset: 4096 }, // first point: granule == preSkip -> t=0 + { granule: 312 + 24000, byteOffset: 9000 }, // +0.5 s + { granule: 312 + 48000, byteOffset: 14000 }, // +1.0 s + ], + }; + + const parsed: OpusSeekData = assertNotNull(parseSidecar(makeSidecar(spec))); + assertEqual(parsed.kind, 'opus-sidecar', 'kind'); + assertArray(parsed.setupHeaderBytes, setup, 'setup header bytes'); + assertEqual(parsed.totalByteLength, spec.totalByteLength, 'totalByteLength'); + assertEqual(parsed.totalDurationSeconds, spec.totalDuration, 'totalDuration'); + assertEqual(parsed.preSkip, spec.preSkip, 'preSkip'); + assertEqual(parsed.points.length, 3, 'point count'); + assertEqual(parsed.points[1].granulePosition, 312 + 24000, 'point[1].granule'); + assertEqual(parsed.points[1].byteOffset, 9000, 'point[1].byteOffset'); +}); + +test('parseSidecar honours a borrowed view byteOffset (sidecar not at buffer start)', () => { + const blob = makeSidecar({ + setupHeader: [1, 2, 3, 4], + totalByteLength: 100, + totalDuration: 1.0, + preSkip: 0, + points: [{ granule: 0, byteOffset: 8 }], + }); + const padded = new Uint8Array(blob.length + 7); + padded.set(blob, 7); + const parsed = assertNotNull(parseSidecar(padded.subarray(7))); + assertArray(parsed.setupHeaderBytes, [1, 2, 3, 4], 'borrowed setup bytes'); + assertEqual(parsed.points[0].byteOffset, 8, 'borrowed point offset'); +}); + +test('parseSidecar returns null on a truncated blob', () => { + const blob = makeSidecar({ + setupHeader: [0], + totalByteLength: 1, + totalDuration: 0, + preSkip: 0, + points: 
[{ granule: 0, byteOffset: 0 }], + }); + assertNull(parseSidecar(blob.subarray(0, 3)), 'short of length prefix'); + assertNull(parseSidecar(blob.subarray(0, blob.length - 4)), 'declared count overruns'); +}); + +test('presentationTimeSeconds applies preSkip and clamps at zero (RFC 7845)', () => { + assertEqual(presentationTimeSeconds(312, 312), 0, 'granule == preSkip'); + assertEqual(presentationTimeSeconds(0, 312), 0, 'below preSkip clamps'); + assertEqual(presentationTimeSeconds(312 + OPUS_SAMPLE_RATE, 312), 1.0, '+48000 -> 1 s'); +}); + +// --- resolveOpusByteOffset: binary search over the precomputed index (exact, not interpolation) - + +function sidecarFrom(spec: SidecarSpec): OpusSeekData { + return assertNotNull(parseSidecar(makeSidecar(spec)), 'sidecar should parse'); +} + +test('resolveOpusByteOffset returns the page-start of the largest entry with time <= t', () => { + const points = [0, 1, 2, 3].map(i => ({ + granule: 1000 + i * (OPUS_SAMPLE_RATE / 2), + byteOffset: 4096 + i * 5000, + })); + const sc = sidecarFrom({ + setupHeader: [9, 9, 9, 9], totalByteLength: 999_999, totalDuration: 1.5, preSkip: 1000, points, + }); + assertEqual(resolveOpusByteOffset(sc, 0.0), 4096, 't=0 -> first point'); + assertEqual(resolveOpusByteOffset(sc, 0.4), 4096, 'just before bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 0.5), 9096, 'exactly bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 0.9), 9096, 'within bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 1.0), 14096, 'exactly bucket 2'); + assertEqual(resolveOpusByteOffset(sc, 99), 19096, 'past end -> last point'); +}); + +test('resolveOpusByteOffset never interpolates between points', () => { + const sc = sidecarFrom({ + setupHeader: [0], totalByteLength: 10_000, totalDuration: 1.0, preSkip: 0, + points: [{ granule: 0, byteOffset: 100 }, { granule: OPUS_SAMPLE_RATE, byteOffset: 9000 }], + }); + assertEqual(resolveOpusByteOffset(sc, 0.5), 100, 'midpoint snaps to lower page start'); +}); + 
+test('resolveOpusByteOffset degrades to start of audio with an empty index', () => { + const sc = sidecarFrom({ + setupHeader: [1, 2, 3, 4, 5], totalByteLength: 0, totalDuration: 0, preSkip: 0, points: [], + }); + // start of audio == setup header length (server emits [setup pages][audio pages]). + assertEqual(resolveOpusByteOffset(sc, 10), 5, 'empty index degrades to audio start'); +}); + +// --- OggDemuxer: page -> packet extraction ---------------------------------------------------- +// +// Builds minimal Ogg pages by hand (no codec) so the lacing logic is exercised deterministically. + +interface PageSpec { + granule: number; // -1 (0xFFFF...) means "no granule" + continued?: boolean; // header-type bit 0x01 + eos?: boolean; // header-type bit 0x04 + /** Packet payloads to lace into this page (each split into 255-byte segments per Ogg rules). */ + packets?: Uint8Array[]; + /** Raw segment lengths + payload, for hand-crafting page-spanning packets. */ + rawSegments?: number[]; + rawPayload?: Uint8Array; +} + +function buildPage(spec: PageSpec): Uint8Array { + let segTable: number[]; + let payload: Uint8Array; + + if (spec.rawSegments && spec.rawPayload) { + segTable = spec.rawSegments; + payload = spec.rawPayload; + } else { + segTable = []; + const chunks: number[] = []; + for (const pkt of spec.packets ?? []) { + let remaining = pkt.length; + let o = 0; + // Lace: emit 255-byte segments until the final (< 255) segment terminates the packet. + for (;;) { + const seg = Math.min(255, remaining); + segTable.push(seg); + for (let i = 0; i < seg; i++) chunks.push(pkt[o + i]); + o += seg; + remaining -= seg; + if (seg < 255) break; // terminating segment + } + } + payload = new Uint8Array(chunks); + } + + const header = new Uint8Array(OGG_HDR + segTable.length + payload.length); + header.set([0x4f, 0x67, 0x67, 0x53], 0); // "OggS" + header[4] = 0; // version + header[5] = (spec.continued ? 0x01 : 0) | (spec.eos ? 
0x04 : 0); + // granule (LE uint64) + if (spec.granule < 0) { + for (let i = 0; i < 8; i++) header[6 + i] = 0xff; + } else { + let g = spec.granule; + for (let i = 0; i < 8; i++) { header[6 + i] = g & 0xff; g = Math.floor(g / 256); } + } + header[26] = segTable.length; + header.set(segTable, OGG_HDR); + header.set(payload, OGG_HDR + segTable.length); + return header; +} +const OGG_HDR = 27; + +function opusHeadPacket(channels: number, preSkip: number): Uint8Array { + // "OpusHead"(8) version(1) channels(1) preSkip(2 LE) inputRate(4) gain(2) mapping(1) = 19 bytes + const p = new Uint8Array(19); + p.set([0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64], 0); + p[8] = 1; + p[9] = channels; + p[10] = preSkip & 0xff; + p[11] = (preSkip >> 8) & 0xff; + return p; +} +function opusTagsPacket(): Uint8Array { + const p = new Uint8Array(16); + p.set([0x4f, 0x70, 0x75, 0x73, 0x54, 0x61, 0x67, 0x73], 0); // "OpusTags" + return p; +} + +test('OggDemuxer skips OpusHead/OpusTags and returns audio packets with the page granule', () => { + const head = buildPage({ granule: 0, packets: [opusHeadPacket(2, 312)] }); + const tags = buildPage({ granule: 0, packets: [opusTagsPacket()] }); + const audio = buildPage({ granule: 24000, packets: [new Uint8Array([0xaa, 0xbb]), new Uint8Array([0xcc])] }); + + const d = new OggDemuxer(); + const packets = d.push(concat([head, tags, audio])); + assertEqual(packets.length, 2, 'two audio packets, setup skipped'); + assertArray(packets[0].data, [0xaa, 0xbb], 'first audio packet bytes'); + assertEqual(packets[0].pageGranule, null, 'non-final packet carries no granule'); + assertArray(packets[1].data, [0xcc], 'second audio packet bytes'); + assertEqual(packets[1].pageGranule, 24000, 'final completing packet carries the page granule'); + assertEqual(packets[1].isLastPage, false, 'not EOS'); +}); + +test('OggDemuxer flags the EOS page', () => { + const head = buildPage({ granule: 0, packets: [opusHeadPacket(1, 100)] }); + const tags = buildPage({ granule: 
0, packets: [opusTagsPacket()] }); + const audio = buildPage({ granule: 48000, eos: true, packets: [new Uint8Array([0x01])] }); + const d = new OggDemuxer(); + const packets = d.push(concat([head, tags, audio])); + assertEqual(packets.length, 1, 'one audio packet'); + assertEqual(packets[0].isLastPage, true, 'EOS flagged'); +}); + +test('OggDemuxer reassembles a packet that spans two pages (255 last segment + continuation)', () => { + const head = buildPage({ granule: 0, packets: [opusHeadPacket(2, 0)] }); + const tags = buildPage({ granule: 0, packets: [opusTagsPacket()] }); + // First audio page: one 255-byte segment that does NOT terminate (packet continues). + const part1 = new Uint8Array(255).fill(0x11); + const pageA = buildPage({ granule: -1, rawSegments: [255], rawPayload: part1 }); + // Second page (continued): a 10-byte terminating segment completes the packet. + const part2 = new Uint8Array(10).fill(0x22); + const pageB = buildPage({ granule: 24000, continued: true, rawSegments: [10], rawPayload: part2 }); + + const d = new OggDemuxer(); + const packets = d.push(concat([head, tags, pageA, pageB])); + assertEqual(packets.length, 1, 'one reassembled packet'); + assertEqual(packets[0].data.length, 265, 'packet is 255 + 10 bytes'); + assertEqual(packets[0].data[0], 0x11, 'first byte from page A'); + assertEqual(packets[0].data[264], 0x22, 'last byte from page B'); + assertEqual(packets[0].pageGranule, 24000, 'granule from the completing page'); +}); + +test('OggDemuxer handles bytes split across push() calls (page straddles a network chunk)', () => { + const head = buildPage({ granule: 0, packets: [opusHeadPacket(2, 0)] }); + const tags = buildPage({ granule: 0, packets: [opusTagsPacket()] }); + const audio = buildPage({ granule: 960, packets: [new Uint8Array([0x07, 0x08, 0x09])] }); + const full = concat([head, tags, audio]); + + const d = new OggDemuxer(); + const cut = full.length - 2; // split mid-audio-page + const first = d.push(full.subarray(0, cut)); 
+ assertEqual(first.length, 0, 'no whole audio packet yet'); + const second = d.push(full.subarray(cut)); + assertEqual(second.length, 1, 'audio packet completes on the second push'); + assertArray(second[0].data, [0x07, 0x08, 0x09], 'reassembled across pushes'); +}); + +test('OggDemuxer.reset(continuation) treats the first page as audio (no setup expected)', () => { + const audio = buildPage({ granule: 96000, packets: [new Uint8Array([0x42])] }); + const d = new OggDemuxer(); + d.reset(true); + const packets = d.push(audio); + assertEqual(packets.length, 1, 'continuation: first page is audio'); + assertArray(packets[0].data, [0x42], 'audio packet bytes'); +}); + +// --- extractOpusHead / opusHeadChannelCount: WebCodecs description from the sidecar ----------- + +test('extractOpusHead returns the OpusHead packet from the setup pages', () => { + const head = buildPage({ granule: 0, packets: [opusHeadPacket(2, 312)] }); + const tags = buildPage({ granule: 0, packets: [opusTagsPacket()] }); + const setup = concat([head, tags]); + const opusHead = assertNotNull(extractOpusHead(setup), 'OpusHead extracted'); + assertArray(opusHead.subarray(0, 8), [0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64], 'OpusHead magic'); + assertEqual(opusHeadChannelCount(opusHead), 2, 'channel count'); +}); + +test('extractOpusHead returns null when no OpusHead page is present', () => { + const tags = buildPage({ granule: 0, packets: [opusTagsPacket()] }); + assertNull(extractOpusHead(tags), 'no OpusHead'); +}); + +function concat(arrs: Uint8Array[]): Uint8Array { + let len = 0; + for (const a of arrs) len += a.length; + const out = new Uint8Array(len); + let o = 0; + for (const a of arrs) { out.set(a, o); o += a.length; } + return out; +} + +// --- report ---------------------------------------------------------------------------------- +if (failures.length > 0) { + console.error(failures.join('\n')); + throw new Error(`${failures.length} test(s) failed, ${passed} passed`); +} 
+console.log(`ALL ${passed} TESTS PASSED`); diff --git a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts new file mode 100644 index 0000000..b8fb03c --- /dev/null +++ b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts @@ -0,0 +1,262 @@ +/** + * OpusStreamDecoder - the WebCodecs streaming Opus decode pipeline. + * + * This replaces the fundamentally-broken per-segment `decodeAudioData` Opus model. Instead of cutting + * the Ogg stream into page-runs and decoding each as a standalone file (which re-applies pre-skip and + * starts from cold codec state at every boundary), it feeds a single stateful WebCodecs `AudioDecoder` + * the demuxed Opus packets in order — correct pre-skip-once handling and full inter-frame continuity. + * + * Pipeline: OggDemuxer (pages -> Opus packets + granule) -> AudioDecoder (codec 'opus', configured + * from the OpusHead in the sidecar) -> AudioData (48 kHz PCM) -> AudioBuffer -> PlaybackScheduler. + * + * Pre-skip (encoder delay): handled ONCE, by the decoder. WebCodecs decodes Opus with the OpusHead + * passed as `AudioDecoderConfig.description`; the OpusHead carries `pre_skip`, and the WebCodecs Opus + * decoder discards those leading samples itself. We do NOT re-trim per packet — doing so on top of the + * decoder's own trim would double-count. This is the spec-intended path (W3C WebCodecs Opus registration). + * + * End-trim: the sidecar's `totalDurationSeconds` is the exact pre-skip-corrected stream length. We cap + * cumulative emitted audio at that length so the final partial frame's padding does not leak past the + * true end. (Granule-position end-trim from the EOS page is the alternative; capping on the known total + * is equivalent and simpler, and the sidecar total is authoritative.) + * + * Sample rate: Opus always decodes at 48 kHz (RFC 7845). 
We force the AudioContext to 48 kHz at init so + * the decoded AudioData needs no resampling before scheduling — the same `recreateWithSampleRate` seam + * the WAV path uses for non-44.1 sources. + * + * BROWSER-VERIFIED. The actual decode/playback/trim correctness is verified in Daniel's browser + * (WebCodecs cannot run in Node/headless here). The Ogg demux, packet timing, and end-trim *math* are + * unit-tested; the WebCodecs glue (configure/decode/flush/AudioData->AudioBuffer) is browser-verified. + */ + +import { AudioContextManager } from './AudioContextManager.js'; +import { IStreamingDecoder } from './IStreamingDecoder.js'; +import { OggDemuxer, OpusPacket, extractOpusHead, opusHeadChannelCount } from './OggDemuxer.js'; +import { OpusSeekData, OPUS_SAMPLE_RATE } from './OpusSidecar.js'; + +/** Opus packet duration ceiling is 120 ms; at 48 kHz that is 5760 frames. Used only for chunk timestamps. */ +const MAX_PACKET_FRAMES = 5760; + +export class OpusStreamDecoder implements IStreamingDecoder { + private readonly contextManager: AudioContextManager; + private readonly sidecar: OpusSeekData; + + private demuxer = new OggDemuxer(); + private decoder: AudioDecoder | null = null; + private channelCount = 2; + private configured = false; + // OpusHead bytes used as the AudioDecoder `description`, captured once at first configure and reused + // verbatim on a range-continuation reconfigure (avoids re-extracting / a non-null assertion). + private opusHeadDescription: Uint8Array | null = null; + + // Decoded AudioData awaiting conversion, filled by the AudioDecoder output callback. + private decodedQueue: AudioData[] = []; + private fatalError = false; + + // Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. WebCodecs requires + // strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation), + // so a synthetic 48 kHz-derived counter suffices and stays exact. 
+ private nextTimestampUs = 0; + + // Cumulative frames already emitted as AudioBuffers, for end-trim against the known total length. + private emittedFrames = 0; + private readonly totalFrames: number; + + constructor(contextManager: AudioContextManager, sidecar: OpusSeekData) { + this.contextManager = contextManager; + this.sidecar = sidecar; + this.totalFrames = sidecar.totalDurationSeconds > 0 + ? Math.round(sidecar.totalDurationSeconds * OPUS_SAMPLE_RATE) + : Number.POSITIVE_INFINITY; + } + + get hasFatalError(): boolean { + return this.fatalError; + } + + get ready(): boolean { + return this.configured; + } + + get totalDuration(): number | null { + return this.sidecar.totalDurationSeconds > 0 ? this.sidecar.totalDurationSeconds : null; + } + + /** + * Lazily build + configure the WebCodecs decoder from the sidecar's OpusHead. Idempotent. Forces the + * AudioContext to 48 kHz so decoded AudioData schedules without resampling. Returns false on a config + * the browser cannot support (caller should never reach here — the capability gate runs first — but + * we fail safe rather than throw into the stream loop). + */ + private async ensureConfigured(): Promise { + if (this.configured) return true; + if (typeof AudioDecoder === 'undefined') { + this.fatalError = true; + return false; + } + + const opusHead = extractOpusHead(this.sidecar.setupHeaderBytes); + if (!opusHead) { + this.fatalError = true; + return false; + } + this.channelCount = opusHeadChannelCount(opusHead); + // Copy the OpusHead into a standalone buffer — the sidecar subarray is a view we keep. + this.opusHeadDescription = opusHead.slice(); + + // Opus decodes at 48 kHz; align the context so no resample is needed. 
+ if (this.contextManager.sampleRate !== OPUS_SAMPLE_RATE) { + await this.contextManager.recreateWithSampleRate(OPUS_SAMPLE_RATE); + } + + this.decoder = new AudioDecoder({ + output: (data) => this.decodedQueue.push(data), + error: (err) => { + console.error('Opus AudioDecoder error:', err.message); + this.fatalError = true; + } + }); + this.decoder.configure(this.buildConfig()); + this.configured = true; + return true; + } + + private buildConfig(): AudioDecoderConfig { + return { + codec: 'opus', + sampleRate: OPUS_SAMPLE_RATE, + numberOfChannels: this.channelCount, + description: this.opusHeadDescription ?? undefined + }; + } + + async push(chunk: Uint8Array): Promise { + if (this.fatalError) return []; + if (!(await this.ensureConfigured())) return []; + + const packets = this.demuxer.push(chunk); + this.decodePackets(packets); + // Give the WebCodecs output callback a chance to run before we drain. + await this.yieldToDecoder(); + return this.drainDecoded(false); + } + + async complete(): Promise { + if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') { + return this.drainDecoded(true); + } + try { + await this.decoder.flush(); + } catch (err) { + // A flush can reject if the decoder was reset/closed concurrently (track switch); the loop's + // own cancellation handles that — surface nothing, just drain what we have. + console.warn('Opus decoder flush interrupted:', (err as Error).message); + } + return this.drainDecoded(true); + } + + reinitializeForRangeContinuation(): void { + // New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but + // inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the + // decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into + // continuation mode (treat the first page's packets as audio — no setup pages in a 206 body). 
+ this.demuxer.reset(true); + this.decodedQueue = []; + this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset + if (this.decoder && this.decoder.state === 'configured') { + this.decoder.reset(); + this.decoder.configure(this.buildConfig()); + } + } + + dispose(): void { + for (const d of this.decodedQueue) { + try { d.close(); } catch { /* already closed */ } + } + this.decodedQueue = []; + if (this.decoder && this.decoder.state !== 'closed') { + try { this.decoder.close(); } catch { /* already closed */ } + } + this.decoder = null; + this.configured = false; + } + + private decodePackets(packets: OpusPacket[]): void { + if (!this.decoder || this.decoder.state !== 'configured') return; + for (const pkt of packets) { + if (pkt.data.length === 0) continue; + // Every Opus packet is independently a "key" frame at the container level for WebCodecs's + // purposes — Opus has no key/delta distinction; 'key' is the correct type for all packets. + const chunk = new EncodedAudioChunk({ + type: 'key', + timestamp: this.nextTimestampUs, + data: pkt.data + }); + // Advance the synthetic clock by a packet's max duration; exact value is immaterial to us. + this.nextTimestampUs += Math.round((MAX_PACKET_FRAMES / OPUS_SAMPLE_RATE) * 1_000_000); + try { + this.decoder.decode(chunk); + } catch (err) { + console.error('Opus decode() threw:', (err as Error).message); + this.fatalError = true; + return; + } + } + } + + /** + * Convert every queued AudioData into an AudioBuffer at the context sample rate, applying end-trim + * against the known total frame count. `final` allows the very last partial buffer to be emitted. 
+ */ + private drainDecoded(_final: boolean): AudioBuffer[] { + const out: AudioBuffer[] = []; + const ctx = this.contextManager.getContext(); + + while (this.decodedQueue.length > 0) { + const data = this.decodedQueue.shift()!; + try { + const buffer = this.audioDataToBuffer(ctx, data); + if (buffer) out.push(buffer); + } finally { + try { data.close(); } catch { /* already closed */ } + } + } + return out; + } + + /** + * Copy an AudioData's PCM into a new AudioBuffer, trimming to not exceed the known total length + * (end-trim). Returns null if the trim leaves zero frames (the buffer is entirely past the end). + */ + private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null { + const frames = data.numberOfFrames; + const channels = data.numberOfChannels; + + // End-trim: cap cumulative output at totalFrames. + let keep = frames; + if (Number.isFinite(this.totalFrames)) { + const room = this.totalFrames - this.emittedFrames; + if (room <= 0) return null; + if (room < frames) keep = room; + } + if (keep <= 0) return null; + + const buffer = ctx.createBuffer(channels, keep, data.sampleRate); + const plane = new Float32Array(frames); // copyTo fills the full frame count, then we slice + for (let ch = 0; ch < channels; ch++) { + data.copyTo(plane, { planeIndex: ch, format: 'f32-planar' }); + buffer.copyToChannel(keep === frames ? plane : plane.subarray(0, keep), ch); + } + this.emittedFrames += keep; + return buffer; + } + + /** + * Yield to the microtask/event loop so the synchronous decode() calls above let their async output + * callbacks fire before we drain. A zero-delay timeout (macrotask) is the reliable cross-engine way + * to let WebCodecs deliver outputs; awaiting decodeQueueSize draining is the precise alternative but + * not all engines settle it synchronously. 
+ */ + private yieldToDecoder(): Promise { + return new Promise((resolve) => setTimeout(resolve, 0)); + } +} diff --git a/DeepDrftPublic/Interop/audio/index.ts b/DeepDrftPublic/Interop/audio/index.ts index c56a134..dd863b9 100644 --- a/DeepDrftPublic/Interop/audio/index.ts +++ b/DeepDrftPublic/Interop/audio/index.ts @@ -47,9 +47,9 @@ const DeepDrftAudio = { return player.setOpusSidecar(sidecarBytes); }, - // Capability seam (wave 18.4). Resolves whether this browser can decode Ogg Opus via - // decodeAudioData (Safari < 18.4 cannot). Wave 18.5 / 18.6 consume this to choose lossless - // when unsupported; this module only reports the capability. + // Capability seam. Resolves whether this browser can stream-decode Ogg Opus via WebCodecs + // (AudioDecoder + codec:'opus'; Safari < 16.4 / older Firefox cannot). The player consumes this + // to choose lossless when unsupported; this module only reports the capability. canDecodeOggOpus: (): Promise => canDecodeOggOpus(), processStreamingChunk: async (playerId: string, chunk: Uint8Array): Promise => { diff --git a/DeepDrftPublic/Interop/audio/webcodecs.d.ts b/DeepDrftPublic/Interop/audio/webcodecs.d.ts new file mode 100644 index 0000000..8589c88 --- /dev/null +++ b/DeepDrftPublic/Interop/audio/webcodecs.d.ts @@ -0,0 +1,82 @@ +/** + * Minimal ambient WebCodecs declarations. + * + * TypeScript 5.9's bundled lib.dom.d.ts does NOT yet ship the WebCodecs audio types + * (`AudioDecoder`, `EncodedAudioChunk`, `AudioData`, `AudioDecoderConfig`), and this repo has no + * package.json / node_modules to pull in `@types/dom-webcodecs`. Rather than add a dependency + * toolchain for one feature, this declares exactly the slice of the WebCodecs surface the Opus + * streaming decoder uses — nothing more. The shapes follow the W3C WebCodecs spec. 
+ * + * These are runtime-optional: `AudioDecoder` is absent on Safari < 16.4 and older Firefox, so every + * use site guards on `typeof AudioDecoder !== 'undefined'` before touching it (the capability gate). + */ + +interface AudioDecoderConfig { + codec: string; + sampleRate: number; + numberOfChannels: number; + /** Codec-specific setup bytes. For Opus this is the OpusHead identification header. */ + description?: BufferSource; +} + +interface AudioDecoderSupport { + supported: boolean; + config: AudioDecoderConfig; +} + +type AudioSampleFormat = 'u8' | 's16' | 's24' | 's32' | 'f32' | 'u8-planar' | 's16-planar' | 's24-planar' | 's32-planar' | 'f32-planar'; + +interface AudioDataCopyToOptions { + planeIndex: number; + frameOffset?: number; + frameCount?: number; + format?: AudioSampleFormat; +} + +interface AudioData { + readonly format: AudioSampleFormat | null; + readonly sampleRate: number; + readonly numberOfFrames: number; + readonly numberOfChannels: number; + readonly duration: number; + /** Presentation timestamp in microseconds. */ + readonly timestamp: number; + allocationSize(options: AudioDataCopyToOptions): number; + copyTo(destination: BufferSource, options: AudioDataCopyToOptions): void; + close(): void; +} + +interface EncodedAudioChunkInit { + type: 'key' | 'delta'; + /** Presentation timestamp in microseconds. 
*/ + timestamp: number; + duration?: number; + data: BufferSource; +} + +declare class EncodedAudioChunk { + constructor(init: EncodedAudioChunkInit); + readonly type: 'key' | 'delta'; + readonly timestamp: number; + readonly duration: number | null; + readonly byteLength: number; +} + +interface AudioDecoderInit { + output: (data: AudioData) => void; + error: (error: DOMException) => void; +} + +type CodecState = 'unconfigured' | 'configured' | 'closed'; + +declare class AudioDecoder { + constructor(init: AudioDecoderInit); + readonly state: CodecState; + readonly decodeQueueSize: number; + configure(config: AudioDecoderConfig): void; + decode(chunk: EncodedAudioChunk): void; + flush(): Promise; + reset(): void; + close(): void; + static isConfigSupported(config: AudioDecoderConfig): Promise; +} From 5a75da1769b1cd2b485afebaf5d4287930ed0f4c Mon Sep 17 00:00:00 2001 From: daniel-c-harvey Date: Tue, 23 Jun 2026 20:57:05 -0400 Subject: [PATCH 2/2] fix: AC9 seek fine re-sync + deterministic decoder drain (WebCodecs Opus) Seek now trims the lead-in so playback lands at the requested time, not the page start; decoder drain polls decodeQueueSize (bounded) instead of a single timeout. Minor cleanups. 
--- DeepDrftPublic/Interop/audio/AudioPlayer.ts | 22 +++-- .../Interop/audio/IStreamingDecoder.ts | 7 +- DeepDrftPublic/Interop/audio/OggDemuxer.ts | 3 + DeepDrftPublic/Interop/audio/OpusSidecar.ts | 34 +++++-- .../Interop/audio/OpusStreamDecoder.test.ts | 69 ++++++++++++-- .../Interop/audio/OpusStreamDecoder.ts | 93 +++++++++++++++---- 6 files changed, 186 insertions(+), 42 deletions(-) diff --git a/DeepDrftPublic/Interop/audio/AudioPlayer.ts b/DeepDrftPublic/Interop/audio/AudioPlayer.ts index 864291c..354389a 100644 --- a/DeepDrftPublic/Interop/audio/AudioPlayer.ts +++ b/DeepDrftPublic/Interop/audio/AudioPlayer.ts @@ -16,7 +16,7 @@ import { WavFormatDecoder } from './WavFormatDecoder.js'; import { Mp3FormatDecoder } from './Mp3FormatDecoder.js'; import { FlacFormatDecoder } from './FlacFormatDecoder.js'; import { OpusStreamDecoder } from './OpusStreamDecoder.js'; -import { OpusSeekData, parseSidecar, resolveOpusByteOffset } from './OpusSidecar.js'; +import { OpusSeekData, parseSidecar, resolveOpusByteOffset, OpusSeekResolution } from './OpusSidecar.js'; export interface AudioResult { success: boolean; @@ -56,6 +56,10 @@ export class AudioPlayer { // The sidecar in effect for the active Opus stream (its seek index resolves byte offsets). Distinct // from pendingOpusSidecar, which is the one set for the NEXT stream init. private activeOpusSidecar: OpusSeekData | null = null; + // The landing time of the most recent seek-beyond-buffer page resolution (seconds). Set by + // seekBeyondBuffer, consumed by reinitializeFromOffset to trim leading decoded frames so the + // audible position matches the requested seek target (AC9 fine re-sync, §3.4a step 4). + private _seekLandingTime: number = 0; // Playback state private isPlaying: boolean = false; @@ -453,15 +457,19 @@ export class AudioPlayer { try { // Opus: resolve the offset from the precomputed seek index (the accurate VBR-safe transfer // function). 
The returned offset is a real page start, so the Range continuation lands the - // demuxer/decoder Ogg-sync-aligned. + // demuxer/decoder Ogg-sync-aligned. Also capture the landing time (t_page ≤ position) so + // reinitializeFromOffset can trim the leading decoded frames and land precisely at `position` + // (AC9 fine re-sync, §3.4a step 4). if (this.opusDecoder) { if (!this.activeOpusSidecar) { return { success: false, error: 'Cannot calculate byte offset' }; } + const resolution: OpusSeekResolution = resolveOpusByteOffset(this.activeOpusSidecar, position); + this._seekLandingTime = resolution.landingTimeSeconds; return { success: true, seekBeyondBuffer: true, - byteOffset: resolveOpusByteOffset(this.activeOpusSidecar, position) + byteOffset: resolution.byteOffset }; } @@ -499,7 +507,7 @@ export class AudioPlayer { calculateByteOffset(positionSeconds: number): number { if (this.opusDecoder) { return this.activeOpusSidecar - ? resolveOpusByteOffset(this.activeOpusSidecar, positionSeconds) + ? resolveOpusByteOffset(this.activeOpusSidecar, positionSeconds).byteOffset : 0; } if (!this.streamDecoder.getFormatInfo()) return 0; @@ -521,10 +529,11 @@ export class AudioPlayer { this.scheduler.setPlaybackOffset(seekPosition); // Reinitialize the active decoder for the Range-continuation stream (206 body, no header/ - // setup pages). Opus resets demux + codec state (keeping the cached config); the + // setup pages). Opus resets demux + codec state (keeping the cached config) and arms the + // lead-trim so decoded audio starts at `seekPosition`, not at the page boundary (AC9). The // StreamDecoder path uses totalStreamLength (the 206 Content-Length) to detect completion. 
if (this.opusDecoder) { - this.opusDecoder.reinitializeForRangeContinuation(); + this.opusDecoder.reinitializeForRangeContinuation(this._seekLandingTime, seekPosition); } else { this.streamDecoder.reinitializeForRangeContinuation(totalStreamLength); } @@ -643,6 +652,7 @@ export class AudioPlayer { this.isStreamingMode = false; this.streamingStarted = false; this.streamingCompleted = false; + this._seekLandingTime = 0; } private handlePlaybackEnded(): void { diff --git a/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts b/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts index 2fda588..27a118f 100644 --- a/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts +++ b/DeepDrftPublic/Interop/audio/IStreamingDecoder.ts @@ -50,8 +50,13 @@ export interface IStreamingDecoder { * Reinitialize for a Range-continuation after seek-beyond-buffer. The 206 body begins on an Ogg page * boundary and carries no setup pages — the decoder reuses the cached config and resets demux/codec * state so inter-frame continuity restarts cleanly from the new offset. + * + * @param landingTimeSeconds The actual presentation time of the resolved seek page (t_page ≤ target). + * @param targetTimeSeconds The user-requested seek position. The decoder trims the leading + * `(target - landing) * sampleRate` frames so playback lands at target + * (AC9 fine re-sync, §3.4a step 4). */ - reinitializeForRangeContinuation(): void; + reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void; /** Tear down the underlying WebCodecs decoder and release resources. 
*/ dispose(): void; diff --git a/DeepDrftPublic/Interop/audio/OggDemuxer.ts b/DeepDrftPublic/Interop/audio/OggDemuxer.ts index e9c9038..b3ef6ab 100644 --- a/DeepDrftPublic/Interop/audio/OggDemuxer.ts +++ b/DeepDrftPublic/Interop/audio/OggDemuxer.ts @@ -155,6 +155,9 @@ export class OggDemuxer { const isEos = (headerType & 0x04) !== 0; const granule = readUint64LE(header, GRANULE_OFFSET); // 0xFFFFFFFFFFFFFFFF (-1) means "no packet completed on this page" — no usable timestamp. + // We check the raw bytes rather than comparing `granule === -1` (or the equivalent JS number): + // the full 64-bit sentinel exceeds 2^53 and cannot be represented exactly as an IEEE-754 double, + // so the parsed value from readUint64LE would not equal the sentinel. The byte check is exact. const hasGranule = !(header[GRANULE_OFFSET] === 0xff && header[GRANULE_OFFSET + 1] === 0xff && header[GRANULE_OFFSET + 2] === 0xff && header[GRANULE_OFFSET + 3] === 0xff && header[GRANULE_OFFSET + 4] === 0xff && header[GRANULE_OFFSET + 5] === 0xff && diff --git a/DeepDrftPublic/Interop/audio/OpusSidecar.ts b/DeepDrftPublic/Interop/audio/OpusSidecar.ts index ccb27b7..3259367 100644 --- a/DeepDrftPublic/Interop/audio/OpusSidecar.ts +++ b/DeepDrftPublic/Interop/audio/OpusSidecar.ts @@ -123,21 +123,40 @@ export function presentationTimeSeconds(granulePosition: number, preSkip: number return Math.max(0, (granulePosition - preSkip) / OPUS_SAMPLE_RATE); } +/** + * Result of resolving a seek time to a page-start byte offset. + * `byteOffset` is the Range request origin; `landingTimeSeconds` is the actual presentation time of that + * page (t_page ≤ positionSeconds). The caller uses the delta `positionSeconds - landingTimeSeconds` to + * trim the decoded leading frames so playback lands at the requested position, not at t_page (AC9). + */ +export interface OpusSeekResolution { + /** Page-start byte offset to use as the Range request origin (Ogg-sync-aligned). 
*/ + byteOffset: number; + /** + * Presentation time of the resolved index page (seconds). Always ≤ positionSeconds. The decoder + * must trim `(positionSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE` leading frames so the + * audible start and the reported clock both land at positionSeconds, not at landingTimeSeconds. + */ + landingTimeSeconds: number; +} + /** * Resolve a seek time (seconds) to a file-absolute, page-start byte offset via the precomputed index — * the accurate VBR-safe transfer function (§3.4a A/C). Binary-searches for the largest entry whose - * presentation time is <= `positionSeconds` and returns its exact page-start byte offset. NOT - * interpolation, NOT byteRate math. With an empty index it degrades to the start of audio (the offset - * of the first audio page == the setup-header length, since the server emits [setup pages][audio pages]). + * presentation time is <= `positionSeconds`. Returns both the page-start byte offset AND the actual + * landing time of that page, so callers can trim leading decoded frames to land precisely at + * `positionSeconds` (AC9 fine re-sync). NOT interpolation, NOT byteRate math. + * + * With an empty index it degrades to the start of audio (offset == setup-header length, landing == 0). * * This is the single source of truth for Opus seek-offset math, shared by the seek-beyond-buffer path * (AudioPlayer) and any byte-offset resolver. The Range fetch from this offset lands the decoder * Ogg-sync-aligned because every indexed offset is a real page start. 
*/ -export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): number { +export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: number): OpusSeekResolution { const points = sidecar.points; if (points.length === 0) { - return sidecar.setupHeaderBytes.length; + return { byteOffset: sidecar.setupHeaderBytes.length, landingTimeSeconds: 0 }; } let lo = 0; @@ -153,7 +172,10 @@ export function resolveOpusByteOffset(sidecar: OpusSeekData, positionSeconds: nu hi = mid - 1; } } - return points[best].byteOffset; + return { + byteOffset: points[best].byteOffset, + landingTimeSeconds: presentationTimeSeconds(points[best].granulePosition, sidecar.preSkip) + }; } function toUint8Array(input: Uint8Array | ArrayBuffer | ArrayBufferView): Uint8Array { diff --git a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts index c38cb3d..4a4ffc2 100644 --- a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts +++ b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.test.ts @@ -31,7 +31,7 @@ */ import { parseSidecar, presentationTimeSeconds, resolveOpusByteOffset, OPUS_SAMPLE_RATE } from './OpusSidecar.js'; -import type { OpusSeekData } from './OpusSidecar.js'; +import type { OpusSeekData, OpusSeekResolution } from './OpusSidecar.js'; import { OggDemuxer, extractOpusHead, opusHeadChannelCount } from './OggDemuxer.js'; // --- tiny inline harness (no dependencies) --------------------------------------------------- @@ -180,12 +180,12 @@ test('resolveOpusByteOffset returns the page-start of the largest entry with tim const sc = sidecarFrom({ setupHeader: [9, 9, 9, 9], totalByteLength: 999_999, totalDuration: 1.5, preSkip: 1000, points, }); - assertEqual(resolveOpusByteOffset(sc, 0.0), 4096, 't=0 -> first point'); - assertEqual(resolveOpusByteOffset(sc, 0.4), 4096, 'just before bucket 1'); - assertEqual(resolveOpusByteOffset(sc, 0.5), 9096, 'exactly bucket 1'); - 
assertEqual(resolveOpusByteOffset(sc, 0.9), 9096, 'within bucket 1'); - assertEqual(resolveOpusByteOffset(sc, 1.0), 14096, 'exactly bucket 2'); - assertEqual(resolveOpusByteOffset(sc, 99), 19096, 'past end -> last point'); + assertEqual(resolveOpusByteOffset(sc, 0.0).byteOffset, 4096, 't=0 -> first point'); + assertEqual(resolveOpusByteOffset(sc, 0.4).byteOffset, 4096, 'just before bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 0.5).byteOffset, 9096, 'exactly bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 0.9).byteOffset, 9096, 'within bucket 1'); + assertEqual(resolveOpusByteOffset(sc, 1.0).byteOffset, 14096, 'exactly bucket 2'); + assertEqual(resolveOpusByteOffset(sc, 99).byteOffset, 19096, 'past end -> last point'); }); test('resolveOpusByteOffset never interpolates between points', () => { @@ -193,7 +193,7 @@ test('resolveOpusByteOffset never interpolates between points', () => { setupHeader: [0], totalByteLength: 10_000, totalDuration: 1.0, preSkip: 0, points: [{ granule: 0, byteOffset: 100 }, { granule: OPUS_SAMPLE_RATE, byteOffset: 9000 }], }); - assertEqual(resolveOpusByteOffset(sc, 0.5), 100, 'midpoint snaps to lower page start'); + assertEqual(resolveOpusByteOffset(sc, 0.5).byteOffset, 100, 'midpoint snaps to lower page start'); }); test('resolveOpusByteOffset degrades to start of audio with an empty index', () => { @@ -201,7 +201,58 @@ test('resolveOpusByteOffset degrades to start of audio with an empty index', () setupHeader: [1, 2, 3, 4, 5], totalByteLength: 0, totalDuration: 0, preSkip: 0, points: [], }); // start of audio == setup header length (server emits [setup pages][audio pages]). 
- assertEqual(resolveOpusByteOffset(sc, 10), 5, 'empty index degrades to audio start'); + assertEqual(resolveOpusByteOffset(sc, 10).byteOffset, 5, 'empty index degrades to audio start'); +}); + +// --- resolveOpusByteOffset: landingTimeSeconds (AC9 fine re-sync, §3.4a step 4) ----------------- + +test('resolveOpusByteOffset landingTimeSeconds equals the resolved page time, not the requested time', () => { + // Index: two points at t=0 s and t=0.5 s. + const preSkip = 312; + const sc = sidecarFrom({ + setupHeader: [0], totalByteLength: 50_000, totalDuration: 1.5, preSkip, + points: [ + { granule: preSkip, byteOffset: 4096 }, // t=0 + { granule: preSkip + OPUS_SAMPLE_RATE / 2, byteOffset: 9000 }, // t=0.5 s + ], + }); + // Seeking to 0.3 s lands on the t=0 page; landing should be 0, not 0.3. + const r03: OpusSeekResolution = resolveOpusByteOffset(sc, 0.3); + assertEqual(r03.byteOffset, 4096, 'seek 0.3 -> first page offset'); + assertEqual(r03.landingTimeSeconds, 0, 'landing at t=0 (page time, not target)'); + + // Seeking to exactly 0.5 s lands on the second page; landing == requested time. + const r05: OpusSeekResolution = resolveOpusByteOffset(sc, 0.5); + assertEqual(r05.byteOffset, 9000, 'seek 0.5 -> second page offset'); + assertEqual(r05.landingTimeSeconds, 0.5, 'landing == requested when exact page boundary'); +}); + +test('resolveOpusByteOffset empty index returns landingTimeSeconds = 0', () => { + const sc = sidecarFrom({ + setupHeader: [0, 1, 2], totalByteLength: 1000, totalDuration: 1.0, preSkip: 0, points: [], + }); + const r = resolveOpusByteOffset(sc, 5.0); + assertEqual(r.landingTimeSeconds, 0, 'empty index: landing is stream start (0 s)'); +}); + +// --- Lead-trim frame math (AC9 fine re-sync) --------------------------------------------------- +// The trim frame count is purely arithmetic: (target - landing) * 48000, rounded, clamped to ≥0. 
+// This is the exact formula in OpusStreamDecoder.reinitializeForRangeContinuation so we test it +// independently of the browser-bound WebCodecs decode. + +function leadTrimFrames(landingTimeSeconds: number, targetTimeSeconds: number): number { + return Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE)); +} + +test('lead-trim frame count is (target - landing) * 48000, rounded', () => { + // Page at t=0, seek to 0.3 s: trim 0.3 * 48000 = 14400 frames. + assertEqual(leadTrimFrames(0, 0.3), 14400, 'trim for 0.3 s offset'); + // Page at t=0.5 s, seek to 0.7 s: trim 0.2 * 48000 = 9600 frames. + assertEqual(leadTrimFrames(0.5, 0.7), 9600, 'trim for 0.2 s offset'); + // Exact page boundary: no trim needed. + assertEqual(leadTrimFrames(0.5, 0.5), 0, 'no trim when target == landing'); + // Guard against floating-point rounding producing a tiny negative: clamp to 0. + assertEqual(leadTrimFrames(0.5000001, 0.5), 0, 'negative rounds to zero (guard)'); }); // --- OggDemuxer: page -> packet extraction ---------------------------------------------------- diff --git a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts index b8fb03c..01c61f2 100644 --- a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts +++ b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts @@ -52,6 +52,11 @@ export class OpusStreamDecoder implements IStreamingDecoder { private decodedQueue: AudioData[] = []; private fatalError = false; + // Frames to discard from the head of the first post-seek decoded output (AC9 fine re-sync). + // Set by reinitializeForRangeContinuation to (targetTimeSeconds - landingTimeSeconds) * 48000, + // consumed frame-by-frame in audioDataToBuffer until exhausted (then zero for the rest of the stream). + private leadTrimFrames = 0; + // Monotonic packet timestamp (microseconds) handed to each EncodedAudioChunk. 
WebCodecs requires // strictly increasing timestamps; the true value is irrelevant to us (we schedule by accumulation), // so a synthetic 48 kHz-derived counter suffices and stays exact. @@ -135,14 +140,14 @@ export class OpusStreamDecoder implements IStreamingDecoder { const packets = this.demuxer.push(chunk); this.decodePackets(packets); - // Give the WebCodecs output callback a chance to run before we drain. + // Wait until the WebCodecs decoder has processed the queued packets before draining. await this.yieldToDecoder(); - return this.drainDecoded(false); + return this.drainDecoded(); } async complete(): Promise<AudioBuffer[]> { if (this.fatalError || !this.decoder || this.decoder.state !== 'configured') { - return this.drainDecoded(true); + return this.drainDecoded(); } try { await this.decoder.flush(); @@ -151,10 +156,28 @@ export class OpusStreamDecoder implements IStreamingDecoder { // own cancellation handles that — surface nothing, just drain what we have. console.warn('Opus decoder flush interrupted:', (err as Error).message); } - return this.drainDecoded(true); + return this.drainDecoded(); } - reinitializeForRangeContinuation(): void { + /** + * Reinitialize for a Range-continuation stream after seek-beyond-buffer. + * + * @param landingTimeSeconds The actual page-start presentation time resolved from the seek index + * (t_page ≤ targetTimeSeconds). This is the time at which the decoder + * will begin emitting audio after reconfigure. + * @param targetTimeSeconds The user-requested seek position. The difference + * `(target - landing) * OPUS_SAMPLE_RATE` frames are trimmed from the + * head of the decoded output so playback lands precisely at the target + * (AC9 fine re-sync, §3.4a step 4). + * + * Pre-skip note: the reconfigure re-applies the WebCodecs Opus decoder's own pre-skip trim. The + * W3C spec is non-normative on the exact sample count and browsers vary (~312 samples at 48 kHz in + * practice). 
`leadTrimFrames` is computed from the sidecar's pre-skip-corrected presentation times + * (via `presentationTimeSeconds`), so it does NOT double-count the per-reconfigure pre-skip; the + * decoder handles that internally. If browser testing reveals a residual offset, adjust the + * `leadTrimFrames` calculation here — this is the single point of control. + */ + reinitializeForRangeContinuation(landingTimeSeconds: number, targetTimeSeconds: number): void { // New 206 body starts on a page boundary with no setup pages; the codec config is unchanged but // inter-frame state must restart cleanly. AudioDecoder.reset() drops queued work and returns the // decoder to 'unconfigured', so we reconfigure with the cached config. The demuxer goes into @@ -162,6 +185,9 @@ export class OpusStreamDecoder implements IStreamingDecoder { this.demuxer.reset(true); this.decodedQueue = []; this.emittedFrames = 0; // post-seek buffers are positioned by the scheduler's playbackOffset + // Arm the lead trim: skip enough decoded frames to land at targetTimeSeconds, not at + // landingTimeSeconds (the page start). Clamp to ≥0 to guard against floating-point rounding. + this.leadTrimFrames = Math.max(0, Math.round((targetTimeSeconds - landingTimeSeconds) * OPUS_SAMPLE_RATE)); if (this.decoder && this.decoder.state === 'configured') { this.decoder.reset(); this.decoder.configure(this.buildConfig()); @@ -204,10 +230,10 @@ export class OpusStreamDecoder implements IStreamingDecoder { } /** - * Convert every queued AudioData into an AudioBuffer at the context sample rate, applying end-trim - * against the known total frame count. `final` allows the very last partial buffer to be emitted. + * Convert every queued AudioData into an AudioBuffer at the context sample rate, applying + * end-trim against the known total frame count and lead-trim for post-seek fine re-sync. 
*/ - private drainDecoded(_final: boolean): AudioBuffer[] { + private drainDecoded(): AudioBuffer[] { const out: AudioBuffer[] = []; const ctx = this.contextManager.getContext(); @@ -224,39 +250,66 @@ export class OpusStreamDecoder implements IStreamingDecoder { } /** - * Copy an AudioData's PCM into a new AudioBuffer, trimming to not exceed the known total length - * (end-trim). Returns null if the trim leaves zero frames (the buffer is entirely past the end). + * Copy an AudioData's PCM into a new AudioBuffer, applying: + * 1. Lead-trim (post-seek fine re-sync): skip `leadTrimFrames` from the front so the audible + * start lands at the requested seek position, not at the preceding page boundary (AC9). + * 2. End-trim: cap cumulative output at `totalFrames` so the final partial frame's padding + * does not leak past the true stream end. + * Returns null when either trim leaves zero usable frames. */ private audioDataToBuffer(ctx: BaseAudioContext, data: AudioData): AudioBuffer | null { const frames = data.numberOfFrames; const channels = data.numberOfChannels; + // Lead-trim: consume frames from the front for post-seek fine re-sync (AC9). + let skip = 0; + if (this.leadTrimFrames > 0) { + skip = Math.min(this.leadTrimFrames, frames); + this.leadTrimFrames -= skip; + } + const available = frames - skip; + if (available <= 0) return null; + // End-trim: cap cumulative output at totalFrames. - let keep = frames; + let keep = available; if (Number.isFinite(this.totalFrames)) { const room = this.totalFrames - this.emittedFrames; if (room <= 0) return null; - if (room < frames) keep = room; + if (room < available) keep = room; } if (keep <= 0) return null; const buffer = ctx.createBuffer(channels, keep, data.sampleRate); - const plane = new Float32Array(frames); // copyTo fills the full frame count, then we slice + // Allocate only for the frames we actually copy; frameOffset skips the lead-trim region. 
+ const plane = new Float32Array(keep); for (let ch = 0; ch < channels; ch++) { - data.copyTo(plane, { planeIndex: ch, format: 'f32-planar' }); - buffer.copyToChannel(keep === frames ? plane : plane.subarray(0, keep), ch); + data.copyTo(plane, { planeIndex: ch, frameOffset: skip, frameCount: keep, format: 'f32-planar' }); + buffer.copyToChannel(plane, ch); } this.emittedFrames += keep; return buffer; } /** - * Yield to the microtask/event loop so the synchronous decode() calls above let their async output - * callbacks fire before we drain. A zero-delay timeout (macrotask) is the reliable cross-engine way - * to let WebCodecs deliver outputs; awaiting decodeQueueSize draining is the precise alternative but - * not all engines settle it synchronously. + * Wait until the AudioDecoder's internal work queue drains (decodeQueueSize → 0), so output + * callbacks have fired before we drain decodedQueue. Bounded to MAX_YIELD_ITERS × 4 ms to guard + * against a stuck decoder; any outputs collected before the cap are still returned. `complete()` + * uses decoder.flush() as its final barrier instead (flush() is the authoritative end-of-stream + * drain). */ private yieldToDecoder(): Promise<void> { - return new Promise((resolve) => setTimeout(resolve, 0)); + const MAX_YIELD_ITERS = 50; // 50 × 4 ms = 200 ms ceiling + return new Promise((resolve) => { + let iters = 0; + const poll = () => { + if (!this.decoder || this.decoder.decodeQueueSize === 0 || iters >= MAX_YIELD_ITERS) { + resolve(); + return; + } + iters++; + setTimeout(poll, 4); + }; + poll(); + }); } }