using System.Buffers.Binary;

namespace DeepDrftContent.Processors.Opus;

/// <summary>
/// The result of walking an encoded Ogg Opus stream once: the captured setup header (the leading
/// OpusHead + OpusTags pages, verbatim) and the bucketed granule→byte seek index. This
/// is everything the sidecar artifact carries (§3.4a) — built at transcode time so delivery never
/// re-walks the stream.
/// </summary>
/// <param name="SetupHeaderBytes">The leading setup pages (OpusHead + OpusTags), exactly as they
/// appear at the start of the stream, ready to prepend to any mid-stream page run before decode.</param>
/// <param name="SeekIndex">The accurate, 0.5 s-bucketed granule→byte transfer function.</param>
public sealed record OggOpusWalk(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex);

/// <summary>
/// Pure Ogg-Opus stream walker. Reads the page structure directly (the OggS capture pattern and
/// the 27-byte page header) to (1) capture the setup-header pages and (2) record, for every audio page,
/// its end granule position and exact byte offset — bucketed to 0.5 s with each bucket boundary snapped
/// to the nearest enclosing page start. No external dependency: the encoder (FFmpeg) produces the bytes;
/// this turns them into the seek artifact deterministically, so it is unit-testable without a codec.
/// </summary>
public static class OggOpusParser
{
    // RFC 3533 §6: byte 5 of the page header is header_type_flag; bit 0x01 marks a page whose
    // first segment continues a packet begun on the previous page.
    private const int HeaderTypeOffset = 5;
    private const byte ContinuedPacketFlag = 0x01;

    // RFC 3533 §5: a lacing value of 255 means "packet continues in the next segment". When it is
    // the LAST lacing value of a page, the packet spills over onto the following page.
    private const byte FullLacingValue = 255;

    /// <summary>
    /// Walks <paramref name="oggBytes"/> and produces the setup header + seek index, or <c>null</c> if the
    /// bytes are not a recognisable Ogg Opus stream (no setup header, no audio pages, or truncated
    /// structure). A null is the caller's signal to treat the transcode as failed and leave the track
    /// lossless-only (C6) — it does not throw for malformed input.
    /// </summary>
    /// <param name="oggBytes">The complete encoded Ogg Opus stream, starting at its first page.</param>
    /// <returns>
    /// The captured setup header (OpusHead page, OpusTags page and any continuation pages of the
    /// OpusTags packet) together with the seek index, or <c>null</c> when the stream cannot be walked.
    /// </returns>
    /// <remarks>
    /// Trailing bytes shorter than one page header after the last complete page are ignored rather
    /// than treated as truncation.
    /// </remarks>
    public static OggOpusWalk? Walk(ReadOnlySpan<byte> oggBytes)
    {
        var setupHeaderEnd = -1;
        var sawOpusHead = false;
        var sawOpusTags = false;
        // True while the OpusTags packet has not yet finished (it may span several pages).
        var tagsPacketOpen = false;
        ushort preSkip = 0;

        var points = new List<OpusSeekPoint>();
        ulong lastGranule = 0;
        var nextBucketTime = 0.0;
        var firstAudioPointTaken = false;

        var offset = 0;
        while (offset + OggOpusConstants.OggPageHeaderSize <= oggBytes.Length)
        {
            var page = oggBytes.Slice(offset);
            if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern))
            {
                // Not on a page boundary — the encoder writes contiguous pages, so this means the
                // stream is malformed or we mis-stepped. Either way it is unrecoverable here.
                return null;
            }

            var segmentCount = page[OggOpusConstants.PageSegmentCountOffset];
            var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
            if (segmentTableEnd > page.Length)
                return null; // truncated header

            // The payload size is the sum of the lacing values in the segment table.
            var payloadSize = 0;
            for (var i = 0; i < segmentCount; i++)
                payloadSize += page[OggOpusConstants.OggPageHeaderSize + i];

            var pageTotalSize = segmentTableEnd + payloadSize;
            if (pageTotalSize > page.Length)
                return null; // truncated payload

            var payload = page.Slice(segmentTableEnd, payloadSize);
            var granule = BinaryPrimitives.ReadUInt64LittleEndian(
                page.Slice(OggOpusConstants.GranulePositionOffset, 8));

            var continuesPacket = (page[HeaderTypeOffset] & ContinuedPacketFlag) != 0;
            var endsMidPacket = segmentCount > 0 && page[segmentTableEnd - 1] == FullLacingValue;

            // The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They
            // are the leading pages whose payload opens with the Opus magic signatures.
            if (!sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature))
            {
                sawOpusHead = true;
                setupHeaderEnd = offset + pageTotalSize;

                // RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic:
                //   [0] version (1 byte), [1] channel count (1 byte),
                //   [2-3] pre_skip (little-endian uint16)  ← at packet bytes 10-11
                // pre_skip is the number of decoder samples to discard before presenting audio;
                // all granule→time conversions must subtract it (RFC 7845 §4.3).
                if (payload.Length >= OggOpusConstants.OpusHeadMinSize)
                    preSkip = BinaryPrimitives.ReadUInt16LittleEndian(
                        payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2));
            }
            else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
            {
                sawOpusTags = true;
                setupHeaderEnd = offset + pageTotalSize;
                tagsPacketOpen = endsMidPacket;
            }
            else if (tagsPacketOpen && continuesPacket)
            {
                // RFC 7845 §3: the comment header may span multiple pages, all with granulepos 0.
                // A continuation page is still setup data — fold it into the setup header instead
                // of mistaking it for the first audio page (which would truncate the captured
                // OpusTags packet and anchor t=0 in the middle of it).
                setupHeaderEnd = offset + pageTotalSize;
                tagsPacketOpen = endsMidPacket;
            }
            else if (sawOpusHead && sawOpusTags)
            {
                // Once an audio page is reached the comment header is over, whatever the lacing
                // of the last setup page claimed.
                tagsPacketOpen = false;

                // Audio page. Record the first audio page unconditionally (the seek anchor at t=0),
                // then one entry per 0.5 s bucket. A page with no end-granule (mid-packet continuation,
                // granulepos == -1) is skipped for indexing — its time is unknown — but still advances
                // the byte cursor.
                if (granule != OggOpusConstants.NoGranulePosition)
                {
                    // RFC 7845 §4.3: presentation time = max(0, granule − preSkip) / 48000.
                    // Use this corrected time for bucketing so that a stream with pre-skip 3840 (~80 ms)
                    // does not systematically offset every indexed time by that amount.
                    var correctedTime = Math.Max(0.0,
                        (granule - (double)preSkip) / OggOpusConstants.OpusSampleRate);

                    if (!firstAudioPointTaken)
                    {
                        // Anchor the first seek point at corrected time = 0 by storing the granule as
                        // preSkip. This guarantees that a binary search for t=0 ("largest entry with
                        // corrected time ≤ 0") always resolves to the first audio page's byte offset —
                        // even when the real granule is slightly above preSkip due to encoder lead-in.
                        points.Add(new OpusSeekPoint(preSkip, (ulong)offset));
                        firstAudioPointTaken = true;
                        nextBucketTime = OggOpusConstants.SeekBucketSeconds;
                    }
                    else if (correctedTime >= nextBucketTime)
                    {
                        points.Add(new OpusSeekPoint(granule, (ulong)offset));
                        // Advance past every bucket this page crossed so a long page does not emit a
                        // backlog of entries; the next bucket is the first boundary strictly after it.
                        while (nextBucketTime <= correctedTime)
                            nextBucketTime += OggOpusConstants.SeekBucketSeconds;
                    }

                    lastGranule = granule;
                }
            }

            offset += pageTotalSize;
        }

        // Without OpusTags no page is ever classified as audio, so an empty point list also covers
        // the "OpusHead but no OpusTags" case.
        if (!sawOpusHead || setupHeaderEnd < 0 || points.Count == 0)
            return null;

        var setupHeader = oggBytes[..setupHeaderEnd].ToArray();

        // RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener
        // experiences (the last audio page's corrected time, clamped to ≥ 0).
        var totalDuration = Math.Max(0.0,
            (lastGranule - (double)preSkip) / OggOpusConstants.OpusSampleRate);

        var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length, preSkip);
        return new OggOpusWalk(setupHeader, index);
    }

    /// <summary>
    /// Returns true when <paramref name="payload"/> is at least as long as
    /// <paramref name="signature"/> and begins with exactly those bytes.
    /// </summary>
    private static bool StartsWith(ReadOnlySpan<byte> payload, ReadOnlySpan<byte> signature) =>
        payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature);
}