using System.Buffers.Binary;

namespace DeepDrftContent.Processors.Opus;

/// <summary>
/// The result of walking an encoded Ogg Opus stream once: the captured setup header (the leading
/// OpusHead + OpusTags pages, verbatim) and the bucketed granule→byte seek index. This
/// is everything the sidecar artifact carries (§3.4a) — built at transcode time so delivery never
/// re-walks the stream.
/// </summary>
/// <param name="SetupHeaderBytes">The leading setup pages (OpusHead + OpusTags, including any
/// continuation pages of a multi-page OpusTags packet), exactly as they appear at the start of the
/// stream, ready to prepend to any mid-stream page run before decode.</param>
/// <param name="SeekIndex">The accurate, 0.5 s-bucketed granule→byte transfer function.</param>
public sealed record OggOpusWalk(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex);

/// <summary>
/// Pure Ogg-Opus stream walker. Reads the page structure directly (the OggS capture pattern and
/// the 27-byte page header) to (1) capture the setup-header pages and (2) record, for every audio page,
/// its end granule position and exact byte offset — bucketed to 0.5 s with each bucket boundary snapped
/// to the nearest enclosing page start. No external dependency: the encoder (FFmpeg) produces the bytes;
/// this turns them into the seek artifact deterministically, so it is unit-testable without a codec.
/// </summary>
public static class OggOpusParser
{
    /// <summary>Byte offset of the header-type flags within an Ogg page header (RFC 3533).</summary>
    private const int HeaderTypeOffset = 5;

    /// <summary>Header-type bit set when a page's first packet is continued from the previous page.</summary>
    private const byte ContinuedPacketFlag = 0x01;

    /// <summary>
    /// Walks <paramref name="oggBytes"/> and produces the setup header + seek index, or <c>null</c> if the
    /// bytes are not a recognisable Ogg Opus stream (no setup header, no audio pages, or truncated
    /// structure). A <c>null</c> is the caller's signal to treat the transcode as failed and leave the track
    /// lossless-only (C6) — it does not throw for malformed input.
    /// </summary>
    /// <param name="oggBytes">The complete encoded Ogg Opus stream, starting at its first page.</param>
    /// <returns>The captured setup header and seek index, or <c>null</c> when the stream cannot be walked.</returns>
    public static OggOpusWalk? Walk(ReadOnlySpan<byte> oggBytes)
    {
        var setupHeaderEnd = -1;
        var sawOpusHead = false;
        var sawOpusTags = false;
        var audioStarted = false;
        var points = new List<OpusSeekPoint>();
        ulong lastGranule = 0;
        var nextBucketTime = 0.0;
        var offset = 0;

        while (offset + OggOpusConstants.OggPageHeaderSize <= oggBytes.Length)
        {
            var page = oggBytes.Slice(offset);
            if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern))
            {
                // Not on a page boundary — the encoder writes contiguous pages, so this means the
                // stream is malformed or we mis-stepped. Either way it is unrecoverable here.
                return null;
            }

            var segmentCount = page[OggOpusConstants.PageSegmentCountOffset];
            var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
            if (segmentTableEnd > page.Length)
            {
                return null; // truncated header
            }

            // The payload length is the sum of the lacing values in the segment table.
            var payloadSize = 0;
            for (var i = 0; i < segmentCount; i++)
            {
                payloadSize += page[OggOpusConstants.OggPageHeaderSize + i];
            }

            var pageTotalSize = segmentTableEnd + payloadSize;
            if (pageTotalSize > page.Length)
            {
                return null; // truncated payload
            }

            var payload = page.Slice(segmentTableEnd, payloadSize);
            var granule = BinaryPrimitives.ReadUInt64LittleEndian(
                page.Slice(OggOpusConstants.GranulePositionOffset, 8));

            // The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They
            // are the leading pages whose payload opens with the Opus magic signatures.
            if (!sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature))
            {
                sawOpusHead = true;
                setupHeaderEnd = offset + pageTotalSize;
            }
            else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
            {
                sawOpusTags = true;
                setupHeaderEnd = offset + pageTotalSize;
            }
            else if (sawOpusHead && sawOpusTags)
            {
                var continuesPacket = (page[HeaderTypeOffset] & ContinuedPacketFlag) != 0;
                if (!audioStarted && continuesPacket)
                {
                    // A large OpusTags packet (e.g. embedded cover art) may span several pages. Its
                    // continuation pages do not start with the magic signature, but they are still
                    // setup data: audio always begins on a fresh page. Fold them into the setup
                    // header instead of indexing them as audio with a bogus granule of 0.
                    setupHeaderEnd = offset + pageTotalSize;
                }
                else
                {
                    // Audio page. Record the first audio page unconditionally (the seek anchor at t=0),
                    // then one entry per 0.5 s bucket. A page with no end-granule (mid-packet
                    // continuation, granulepos == -1) is skipped for indexing — its time is unknown —
                    // but still advances the byte cursor.
                    audioStarted = true;
                    if (granule != OggOpusConstants.NoGranulePosition)
                    {
                        // Explicit floating-point division: the page time must keep its sub-second
                        // part for 0.5 s bucketing regardless of how the sample-rate constant is typed.
                        var pageTime = (double)granule / OggOpusConstants.OpusSampleRate;
                        if (points.Count == 0)
                        {
                            points.Add(new OpusSeekPoint(granule, (ulong)offset));
                            nextBucketTime = OggOpusConstants.SeekBucketSeconds;
                        }
                        else if (pageTime >= nextBucketTime)
                        {
                            points.Add(new OpusSeekPoint(granule, (ulong)offset));

                            // Advance past every bucket this page crossed so a long page does not emit a
                            // backlog of entries; the next bucket is the first boundary strictly after it.
                            while (nextBucketTime <= pageTime)
                            {
                                nextBucketTime += OggOpusConstants.SeekBucketSeconds;
                            }
                        }

                        lastGranule = granule;
                    }
                }
            }

            offset += pageTotalSize;
        }

        // No setup header or no indexable audio page: not a usable Ogg Opus stream.
        if (!sawOpusHead || setupHeaderEnd < 0 || points.Count == 0)
        {
            return null;
        }

        var setupHeader = oggBytes[..setupHeaderEnd].ToArray();
        var totalDuration = (double)lastGranule / OggOpusConstants.OpusSampleRate;
        var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length);
        return new OggOpusWalk(setupHeader, index);
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="payload"/> is at least as long as
    /// <paramref name="signature"/> and begins with exactly those bytes.
    /// </summary>
    private static bool StartsWith(ReadOnlySpan<byte> payload, ReadOnlySpan<byte> signature) =>
        payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature);
}