using System.Buffers.Binary;

namespace DeepDrftContent.Processors.Opus;

/// <summary>
/// The result of walking an encoded Ogg Opus stream once: the captured setup header (the leading
/// OpusHead + OpusTags pages, verbatim) and the bucketed granule→byte seek index. This is everything
/// the sidecar artifact carries (§3.4a) — built at transcode time so delivery never re-walks the stream.
/// </summary>
/// <param name="SetupHeaderBytes">
/// The leading setup pages (OpusHead + OpusTags), exactly as they appear at the start of the stream,
/// ready to prepend to any mid-stream page run before decode.
/// </param>
/// <param name="SeekIndex">The accurate, 0.5 s-bucketed granule→byte transfer function.</param>
public sealed record OggOpusWalk(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex);

/// <summary>
/// Pure Ogg-Opus stream walker. Reads the page structure directly (the <c>OggS</c> capture pattern and
/// the 27-byte page header) to (1) capture the setup-header pages and (2) record, for every audio page,
/// its end granule position and exact byte offset — bucketed to 0.5 s with each bucket boundary snapped
/// to the nearest enclosing page start. No external dependency: the encoder (FFmpeg) produces the bytes;
/// this turns them into the seek artifact deterministically, so it is unit-testable without a codec.
/// </summary>
/// <remarks>
/// Two entry points share one page-processing core (<c>WalkState</c>), so they produce byte-identical
/// output by construction (the project's parity-oracle convention, mirroring
/// <c>RmsLoudnessAlgorithm.Compute</c> over its accumulator):
/// <list type="bullet">
/// <item><description>
/// <see cref="Walk(ReadOnlySpan{byte})"/> — the whole-buffer overload, retained as the byte-identity
/// parity oracle for the streaming variant.
/// </description></item>
/// <item><description>
/// <see cref="WalkAsync(Stream, CancellationToken)"/> — the streaming variant: walks the page structure
/// from a forward stream in a bounded read buffer (one Ogg page at a time), so peak managed memory is
/// O(buffer + seek-index + setup-header), independent of file size.
/// </description></item>
/// </list>
/// </remarks>
public static class OggOpusParser
{
    /// <summary>
    /// The largest a single Ogg page can be: header(27) + a full 255-entry segment table + the maximum
    /// payload those segments can describe (255 × 255 bytes). The streaming read buffer is floored at this
    /// so a complete page always fits, which means a short read on a page can only mean a truncated stream.
    /// </summary>
    private const int MaxOggPageSize = OggOpusConstants.OggPageHeaderSize + 255 + 255 * 255; // 65307

    /// <summary>
    /// Walks <paramref name="oggBytes"/> and produces the setup header + seek index, or <c>null</c> if the
    /// bytes are not a recognisable Ogg Opus stream (no setup header, no audio pages, or truncated
    /// structure). A <c>null</c> is the caller's signal to treat the transcode as failed and leave the
    /// track lossless-only (C6) — it does not throw for malformed input.
    /// </summary>
    /// <param name="oggBytes">The complete encoded stream, starting on a page boundary.</param>
    /// <returns>The walk result, or <c>null</c> when the stream is malformed or unusable.</returns>
    public static OggOpusWalk? Walk(ReadOnlySpan<byte> oggBytes)
    {
        var state = new WalkState();
        var offset = 0;

        // Written as a subtraction so the guard cannot overflow for spans close to int.MaxValue.
        while (oggBytes.Length - offset >= OggOpusConstants.OggPageHeaderSize)
        {
            var page = oggBytes.Slice(offset);
            if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern))
            {
                // Not on a page boundary — the encoder writes contiguous pages, so this means the
                // stream is malformed or we mis-stepped. Either way it is unrecoverable here.
                return null;
            }

            var segmentCount = page[OggOpusConstants.PageSegmentCountOffset];
            var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
            if (segmentTableEnd > page.Length)
            {
                return null; // truncated header
            }

            // The payload length is the sum of the lacing values in the segment table.
            var payloadSize = 0;
            for (var i = 0; i < segmentCount; i++)
            {
                payloadSize += page[OggOpusConstants.OggPageHeaderSize + i];
            }

            var pageTotalSize = segmentTableEnd + payloadSize;
            if (pageTotalSize > page.Length)
            {
                return null; // truncated payload
            }

            var payload = page.Slice(segmentTableEnd, payloadSize);
            var granule = BinaryPrimitives.ReadUInt64LittleEndian(
                page.Slice(OggOpusConstants.GranulePositionOffset, 8));

            state.AddPage(granule, payload, page.Slice(0, pageTotalSize), offset);
            offset += pageTotalSize;
        }

        return state.Finish((ulong)oggBytes.Length);
    }

    /// <summary>
    /// Streaming counterpart of <see cref="Walk(ReadOnlySpan{byte})"/>: walks the Ogg page structure from
    /// a forward <paramref name="stream"/> in a bounded read buffer (one Ogg page at a time), producing a
    /// byte-identical <see cref="OggOpusWalk"/> without ever holding the whole encoded file in memory.
    /// Returns <c>null</c> on the same malformed/truncated conditions as the buffer overload — it does
    /// not throw for bad input (only propagates on cancellation).
    /// </summary>
    /// <param name="stream">Forward-readable stream positioned at the first Ogg page.</param>
    /// <param name="cancellationToken">Token passed to every underlying read.</param>
    /// <returns>The walk result, or <c>null</c> when the stream is malformed or unusable.</returns>
    public static Task<OggOpusWalk?> WalkAsync(Stream stream, CancellationToken cancellationToken = default)
        => WalkAsync(stream, MaxOggPageSize, cancellationToken);

    /// <summary>
    /// Buffer-size-parameterised core. <paramref name="bufferSize"/> is floored at
    /// <see cref="MaxOggPageSize"/> so any single page always fits in the buffer; the absolute byte
    /// cursor (<c>absoluteOffset</c>) advances as the window compacts, so recorded seek offsets stay
    /// absolute even though the buffer holds only a small window at any instant.
    /// </summary>
    /// <param name="stream">Forward-readable stream positioned at the first Ogg page.</param>
    /// <param name="bufferSize">Requested window size; values below the maximum page size are raised to it.</param>
    /// <param name="cancellationToken">Token passed to every underlying read.</param>
    /// <returns>The walk result, or <c>null</c> when the stream is malformed or unusable.</returns>
    internal static async Task<OggOpusWalk?> WalkAsync(Stream stream, int bufferSize, CancellationToken cancellationToken)
    {
        var state = new WalkState();
        var buffer = new byte[Math.Max(bufferSize, MaxOggPageSize)];
        var len = 0;              // valid bytes held at buffer[0..len]
        long absoluteOffset = 0;  // absolute stream position of buffer[0]

        while (true)
        {
            // The buffer overload's loop guard requires a full fixed header before parsing a page; once
            // fewer than that remain (and the stream is drained) it is the natural end of the stream.
            if (len < OggOpusConstants.OggPageHeaderSize)
            {
                len += await FillAsync(stream, buffer, len, cancellationToken).ConfigureAwait(false);
                if (len < OggOpusConstants.OggPageHeaderSize)
                {
                    break;
                }
            }

            if (!buffer.AsSpan(0, 4).SequenceEqual(OggOpusConstants.CapturePattern))
            {
                return null; // not on a page boundary
            }

            var segmentCount = buffer[OggOpusConstants.PageSegmentCountOffset];
            var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
            if (len < segmentTableEnd)
            {
                len += await FillAsync(stream, buffer, len, cancellationToken).ConfigureAwait(false);
                if (len < segmentTableEnd)
                {
                    return null; // truncated header
                }
            }

            var payloadSize = 0;
            for (var i = 0; i < segmentCount; i++)
            {
                payloadSize += buffer[OggOpusConstants.OggPageHeaderSize + i];
            }

            var pageTotalSize = segmentTableEnd + payloadSize;
            if (len < pageTotalSize)
            {
                len += await FillAsync(stream, buffer, len, cancellationToken).ConfigureAwait(false);
                if (len < pageTotalSize)
                {
                    return null; // truncated payload (page never fully arrived before EOF)
                }
            }

            // Spans are created inline rather than held in locals so this method does not depend on
            // ref-struct locals being permitted inside async methods.
            var granule = BinaryPrimitives.ReadUInt64LittleEndian(
                buffer.AsSpan(OggOpusConstants.GranulePositionOffset, 8));
            state.AddPage(
                granule,
                buffer.AsSpan(segmentTableEnd, payloadSize),
                buffer.AsSpan(0, pageTotalSize),
                absoluteOffset);

            // Compact the consumed page off the front of the window; the absolute cursor advances by the
            // exact page size so every offset the state records remains an absolute stream position.
            var remaining = len - pageTotalSize;
            if (remaining > 0)
            {
                Buffer.BlockCopy(buffer, pageTotalSize, buffer, 0, remaining);
            }

            len = remaining;
            absoluteOffset += pageTotalSize;
        }

        // Total length = every consumed page plus any trailing bytes too short to form a page header.
        return state.Finish((ulong)absoluteOffset + (ulong)len);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from <paramref name="offset"/> to its end, issuing as many reads as
    /// needed until the buffer is full or the stream is exhausted.
    /// </summary>
    /// <param name="stream">Source stream.</param>
    /// <param name="buffer">Destination buffer.</param>
    /// <param name="offset">Index of the first free byte in <paramref name="buffer"/>.</param>
    /// <param name="ct">Token passed to every read.</param>
    /// <returns>The number of bytes added.</returns>
    private static async Task<int> FillAsync(Stream stream, byte[] buffer, int offset, CancellationToken ct)
    {
        var added = 0;
        while (offset + added < buffer.Length)
        {
            var read = await stream
                .ReadAsync(buffer.AsMemory(offset + added, buffer.Length - offset - added), ct)
                .ConfigureAwait(false);
            if (read == 0)
            {
                break; // end of stream
            }

            added += read;
        }

        return added;
    }

    /// <summary>Returns whether <paramref name="payload"/> begins with <paramref name="signature"/>.</summary>
    private static bool StartsWith(ReadOnlySpan<byte> payload, ReadOnlySpan<byte> signature)
        => payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature);

    /// <summary>
    /// The single page-processing core both <see cref="Walk(ReadOnlySpan{byte})"/> and
    /// <see cref="WalkAsync(Stream, int, CancellationToken)"/> drive, page by page, in stream order.
    /// Holding the setup-header + seek-index accumulation here is what makes the two entry points
    /// byte-identical by construction: there is exactly one copy of the OpusHead/OpusTags detection,
    /// pre-skip correction, t=0 anchoring, and 0.5 s bucketing logic.
    /// </summary>
    private sealed class WalkState
    {
        // The real setup (OpusHead + OpusTags pages) is a few KB; this cap bounds the streaming capture so
        // a malformed stream cannot grow it unboundedly. Hitting the cap marks the capture as aborted and
        // Finish then returns null, so a truncated setup header is never handed to a caller.
        private const int MaxSetupHeaderBytes = 8 * 1024 * 1024;

        // A lacing value of 255 means "the packet continues into the next segment"; any smaller value
        // terminates the packet.
        private const byte FullLacingValue = 255;

        private readonly List<byte> _setupHeader = new();
        private readonly List<OpusSeekPoint> _points = new();

        private bool _sawOpusHead;
        private bool _sawOpusTags;
        private bool _tagsPacketOpen;       // OpusTags packet started but has not yet terminated
        private bool _capturingSetup = true;
        private bool _setupCaptureAborted;  // capture hit MaxSetupHeaderBytes before the setup completed
        private bool _firstAudioPointTaken;
        private ushort _preSkip;
        private long _setupHeaderEnd = -1;  // absolute end offset of the last setup page seen; -1 = none
        private ulong _lastGranule;
        private double _nextBucketTime;

        /// <summary>
        /// Processes one fully-framed page.
        /// </summary>
        /// <param name="granule">The page's granule position field.</param>
        /// <param name="payload">The page payload (everything after the segment table).</param>
        /// <param name="pageBytes">
        /// The whole page (header + segment table + payload), used for verbatim setup capture.
        /// </param>
        /// <param name="absoluteOffset">
        /// The page's absolute start in the stream — the value recorded in the seek index.
        /// </param>
        public void AddPage(ulong granule, ReadOnlySpan<byte> payload, ReadOnlySpan<byte> pageBytes, long absoluteOffset)
        {
            if (_capturingSetup)
            {
                if (_setupHeader.Count + pageBytes.Length > MaxSetupHeaderBytes)
                {
                    // Malformed: give up the capture and remember that the header is incomplete.
                    _capturingSetup = false;
                    _setupCaptureAborted = true;
                }
                else
                {
                    _setupHeader.AddRange(pageBytes);
                }
            }

            if (_tagsPacketOpen)
            {
                // Continuation of a comment header that spans several pages (RFC 7845 §3 allows this).
                // It belongs to the setup header and must never be indexed as audio.
                _setupHeaderEnd = absoluteOffset + pageBytes.Length;
                if (!LeadingPacketContinues(pageBytes))
                {
                    _tagsPacketOpen = false;
                    _capturingSetup = false;
                }

                return;
            }

            // The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They
            // are the leading pages whose payload opens with the Opus magic signatures.
            if (!_sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature))
            {
                _sawOpusHead = true;
                _setupHeaderEnd = absoluteOffset + pageBytes.Length;

                // RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic:
                //   [0]   version (1 byte), [1] channel count (1 byte),
                //   [2-3] pre_skip (little-endian uint16)  ← at packet bytes 10-11
                // pre_skip is the number of decoder samples to discard before presenting audio;
                // all granule→time conversions must subtract it (RFC 7845 §4.3).
                if (payload.Length >= OggOpusConstants.OpusHeadMinSize)
                {
                    _preSkip = BinaryPrimitives.ReadUInt16LittleEndian(
                        payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2));
                }
            }
            else if (_sawOpusHead && !_sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
            {
                _sawOpusTags = true;
                _setupHeaderEnd = absoluteOffset + pageBytes.Length;

                if (LeadingPacketContinues(pageBytes))
                {
                    // The comment header runs on into the next page(s); keep capturing until it ends.
                    _tagsPacketOpen = true;
                }
                else
                {
                    // The setup header ends at this page; stop capturing so audio pages never grow it.
                    _capturingSetup = false;
                }
            }
            else if (_sawOpusHead && _sawOpusTags)
            {
                AddAudioPage(granule, absoluteOffset);
            }
        }

        /// <summary>
        /// Produces the final walk, or <c>null</c> when there is no OpusHead, no complete captured setup
        /// header, or no audio seek points.
        /// </summary>
        /// <param name="totalByteLength">
        /// The full stream length, recorded for end-of-stream seek clamping.
        /// </param>
        public OggOpusWalk? Finish(ulong totalByteLength)
        {
            if (!_sawOpusHead || _setupHeaderEnd < 0 || _setupCaptureAborted || _points.Count == 0)
            {
                return null;
            }

            var setupHeader = _setupHeader.ToArray();

            // RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener
            // experiences (the last audio page's corrected time, clamped to ≥ 0).
            var totalDuration = Math.Max(
                0.0,
                (_lastGranule - (double)_preSkip) / OggOpusConstants.OpusSampleRate);

            var index = new OggOpusSeekIndex(_points, totalDuration, totalByteLength, _preSkip);
            return new OggOpusWalk(setupHeader, index);
        }

        /// <summary>
        /// Indexes one audio page: the first page with a known granule becomes the t=0 anchor, then one
        /// entry is recorded per 0.5 s bucket. A page with no end-granule (mid-packet continuation,
        /// granulepos == -1) is skipped for indexing — its time is unknown.
        /// </summary>
        private void AddAudioPage(ulong granule, long absoluteOffset)
        {
            if (granule == OggOpusConstants.NoGranulePosition)
            {
                return;
            }

            // RFC 7845 §4.3: presentation time = max(0, granule − preSkip) / 48000.
            // Use this corrected time for bucketing so that a stream with pre-skip 3840 (~80 ms)
            // does not systematically offset every indexed time by that amount.
            var correctedTime = Math.Max(
                0.0,
                (granule - (double)_preSkip) / OggOpusConstants.OpusSampleRate);

            if (!_firstAudioPointTaken)
            {
                // Anchor the first seek point at corrected time = 0 by storing the granule as
                // preSkip. This guarantees that a binary search for t=0 ("largest entry with
                // corrected time ≤ 0") always resolves to the first audio page's byte offset —
                // even when the real granule is slightly above preSkip due to encoder lead-in.
                _points.Add(new OpusSeekPoint(_preSkip, (ulong)absoluteOffset));
                _firstAudioPointTaken = true;
                _nextBucketTime = OggOpusConstants.SeekBucketSeconds;
            }
            else if (correctedTime >= _nextBucketTime)
            {
                _points.Add(new OpusSeekPoint(granule, (ulong)absoluteOffset));

                // Advance past every bucket this page crossed so a long page does not emit a
                // backlog of entries; the next bucket is the first boundary strictly after it.
                while (_nextBucketTime <= correctedTime)
                {
                    _nextBucketTime += OggOpusConstants.SeekBucketSeconds;
                }
            }

            _lastGranule = granule;
        }

        /// <summary>
        /// Returns whether the packet occupying the start of <paramref name="pageBytes"/> is still open
        /// at the end of the page, i.e. every lacing value in the segment table is 255. A page with an
        /// empty segment table terminates nothing, so it also reports <c>true</c>.
        /// </summary>
        private static bool LeadingPacketContinues(ReadOnlySpan<byte> pageBytes)
        {
            int segmentCount = pageBytes[OggOpusConstants.PageSegmentCountOffset];
            var lacingValues = pageBytes.Slice(OggOpusConstants.OggPageHeaderSize, segmentCount);

            foreach (var lacingValue in lacingValues)
            {
                if (lacingValue != FullLacingValue)
                {
                    return false;
                }
            }

            return true;
        }
    }
}