147 lines
7.7 KiB
C#
147 lines
7.7 KiB
C#
using System.Buffers.Binary;
|
||
|
||
namespace DeepDrftContent.Processors.Opus;
|
||
|
||
/// <summary>
/// Outcome of a single pass over an encoded Ogg Opus stream. It bundles the two pieces the sidecar
/// artifact carries (§3.4a): the setup header — the leading <c>OpusHead</c> + <c>OpusTags</c> pages,
/// copied verbatim — and the bucketed granule→byte seek index. Both are produced at transcode time
/// so that delivery never has to walk the stream again.
/// </summary>
/// <param name="SetupHeaderBytes">
/// The leading setup pages (OpusHead + OpusTags), byte-for-byte as they appear at the start of the
/// stream, ready to be prepended to any mid-stream run of pages before decoding.
/// </param>
/// <param name="SeekIndex">
/// The accurate granule→byte transfer function, bucketed at 0.5 s.
/// </param>
public sealed record OggOpusWalk(
    byte[] SetupHeaderBytes,
    OggOpusSeekIndex SeekIndex);
|
||
|
||
/// <summary>
/// Pure Ogg-Opus stream walker. Reads the page structure directly (the <c>OggS</c> capture pattern and
/// the fixed-size page header) to (1) capture the setup-header pages and (2) record, for audio pages,
/// the end granule position and exact byte offset — bucketed to 0.5 s with each entry pointing at the
/// start of the page that crossed the bucket boundary. No external dependency: the encoder (FFmpeg)
/// produces the bytes; this turns them into the seek artifact deterministically, so it is
/// unit-testable without a codec.
/// </summary>
public static class OggOpusParser
{
    /// <summary>
    /// Ogg lacing value meaning "this segment is full". When it is the last entry of a page's
    /// segment table, the final packet on that page continues onto the next page (RFC 3533 §6).
    /// </summary>
    private const byte FullLacingValue = 255;

    /// <summary>
    /// Walks <paramref name="oggBytes"/> page by page and produces the setup header + seek index.
    /// </summary>
    /// <param name="oggBytes">The complete encoded Ogg Opus stream, starting on a page boundary.</param>
    /// <returns>
    /// The captured setup header and seek index, or <c>null</c> if the bytes are not a recognisable
    /// Ogg Opus stream: a page does not start with the capture pattern, a page header or payload is
    /// truncated, no <c>OpusHead</c> was found, or no audio page with a known granule position was
    /// found. A null is the caller's signal to treat the transcode as failed and leave the track
    /// lossless-only (C6) — malformed input is reported this way rather than by throwing.
    /// Trailing bytes too short to hold a page header are ignored.
    /// </returns>
    public static OggOpusWalk? Walk(ReadOnlySpan<byte> oggBytes)
    {
        var setupHeaderEnd = -1;
        var sawOpusHead = false;
        var sawOpusTags = false;
        // True while the OpusTags packet has started but not yet completed, i.e. the following
        // page(s) still belong to the setup header.
        var tagsPacketOpen = false;
        ushort preSkip = 0;

        var points = new List<OpusSeekPoint>();
        ulong lastGranule = 0;
        var nextBucketTime = 0.0;
        var firstAudioPointTaken = false;
        // Byte offset of the very first audio page, even if that page carries no granule position.
        var firstAudioPageOffset = -1;

        var offset = 0;
        while (offset + OggOpusConstants.OggPageHeaderSize <= oggBytes.Length)
        {
            var page = oggBytes.Slice(offset);
            if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern))
            {
                // Not on a page boundary — the encoder writes contiguous pages, so this means the
                // stream is malformed or we mis-stepped. Either way it is unrecoverable here.
                return null;
            }

            var segmentCount = page[OggOpusConstants.PageSegmentCountOffset];
            var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
            if (segmentTableEnd > page.Length)
            {
                return null; // truncated header
            }

            // The payload length is the sum of the lacing values in the segment table.
            var segmentTable = page.Slice(OggOpusConstants.OggPageHeaderSize, segmentCount);
            var payloadSize = 0;
            foreach (var lacingValue in segmentTable)
            {
                payloadSize += lacingValue;
            }

            var pageTotalSize = segmentTableEnd + payloadSize;
            if (pageTotalSize > page.Length)
            {
                return null; // truncated payload
            }

            var payload = page.Slice(segmentTableEnd, payloadSize);
            var granule = BinaryPrimitives.ReadUInt64LittleEndian(
                page.Slice(OggOpusConstants.GranulePositionOffset, 8));

            // The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They
            // are the leading pages whose payload opens with the Opus magic signatures, plus any
            // pages onto which the OpusTags packet continues.
            if (!sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature))
            {
                sawOpusHead = true;
                setupHeaderEnd = offset + pageTotalSize;

                // RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic:
                //   [0] version (1 byte), [1] channel count (1 byte),
                //   [2-3] pre_skip (little-endian uint16)  ← at packet bytes 10-11
                // pre_skip is the number of decoder samples to discard before presenting audio;
                // all granule→time conversions must subtract it (RFC 7845 §4.3).
                // A payload too short to hold the field leaves preSkip at 0.
                if (payload.Length >= OggOpusConstants.OpusHeadMinSize)
                {
                    preSkip = BinaryPrimitives.ReadUInt16LittleEndian(
                        payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2));
                }
            }
            else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
            {
                sawOpusTags = true;
                setupHeaderEnd = offset + pageTotalSize;
                // RFC 7845 §3: the comment header may span several pages (large tags, embedded
                // artwork). If the last segment is full, the packet is not finished yet.
                tagsPacketOpen = PacketContinuesPastPage(segmentTable);
            }
            else if (tagsPacketOpen)
            {
                // Continuation of the OpusTags packet: still setup header, never an audio page.
                // Without this, the captured header would be cut short and the continuation page
                // (granulepos 0) would be mistaken for the first audio page.
                setupHeaderEnd = offset + pageTotalSize;
                // A page with no segments neither continues nor ends the packet; keep waiting.
                tagsPacketOpen = segmentCount == 0 || PacketContinuesPastPage(segmentTable);
            }
            else if (sawOpusHead && sawOpusTags)
            {
                // Audio page. The seek anchor at t=0 must point at the first audio page even when
                // that page has no end-granule, so remember its offset before looking at the granule.
                if (firstAudioPageOffset < 0)
                {
                    firstAudioPageOffset = offset;
                }

                // A page with no end-granule (mid-packet continuation, granulepos == -1) is skipped
                // for indexing — its time is unknown — but still advances the byte cursor.
                if (granule != OggOpusConstants.NoGranulePosition)
                {
                    // Bucket on the pre-skip-corrected time so that a stream with pre-skip 3840
                    // (~80 ms) does not systematically offset every indexed time by that amount.
                    var correctedTime = ToPresentationSeconds(granule, preSkip);

                    if (!firstAudioPointTaken)
                    {
                        // Anchor the first seek point at corrected time = 0 by storing the granule as
                        // preSkip. This guarantees that a search for t=0 ("largest entry with
                        // corrected time ≤ 0") always resolves to the first audio page's byte offset —
                        // even when the real granule is slightly above preSkip due to encoder lead-in.
                        points.Add(new OpusSeekPoint(preSkip, (ulong)firstAudioPageOffset));
                        firstAudioPointTaken = true;
                        nextBucketTime = OggOpusConstants.SeekBucketSeconds;
                    }
                    else if (correctedTime >= nextBucketTime)
                    {
                        points.Add(new OpusSeekPoint(granule, (ulong)offset));
                        // Advance past every bucket this page crossed so a long page does not emit a
                        // backlog of entries; the next bucket is the first boundary strictly after it.
                        while (nextBucketTime <= correctedTime)
                        {
                            nextBucketTime += OggOpusConstants.SeekBucketSeconds;
                        }
                    }

                    lastGranule = granule;
                }
            }

            offset += pageTotalSize;
        }

        if (!sawOpusHead || setupHeaderEnd < 0 || points.Count == 0)
        {
            return null;
        }

        var setupHeader = oggBytes[..setupHeaderEnd].ToArray();
        // RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener
        // experiences (the last indexed audio page's corrected time, clamped to ≥ 0).
        var totalDuration = ToPresentationSeconds(lastGranule, preSkip);
        var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length, preSkip);
        return new OggOpusWalk(setupHeader, index);
    }

    /// <summary>
    /// RFC 7845 §4.3 presentation time: max(0, granule − preSkip) divided by the Opus sample rate.
    /// </summary>
    private static double ToPresentationSeconds(ulong granule, ushort preSkip) =>
        Math.Max(0.0, (granule - (double)preSkip) / OggOpusConstants.OpusSampleRate);

    /// <summary>
    /// True when the page's last lacing value is a full segment, meaning the final packet on the
    /// page is not complete and continues on the next page. An empty segment table yields false.
    /// </summary>
    private static bool PacketContinuesPastPage(ReadOnlySpan<byte> segmentTable) =>
        segmentTable.Length > 0 && segmentTable[^1] == FullLacingValue;

    /// <summary>
    /// True when <paramref name="payload"/> is at least as long as <paramref name="signature"/> and
    /// begins with exactly those bytes.
    /// </summary>
    private static bool StartsWith(ReadOnlySpan<byte> payload, ReadOnlySpan<byte> signature) =>
        payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature);
}
|