fix: Wave 18.1 review — pre-skip subtraction, t=0 anchor, PreSkip in sidecar, stderr on cancel

This commit is contained in:
daniel-c-harvey
2026-06-23 06:55:31 -04:00
parent 33d6f34d8a
commit 6add30a4ff
6 changed files with 251 additions and 45 deletions
@@ -90,11 +90,13 @@ public sealed class FfmpegOpusEncoder
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
TryKill(process);
await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings
throw; // genuine shutdown cancellation — let it propagate
}
catch (OperationCanceledException)
{
TryKill(process);
await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings
_logger.LogError("Opus transcode: ffmpeg exceeded the {Timeout}s timeout for {Source}.",
_options.TimeoutSeconds, sourcePath);
return false;
@@ -36,8 +36,23 @@ public static class OggOpusConstants
/// <summary>Sentinel granule position for a page that ends mid-packet (no usable timestamp).</summary>
public const ulong NoGranulePosition = 0xFFFFFFFFFFFFFFFFUL;
/// <summary>Header size of the serialized seek-index blob: totalBytes(8) + duration(8) + count(4).</summary>
public const int SeekIndexHeaderSize = 20;
/// <summary>
/// Minimum byte length of an <c>OpusHead</c> packet payload to safely read <c>pre_skip</c>.
/// RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) + pre_skip(2) = 12 bytes minimum.
/// </summary>
public const int OpusHeadMinSize = 12;
/// <summary>
/// Byte offset of <c>pre_skip</c> within the full <c>OpusHead</c> packet payload (including the
/// magic). RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) = 10 bytes before pre_skip.
/// </summary>
public const int OpusHeadPreSkipOffset = 10;
/// <summary>
/// Header size of the serialized seek-index blob:
/// totalBytes(8) + duration(8) + count(4) + preSkip(2) + reserved(2) = 24 bytes.
/// </summary>
public const int SeekIndexHeaderSize = 24;
/// <summary>Size of one serialized seek point: granulepos(8) + byteOffset(8).</summary>
public const int SeekPointSize = 16;
@@ -33,6 +33,7 @@ public static class OggOpusParser
var setupHeaderEnd = -1;
var sawOpusHead = false;
var sawOpusTags = false;
ushort preSkip = 0;
var points = new List<OpusSeekPoint>();
ulong lastGranule = 0;
@@ -73,6 +74,15 @@ public static class OggOpusParser
{
sawOpusHead = true;
setupHeaderEnd = offset + pageTotalSize;
// RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic:
// [0] version (1 byte), [1] channel count (1 byte),
// [2-3] pre_skip (little-endian uint16) ← at packet bytes 10-11
// pre_skip is the number of decoder samples to discard before presenting audio;
// all granule→time conversions must subtract it (RFC 7845 §4.3).
if (payload.Length >= OggOpusConstants.OpusHeadMinSize)
preSkip = BinaryPrimitives.ReadUInt16LittleEndian(
payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2));
}
else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
{
@@ -87,19 +97,28 @@ public static class OggOpusParser
// the byte cursor.
if (granule != OggOpusConstants.NoGranulePosition)
{
var pageTime = granule / OggOpusConstants.OpusSampleRate;
// RFC 7845 §4.3: presentation time = max(0, granule preSkip) / 48000.
// Use this corrected time for bucketing so that a stream with pre-skip 3840 (~80 ms)
// does not systematically offset every indexed time by that amount.
var correctedTime = Math.Max(0.0,
(granule - (double)preSkip) / OggOpusConstants.OpusSampleRate);
if (!firstAudioPointTaken)
{
points.Add(new OpusSeekPoint(granule, (ulong)offset));
// Anchor the first seek point at corrected time = 0 by storing the granule as
// preSkip. This guarantees that a binary search for t=0 ("largest entry with
// corrected time ≤ 0") always resolves to the first audio page's byte offset —
// even when the real granule is slightly above preSkip due to encoder lead-in.
points.Add(new OpusSeekPoint(preSkip, (ulong)offset));
firstAudioPointTaken = true;
nextBucketTime = OggOpusConstants.SeekBucketSeconds;
}
else if (pageTime >= nextBucketTime)
else if (correctedTime >= nextBucketTime)
{
points.Add(new OpusSeekPoint(granule, (ulong)offset));
// Advance past every bucket this page crossed so a long page does not emit a
// backlog of entries; the next bucket is the first boundary strictly after it.
while (nextBucketTime <= pageTime)
while (nextBucketTime <= correctedTime)
nextBucketTime += OggOpusConstants.SeekBucketSeconds;
}
@@ -114,8 +133,11 @@ public static class OggOpusParser
return null;
var setupHeader = oggBytes[..setupHeaderEnd].ToArray();
var totalDuration = lastGranule / OggOpusConstants.OpusSampleRate;
var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length);
// RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener
// experiences (the last audio page's corrected time, clamped to ≥ 0).
var totalDuration = Math.Max(0.0,
(lastGranule - (double)preSkip) / OggOpusConstants.OpusSampleRate);
var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length, preSkip);
return new OggOpusWalk(setupHeader, index);
}
@@ -4,16 +4,25 @@ namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// A single seek-index entry: an authoritative 48 kHz <see cref="GranulePosition"/> (Opus granule
/// positions are always sample counts at 48 kHz, so time = granulepos / 48000) paired with the exact
/// byte offset of the Ogg page that carries it. Every <see cref="ByteOffset"/> is a real page-start
/// boundary, so a <c>Range: bytes={ByteOffset}-</c> fetch lands the decoder Ogg-sync-aligned.
/// positions are always sample counts at 48 kHz) paired with the exact byte offset of the Ogg page that
/// carries it. Every <see cref="ByteOffset"/> is a real page-start boundary, so a
/// <c>Range: bytes={ByteOffset}-</c> fetch lands the decoder Ogg-sync-aligned.
/// </summary>
/// <remarks>
/// Per RFC 7845 §4.3, the PCM presentation time is <c>(granulepos preSkip) / 48000</c>. The raw
/// <see cref="GranulePosition"/> is stored here as-is; callers should subtract the containing
/// <see cref="OggOpusSeekIndex.PreSkip"/> before converting to a presentation time. Use
/// <see cref="OggOpusSeekIndex.PresentationTimeSeconds"/> for the corrected value.
/// </remarks>
/// <param name="GranulePosition">The page's end granule position (48 kHz sample count).</param>
/// <param name="ByteOffset">The byte offset of the page start in the Opus file.</param>
public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOffset)
{
/// <summary>Time in seconds this granule position represents (granulepos / 48 kHz).</summary>
public double TimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate;
/// <summary>
/// Raw granule-position-to-time conversion (granulepos / 48 kHz). Does NOT subtract pre-skip — use
/// <see cref="OggOpusSeekIndex.PresentationTimeSeconds"/> for the RFC 7845-correct presentation time.
/// </summary>
public double RawTimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate;
}
/// <summary>
@@ -23,20 +32,38 @@ public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOff
/// One entry per 0.5 s of audio (<see cref="OggOpusConstants.SeekBucketSeconds"/>), each snapped to the
/// nearest enclosing page start, plus the totals needed to clamp a seek to range.
/// </summary>
/// <param name="Points">Ordered (granulepos, byteOffset) entries, ascending. The first is always the
/// first audio page (offset just past the setup headers).</param>
/// <param name="TotalDurationSeconds">Total stream duration from the final granule position.</param>
/// <param name="Points">Ordered (granulepos, byteOffset) entries, ascending. The first entry always
/// has <see cref="OpusSeekPoint.GranulePosition"/> == <paramref name="PreSkip"/> (corrected time = 0)
/// and points at the first audio page start, ensuring a seek to t=0 always resolves.</param>
/// <param name="TotalDurationSeconds">
/// Pre-skip-corrected total stream duration: <c>max(0, lastGranule preSkip) / 48000</c>.
/// </param>
/// <param name="TotalByteLength">Total Opus file byte length, for clamping a seek past the end.</param>
/// <param name="PreSkip">
/// The <c>pre_skip</c> value from the <c>OpusHead</c> identification header (RFC 7845 §5.1). Opus
/// decoders must discard this many samples from the decoded start before presenting audio. The client
/// (wave 18.4) needs this to trim the first decoded buffer; storing it here avoids a re-parse of the
/// Ogg stream at delivery time.
/// </param>
public sealed record OggOpusSeekIndex(
IReadOnlyList<OpusSeekPoint> Points,
double TotalDurationSeconds,
ulong TotalByteLength)
ulong TotalByteLength,
ushort PreSkip)
{
/// <summary>
/// Returns the RFC 7845-correct presentation time for a seek point: <c>max(0, granule preSkip) / 48000</c>.
/// Use this for all time comparisons; raw <see cref="OpusSeekPoint.RawTimeSeconds"/> omits the pre-skip.
/// </summary>
public double PresentationTimeSeconds(OpusSeekPoint point) =>
Math.Max(0.0, (point.GranulePosition - (double)PreSkip) / OggOpusConstants.OpusSampleRate);
/// <summary>
/// Serializes the index to the compact little-endian binary blob the sidecar stores. Layout:
/// <c>[uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount]</c> then
/// <c>pointCount × (uint64 granulepos, uint64 byteOffset)</c>. Fixed-width records keep the client
/// parse to a single typed-array read.
/// <c>[uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount][uint16 preSkip][uint16 reserved]</c>
/// then <c>pointCount × (uint64 granulepos, uint64 byteOffset)</c>. The four-byte preSkip+reserved
/// region pads the header to 24 bytes, keeping the point table 8-byte-aligned.
/// Fixed-width records keep the client parse to a single typed-array read.
/// </summary>
public byte[] ToBytes()
{
@@ -47,6 +74,8 @@ public sealed record OggOpusSeekIndex(
BinaryPrimitives.WriteUInt64LittleEndian(span[..8], TotalByteLength);
BinaryPrimitives.WriteDoubleLittleEndian(span.Slice(8, 8), TotalDurationSeconds);
BinaryPrimitives.WriteUInt32LittleEndian(span.Slice(16, 4), (uint)Points.Count);
BinaryPrimitives.WriteUInt16LittleEndian(span.Slice(20, 2), PreSkip);
// bytes 22-23: reserved (zero-initialized by array allocation)
var cursor = OggOpusConstants.SeekIndexHeaderSize;
foreach (var point in Points)
@@ -73,6 +102,8 @@ public sealed record OggOpusSeekIndex(
var totalByteLength = BinaryPrimitives.ReadUInt64LittleEndian(bytes[..8]);
var totalDuration = BinaryPrimitives.ReadDoubleLittleEndian(bytes.Slice(8, 8));
var count = BinaryPrimitives.ReadUInt32LittleEndian(bytes.Slice(16, 4));
var preSkip = BinaryPrimitives.ReadUInt16LittleEndian(bytes.Slice(20, 2));
// bytes 22-23: reserved — ignored on read for forward-compatibility
var expected = OggOpusConstants.SeekIndexHeaderSize + (long)count * OggOpusConstants.SeekPointSize;
if (bytes.Length < expected)
@@ -88,6 +119,6 @@ public sealed record OggOpusSeekIndex(
cursor += OggOpusConstants.SeekPointSize;
}
return new OggOpusSeekIndex(points, totalDuration, totalByteLength);
return new OggOpusSeekIndex(points, totalDuration, totalByteLength, preSkip);
}
}
@@ -132,6 +132,9 @@ public sealed class OpusTranscodeService
private async Task EnsureVaultAsync()
{
// The TrackOpus vault is created at host startup (Startup.cs), so this guard is normally a
// no-op for the upload path. It is retained for the backfill path, which may run via a
// standalone CLI or a host that skips vault pre-creation, where the vault might not exist.
if (!_fileDatabase.HasVault(VaultConstants.TrackOpus))
await _fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio);
}