From 33d6f34d8a7e19ddea004549ed8ca55a19341393 Mon Sep 17 00:00:00 2001 From: daniel-c-harvey Date: Tue, 23 Jun 2026 06:30:10 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feature:=20Phase=2018.1=20=E2=80=94=20deriv?= =?UTF-8?q?e=20Opus=20320=20+=20seek-index=20sidecar=20at=20ingest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Background-job transcode (ffmpeg/libopus) after source store; pure C# Ogg walker builds the 0.5s-bucketed granule→byte seek index + captures the OpusHead/OpusTags setup header into a per-track sidecar in a new track-opus vault. Best-effort, additive, regenerated on replace-audio. --- DeepDrftAPI/Program.cs | 8 + .../Services/Opus/IOpusTranscodeQueue.cs | 18 ++ .../Opus/OpusTranscodeBackgroundService.cs | 72 +++++ DeepDrftAPI/Services/UnifiedTrackService.cs | 14 + DeepDrftAPI/Startup.cs | 22 ++ DeepDrftContent/Constants/VaultConstants.cs | 9 + .../FileDatabase/Models/MediaModels.cs | 1 + .../Processors/Opus/FfmpegOpusEncoder.cs | 138 ++++++++ .../Processors/Opus/OggOpusConstants.cs | 50 +++ .../Processors/Opus/OggOpusParser.cs | 124 +++++++ .../Processors/Opus/OggOpusSeekIndex.cs | 93 ++++++ .../Processors/Opus/OpusSidecar.cs | 57 ++++ .../Processors/Opus/OpusTranscodeOptions.cs | 33 ++ .../Processors/Opus/OpusTranscodeService.cs | 151 +++++++++ DeepDrftTests/NoOpOpusTranscodeQueue.cs | 16 + DeepDrftTests/OggOpusParserTests.cs | 304 ++++++++++++++++++ .../UploadDuplicateDetectionTests.cs | 1 + 17 files changed, 1111 insertions(+) create mode 100644 DeepDrftAPI/Services/Opus/IOpusTranscodeQueue.cs create mode 100644 DeepDrftAPI/Services/Opus/OpusTranscodeBackgroundService.cs create mode 100644 DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs create mode 100644 DeepDrftContent/Processors/Opus/OggOpusConstants.cs create mode 100644 DeepDrftContent/Processors/Opus/OggOpusParser.cs create mode 100644 DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs create mode 100644 DeepDrftContent/Processors/Opus/OpusSidecar.cs create mode 100644 DeepDrftContent/Processors/Opus/OpusTranscodeOptions.cs create mode 100644 DeepDrftContent/Processors/Opus/OpusTranscodeService.cs create mode 100644 DeepDrftTests/NoOpOpusTranscodeQueue.cs create mode 100644 DeepDrftTests/OggOpusParserTests.cs diff --git a/DeepDrftAPI/Program.cs b/DeepDrftAPI/Program.cs index 57ae729..6838e2a 100644 --- a/DeepDrftAPI/Program.cs +++ b/DeepDrftAPI/Program.cs @@ -4,6 +4,7 @@ using DeepDrftAPI; using DeepDrftAPI.Middleware; using DeepDrftAPI.Models; using DeepDrftAPI.Services; +using DeepDrftAPI.Services.Opus; using DeepDrftData; using DeepDrftData.Data; using DeepDrftData.Repositories; @@ -66,6 +67,13 @@ builder.Services .AddScoped(sp => sp.GetRequiredService()); builder.Services.AddScoped(); +// Background Opus transcode (Phase 18.1, OQ6). One singleton is both the enqueue seam +// (IOpusTranscodeQueue, injected into the scoped UnifiedTrackService) and the hosted drain loop +// (IHostedService). It resolves OpusTranscodeService — a singleton — so no scope is captured. +builder.Services.AddSingleton(); +builder.Services.AddSingleton(sp => sp.GetRequiredService()); +builder.Services.AddHostedService(sp => sp.GetRequiredService()); + // Phase 16 anonymous telemetry — append-only event logs + incremental play-counter rollup (all SQL). // EventManager is the IEventService boundary; EventRepository owns the EF writes and the // release-resolution + counter-bump transaction. diff --git a/DeepDrftAPI/Services/Opus/IOpusTranscodeQueue.cs b/DeepDrftAPI/Services/Opus/IOpusTranscodeQueue.cs new file mode 100644 index 0000000..b30f317 --- /dev/null +++ b/DeepDrftAPI/Services/Opus/IOpusTranscodeQueue.cs @@ -0,0 +1,18 @@ +namespace DeepDrftAPI.Services.Opus; + +/// +/// The enqueue seam for the background Opus transcode (OQ6 / §3.1a). +/// depends only on this thin interface — not on the worker — so adding the background derive to the +/// upload/replace paths costs one small dependency, not the whole transcode graph. Enqueuing is +/// non-blocking and best-effort: a freshly uploaded track is already persisted and playable losslessly +/// before anything is enqueued, and the transcode runs off the request thread. +/// +public interface IOpusTranscodeQueue +{ + /// + /// Schedules a background Opus derive for the track identified by . Returns + /// immediately. A dropped or failed enqueue must not affect the caller — the track remains + /// lossless-only and eligible for backfill. + /// + void Enqueue(string entryKey); +} diff --git a/DeepDrftAPI/Services/Opus/OpusTranscodeBackgroundService.cs b/DeepDrftAPI/Services/Opus/OpusTranscodeBackgroundService.cs new file mode 100644 index 0000000..2ff077f --- /dev/null +++ b/DeepDrftAPI/Services/Opus/OpusTranscodeBackgroundService.cs @@ -0,0 +1,72 @@ +using System.Threading.Channels; +using DeepDrftContent.Processors.Opus; + +namespace DeepDrftAPI.Services.Opus; + +/// +/// The background worker behind (OQ6 / §3.1a). An unbounded in-process +/// channel buffers EntryKeys enqueued by the upload and replace-audio paths; a single hosted loop drains +/// them one at a time and runs for each. Serial +/// by design — a transcode is CPU-heavy (§3.1), so running them concurrently would starve request +/// handling; one-at-a-time keeps the derive strictly off the hot path without saturating the host. +/// +/// This worker IS the queue (implements ) so enqueue and drain share one +/// channel with no extra indirection. It is registered as a singleton and surfaced under both the +/// interface and . +/// +public sealed class OpusTranscodeBackgroundService : BackgroundService, IOpusTranscodeQueue +{ + private readonly Channel _channel = + Channel.CreateUnbounded(new UnboundedChannelOptions { SingleReader = true }); + + private readonly OpusTranscodeService _transcodeService; + private readonly ILogger _logger; + + public OpusTranscodeBackgroundService( + OpusTranscodeService transcodeService, + ILogger logger) + { + _transcodeService = transcodeService; + _logger = logger; + } + + public void Enqueue(string entryKey) + { + if (string.IsNullOrWhiteSpace(entryKey)) + return; + + if (!_channel.Writer.TryWrite(entryKey)) + { + // Unbounded writer only rejects after Complete(), i.e. during shutdown. The track stays + // lossless-only and is eligible for backfill, so a dropped enqueue is non-fatal — log it. + _logger.LogWarning("Opus transcode: could not enqueue {EntryKey} (queue closed).", entryKey); + } + } + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + await foreach (var entryKey in _channel.Reader.ReadAllAsync(stoppingToken)) + { + try + { + await _transcodeService.TranscodeAndStoreAsync(entryKey, stoppingToken); + } + catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) + { + break; // host shutting down + } + catch (Exception ex) + { + // TranscodeAndStoreAsync already swallows expected failures; this guards the loop against + // anything unexpected so one bad track never kills the worker. + _logger.LogError(ex, "Opus transcode: unhandled failure draining {EntryKey}; worker continues.", entryKey); + } + } + } + + public override Task StopAsync(CancellationToken cancellationToken) + { + _channel.Writer.TryComplete(); + return base.StopAsync(cancellationToken); + } +} diff --git a/DeepDrftAPI/Services/UnifiedTrackService.cs b/DeepDrftAPI/Services/UnifiedTrackService.cs index 5c7ef52..f1ef417 100644 --- a/DeepDrftAPI/Services/UnifiedTrackService.cs +++ b/DeepDrftAPI/Services/UnifiedTrackService.cs @@ -1,3 +1,4 @@ +using DeepDrftAPI.Services.Opus; using DeepDrftContent; using DeepDrftContent.Constants; using DeepDrftContent.Processors; @@ -39,6 +40,7 @@ public class UnifiedTrackService private readonly ITrackService _sqlTrackService; private readonly FileDb _fileDatabase; private readonly WaveformProfileService _waveformProfileService; + private readonly IOpusTranscodeQueue _opusTranscodeQueue; private readonly ILogger _logger; public UnifiedTrackService( @@ -46,12 +48,14 @@ public class UnifiedTrackService ITrackService sqlTrackService, FileDb fileDatabase, WaveformProfileService waveformProfileService, + IOpusTranscodeQueue opusTranscodeQueue, ILogger logger) { _contentTrackContentService = contentTrackContentService; _sqlTrackService = sqlTrackService; _fileDatabase = fileDatabase; _waveformProfileService = waveformProfileService; + _opusTranscodeQueue = opusTranscodeQueue; _logger = logger; } @@ -219,6 +223,11 @@ public class UnifiedTrackService // frontend, so a failure here is logged and swallowed — never fails the upload. await TryStoreWaveformDatumsAsync(unpersisted.EntryKey, ct); + // Schedule the low-data Opus derive (OQ6 / §3.1a): the track is persisted and lossless-playable + // NOW; the transcode + seek-index build run on a background worker. Non-blocking and best-effort + // — the upload response never waits on it, and a transcode failure leaves the track lossless-only. + _opusTranscodeQueue.Enqueue(unpersisted.EntryKey); + return saveResult; } @@ -303,6 +312,11 @@ public class UnifiedTrackService return Result.CreateFailResult("Audio replaced but duration metadata could not be updated."); } + // The stale Opus artifact (if any) no longer matches the new source. Schedule a background + // regenerate — the transcode service overwrites the prior artifacts in place keyed by the same + // EntryKey. Best-effort, off the request thread, mirrors the waveform regen above. + _opusTranscodeQueue.Enqueue(entryKey); + return Result.CreatePassResult(); } diff --git a/DeepDrftAPI/Startup.cs b/DeepDrftAPI/Startup.cs index f735fd1..b45fe56 100644 --- a/DeepDrftAPI/Startup.cs +++ b/DeepDrftAPI/Startup.cs @@ -4,6 +4,7 @@ using DeepDrftContent.Constants; using DeepDrftContent.FileDatabase.Models; using DeepDrftContent.FileDatabase.Services; using DeepDrftContent.Processors; +using DeepDrftContent.Processors.Opus; using Microsoft.Extensions.Logging; using NetBlocks.Utilities.Environment; @@ -44,6 +45,7 @@ namespace DeepDrftAPI InitializeTrackVault(db).GetAwaiter().GetResult(); InitializeImageVault(db).GetAwaiter().GetResult(); InitializeTrackWaveformsVault(db).GetAwaiter().GetResult(); + InitializeTrackOpusVault(db).GetAwaiter().GetResult(); return db; }); @@ -65,6 +67,16 @@ namespace DeepDrftAPI Environment.SetEnvironmentVariable("ASPNETCORE_TEMP", stagingPath); builder.Services.AddSingleton(new UploadStagingDirectory(stagingPath)); + // Opus low-data transcode (Phase 18.1). The domain service lives in DeepDrftContent; the host + // owns only the engine config and the background worker. Bitrate/ffmpeg-path come from the + // OpusTranscode config section; StagingPath is forced to the same data-disk staging directory + // the upload path uses so large transcode temp files never land on the /tmp tmpfs. + builder.Services.Configure( + builder.Configuration.GetSection(nameof(OpusTranscodeOptions))); + builder.Services.PostConfigure(o => o.StagingPath = stagingPath); + builder.Services.AddSingleton(); + builder.Services.AddSingleton(); + return Task.CompletedTask; } @@ -107,5 +119,15 @@ namespace DeepDrftAPI await fileDatabase.CreateVaultAsync(VaultConstants.TrackWaveforms, MediaVaultType.Media); } } + + // Ensure the track-opus vault exists (Phase 18.1). Holds the derived low-data Ogg Opus artifacts + // — the Opus audio bytes and the setup-header + seek-index sidecar — keyed by the track's EntryKey. + private static async Task InitializeTrackOpusVault(FileDatabase fileDatabase) + { + if (!fileDatabase.HasVault(VaultConstants.TrackOpus)) + { + await fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio); + } + } } } \ No newline at end of file diff --git a/DeepDrftContent/Constants/VaultConstants.cs b/DeepDrftContent/Constants/VaultConstants.cs index ba1d5fc..906b74e 100644 --- a/DeepDrftContent/Constants/VaultConstants.cs +++ b/DeepDrftContent/Constants/VaultConstants.cs @@ -28,4 +28,13 @@ public static class VaultConstants /// The datum resolution is duration-derived (≈333 samples/sec, see WaveformResolution). /// public const string TrackWaveforms = "track-waveforms"; + + /// + /// Vault name for the derived low-data Ogg Opus artifacts, keyed by the track's EntryKey (Phase 18, + /// S2). Holds two entries per track: the Opus audio bytes (.opus) and the combined setup-header + /// + granule→byte seek-index sidecar (.opusidx). Both are best-effort derived artifacts — + /// regenerable, and a track without them still plays losslessly. Distinct from the source tracks + /// vault so the source means exactly one thing (mirrors the track-waveforms precedent). + /// + public const string TrackOpus = "track-opus"; } \ No newline at end of file diff --git a/DeepDrftContent/FileDatabase/Models/MediaModels.cs b/DeepDrftContent/FileDatabase/Models/MediaModels.cs index 44cc961..703d7a5 100644 --- a/DeepDrftContent/FileDatabase/Models/MediaModels.cs +++ b/DeepDrftContent/FileDatabase/Models/MediaModels.cs @@ -206,6 +206,7 @@ public static class MimeTypeExtensions { ".flac", "audio/flac" }, { ".aac", "audio/aac" }, { ".ogg", "audio/ogg" }, + { ".opus", "audio/ogg" }, { ".m4a", "audio/mp4" } }; diff --git a/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs b/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs new file mode 100644 index 0000000..4e7f7e1 --- /dev/null +++ b/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs @@ -0,0 +1,138 @@ +using System.Diagnostics; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; + +namespace DeepDrftContent.Processors.Opus; + +/// +/// Encodes a source audio file (any format the source vault holds — WAV/MP3/FLAC) to Ogg Opus fullband +/// 320 kbps by shelling out to FFmpeg (libopus). FFmpeg is chosen over a managed encoder because it +/// muxes a correct Ogg container with accurate granule positions across every input format — the page +/// structure the seek-index walk depends on — which a raw libopus binding does not provide. The external +/// ffmpeg binary is therefore a host runtime prerequisite (flagged in the wave handoff). +/// +public sealed class FfmpegOpusEncoder +{ + private readonly OpusTranscodeOptions _options; + private readonly ILogger _logger; + + public FfmpegOpusEncoder(IOptions options, ILogger logger) + { + _options = options.Value; + _logger = logger; + } + + /// + /// Transcodes to an Ogg Opus file at . + /// Returns true on a clean exit with a non-empty output. Returns false (logged) on a non-zero exit, + /// a timeout, a missing ffmpeg binary, or any process failure — a transcode failure must never throw + /// to the caller (C6); the background worker treats false as "leave the track lossless-only". + /// + public async Task EncodeAsync(string sourcePath, string destinationPath, CancellationToken ct) + { + var ffmpeg = string.IsNullOrWhiteSpace(_options.FfmpegPath) ? "ffmpeg" : _options.FfmpegPath; + + // -vn drops any cover-art video stream; -map a:0 takes the first audio stream; -ar 48000 forces + // fullband (Opus internally resamples to 48 kHz anyway, but stating it keeps granulepos math + // unambiguous); libopus VBR at the target bitrate; -f ogg for an explicit Ogg container; -y + // overwrites the (pre-created, empty) destination temp file. + var args = new[] + { + "-hide_banner", "-nostdin", "-loglevel", "error", + "-i", sourcePath, + "-vn", "-map", "a:0", + "-c:a", "libopus", "-b:a", $"{_options.BitrateKbps}k", + "-ar", "48000", + "-f", "ogg", + "-y", destinationPath, + }; + + var psi = new ProcessStartInfo(ffmpeg) + { + RedirectStandardError = true, + RedirectStandardOutput = true, + UseShellExecute = false, + CreateNoWindow = true, + }; + foreach (var arg in args) + psi.ArgumentList.Add(arg); + + using var process = new Process { StartInfo = psi }; + + try + { + if (!process.Start()) + { + _logger.LogError("Opus transcode: ffmpeg failed to start ({Ffmpeg}).", ffmpeg); + return false; + } + } + catch (Exception ex) + { + // Most commonly a missing binary (Win32Exception "file not found"). This is the ops + // prerequisite failing — log loudly so it is unmistakable in the deploy logs. + _logger.LogError(ex, + "Opus transcode: could not launch ffmpeg ({Ffmpeg}). Is the ffmpeg binary installed on the host?", + ffmpeg); + return false; + } + + using var timeout = CancellationTokenSource.CreateLinkedTokenSource(ct); + timeout.CancelAfter(TimeSpan.FromSeconds(_options.TimeoutSeconds)); + + // Drain stderr concurrently — ffmpeg can block writing diagnostics if the pipe is not read. + var stderrTask = process.StandardError.ReadToEndAsync(timeout.Token); + + try + { + await process.WaitForExitAsync(timeout.Token); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + TryKill(process); + throw; // genuine shutdown cancellation — let it propagate + } + catch (OperationCanceledException) + { + TryKill(process); + _logger.LogError("Opus transcode: ffmpeg exceeded the {Timeout}s timeout for {Source}.", + _options.TimeoutSeconds, sourcePath); + return false; + } + + var stderr = await SafeStderr(stderrTask); + if (process.ExitCode != 0) + { + _logger.LogError("Opus transcode: ffmpeg exited {Code} for {Source}. stderr: {Stderr}", + process.ExitCode, sourcePath, stderr); + return false; + } + + if (!File.Exists(destinationPath) || new FileInfo(destinationPath).Length == 0) + { + _logger.LogError("Opus transcode: ffmpeg exited 0 but produced no output for {Source}.", sourcePath); + return false; + } + + return true; + } + + private void TryKill(Process process) + { + try + { + if (!process.HasExited) + process.Kill(entireProcessTree: true); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Opus transcode: failed to kill timed-out ffmpeg process."); + } + } + + private static async Task SafeStderr(Task stderrTask) + { + try { return await stderrTask; } + catch { return ""; } + } +} diff --git a/DeepDrftContent/Processors/Opus/OggOpusConstants.cs b/DeepDrftContent/Processors/Opus/OggOpusConstants.cs new file mode 100644 index 0000000..f6fa431 --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OggOpusConstants.cs @@ -0,0 +1,50 @@ +namespace DeepDrftContent.Processors.Opus; + +/// +/// Wire-format constants for the Ogg-Opus derived artifacts. Centralised so the seek-index codec, +/// the page walker, and the tests agree on one set of magic numbers. +/// +public static class OggOpusConstants +{ + /// Opus granule positions are always sample counts at 48 kHz, regardless of input rate. + public const double OpusSampleRate = 48000.0; + + /// One seek-index entry per this many seconds of audio (OQ7 — 0.5 s buckets). + public const double SeekBucketSeconds = 0.5; + + /// The Ogg page capture pattern "OggS" — every page starts with these four bytes. + public static ReadOnlySpan CapturePattern => "OggS"u8; + + /// Magic signature opening an OpusHead identification header packet. + public static ReadOnlySpan OpusHeadSignature => "OpusHead"u8; + + /// Magic signature opening an OpusTags comment header packet. + public static ReadOnlySpan OpusTagsSignature => "OpusTags"u8; + + /// + /// Fixed size of an Ogg page header before the segment table: capture(4) + version(1) + + /// header-type(1) + granulepos(8) + serial(4) + sequence(4) + checksum(4) + page-segments(1). + /// + public const int OggPageHeaderSize = 27; + + /// Byte offset of the 64-bit granule position within an Ogg page header. + public const int GranulePositionOffset = 6; + + /// Byte offset of the page-segment count (the segment-table length) within the header. + public const int PageSegmentCountOffset = 26; + + /// Sentinel granule position for a page that ends mid-packet (no usable timestamp). + public const ulong NoGranulePosition = 0xFFFFFFFFFFFFFFFFUL; + + /// Header size of the serialized seek-index blob: totalBytes(8) + duration(8) + count(4). + public const int SeekIndexHeaderSize = 20; + + /// Size of one serialized seek point: granulepos(8) + byteOffset(8). + public const int SeekPointSize = 16; + + /// Vault-resource extension for the Opus audio bytes. + public const string OpusExtension = ".opus"; + + /// Vault-resource extension for the combined setup-header + seek-index sidecar. + public const string SidecarExtension = ".opusidx"; +} diff --git a/DeepDrftContent/Processors/Opus/OggOpusParser.cs b/DeepDrftContent/Processors/Opus/OggOpusParser.cs new file mode 100644 index 0000000..98452b4 --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OggOpusParser.cs @@ -0,0 +1,124 @@ +using System.Buffers.Binary; + +namespace DeepDrftContent.Processors.Opus; + +/// +/// The result of walking an encoded Ogg Opus stream once: the captured setup header (the leading +/// OpusHead + OpusTags pages, verbatim) and the bucketed granule→byte seek index. This +/// is everything the sidecar artifact carries (§3.4a) — built at transcode time so delivery never +/// re-walks the stream. +/// +/// The leading setup pages (OpusHead + OpusTags), exactly as they +/// appear at the start of the stream, ready to prepend to any mid-stream page run before decode. +/// The accurate, 0.5 s-bucketed granule→byte transfer function. +public sealed record OggOpusWalk(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex); + +/// +/// Pure Ogg-Opus stream walker. Reads the page structure directly (the OggS capture pattern and +/// the 27-byte page header) to (1) capture the setup-header pages and (2) record, for every audio page, +/// its end granule position and exact byte offset — bucketed to 0.5 s with each bucket boundary snapped +/// to the nearest enclosing page start. No external dependency: the encoder (FFmpeg) produces the bytes; +/// this turns them into the seek artifact deterministically, so it is unit-testable without a codec. +/// +public static class OggOpusParser +{ + /// + /// Walks and produces the setup header + seek index, or null if the + /// bytes are not a recognisable Ogg Opus stream (no setup header, no audio pages, or truncated + /// structure). A null is the caller's signal to treat the transcode as failed and leave the track + /// lossless-only (C6) — it does not throw for malformed input. + /// + public static OggOpusWalk? Walk(ReadOnlySpan oggBytes) + { + var setupHeaderEnd = -1; + var sawOpusHead = false; + var sawOpusTags = false; + + var points = new List(); + ulong lastGranule = 0; + var nextBucketTime = 0.0; + var firstAudioPointTaken = false; + + var offset = 0; + while (offset + OggOpusConstants.OggPageHeaderSize <= oggBytes.Length) + { + var page = oggBytes.Slice(offset); + if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern)) + { + // Not on a page boundary — the encoder writes contiguous pages, so this means the + // stream is malformed or we mis-stepped. Either way it is unrecoverable here. + return null; + } + + var segmentCount = page[OggOpusConstants.PageSegmentCountOffset]; + var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount; + if (segmentTableEnd > page.Length) + return null; // truncated header + + var payloadSize = 0; + for (var i = 0; i < segmentCount; i++) + payloadSize += page[OggOpusConstants.OggPageHeaderSize + i]; + + var pageTotalSize = segmentTableEnd + payloadSize; + if (pageTotalSize > page.Length) + return null; // truncated payload + + var payload = page.Slice(segmentTableEnd, payloadSize); + var granule = BinaryPrimitives.ReadUInt64LittleEndian( + page.Slice(OggOpusConstants.GranulePositionOffset, 8)); + + // The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They + // are the leading pages whose payload opens with the Opus magic signatures. + if (!sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature)) + { + sawOpusHead = true; + setupHeaderEnd = offset + pageTotalSize; + } + else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature)) + { + sawOpusTags = true; + setupHeaderEnd = offset + pageTotalSize; + } + else if (sawOpusHead && sawOpusTags) + { + // Audio page. Record the first audio page unconditionally (the seek anchor at t=0), + // then one entry per 0.5 s bucket. A page with no end-granule (mid-packet continuation, + // granulepos == -1) is skipped for indexing — its time is unknown — but still advances + // the byte cursor. + if (granule != OggOpusConstants.NoGranulePosition) + { + var pageTime = granule / OggOpusConstants.OpusSampleRate; + if (!firstAudioPointTaken) + { + points.Add(new OpusSeekPoint(granule, (ulong)offset)); + firstAudioPointTaken = true; + nextBucketTime = OggOpusConstants.SeekBucketSeconds; + } + else if (pageTime >= nextBucketTime) + { + points.Add(new OpusSeekPoint(granule, (ulong)offset)); + // Advance past every bucket this page crossed so a long page does not emit a + // backlog of entries; the next bucket is the first boundary strictly after it. + while (nextBucketTime <= pageTime) + nextBucketTime += OggOpusConstants.SeekBucketSeconds; + } + + lastGranule = granule; + } + } + + offset += pageTotalSize; + } + + if (!sawOpusHead || setupHeaderEnd < 0 || points.Count == 0) + return null; + + var setupHeader = oggBytes[..setupHeaderEnd].ToArray(); + var totalDuration = lastGranule / OggOpusConstants.OpusSampleRate; + var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length); + return new OggOpusWalk(setupHeader, index); + } + + private static bool StartsWith(ReadOnlySpan payload, ReadOnlySpan signature) => + payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature); +} diff --git a/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs b/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs new file mode 100644 index 0000000..b5bff4c --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs @@ -0,0 +1,93 @@ +using System.Buffers.Binary; + +namespace DeepDrftContent.Processors.Opus; + +/// +/// A single seek-index entry: an authoritative 48 kHz (Opus granule +/// positions are always sample counts at 48 kHz, so time = granulepos / 48000) paired with the exact +/// byte offset of the Ogg page that carries it. Every is a real page-start +/// boundary, so a Range: bytes={ByteOffset}- fetch lands the decoder Ogg-sync-aligned. +/// +/// The page's end granule position (48 kHz sample count). +/// The byte offset of the page start in the Opus file. +public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOffset) +{ + /// Time in seconds this granule position represents (granulepos / 48 kHz). + public double TimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate; +} + +/// +/// The accurate, precomputed transfer function from seek-time to true file byte offset for one Ogg +/// Opus stream (§3.4a A). Built once at transcode time by walking the encoded stream; the client reads +/// it back and binary-searches instead of doing inaccurate VBR byte-rate math. +/// One entry per 0.5 s of audio (), each snapped to the +/// nearest enclosing page start, plus the totals needed to clamp a seek to range. +/// +/// Ordered (granulepos, byteOffset) entries, ascending. The first is always the +/// first audio page (offset just past the setup headers). +/// Total stream duration from the final granule position. +/// Total Opus file byte length, for clamping a seek past the end. +public sealed record OggOpusSeekIndex( + IReadOnlyList Points, + double TotalDurationSeconds, + ulong TotalByteLength) +{ + /// + /// Serializes the index to the compact little-endian binary blob the sidecar stores. Layout: + /// [uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount] then + /// pointCount × (uint64 granulepos, uint64 byteOffset). Fixed-width records keep the client + /// parse to a single typed-array read. + /// + public byte[] ToBytes() + { + var size = OggOpusConstants.SeekIndexHeaderSize + Points.Count * OggOpusConstants.SeekPointSize; + var bytes = new byte[size]; + var span = bytes.AsSpan(); + + BinaryPrimitives.WriteUInt64LittleEndian(span[..8], TotalByteLength); + BinaryPrimitives.WriteDoubleLittleEndian(span.Slice(8, 8), TotalDurationSeconds); + BinaryPrimitives.WriteUInt32LittleEndian(span.Slice(16, 4), (uint)Points.Count); + + var cursor = OggOpusConstants.SeekIndexHeaderSize; + foreach (var point in Points) + { + BinaryPrimitives.WriteUInt64LittleEndian(span.Slice(cursor, 8), point.GranulePosition); + BinaryPrimitives.WriteUInt64LittleEndian(span.Slice(cursor + 8, 8), point.ByteOffset); + cursor += OggOpusConstants.SeekPointSize; + } + + return bytes; + } + + /// + /// Parses a blob produced by . Returns null if the blob is too short or its + /// declared point count does not fit — the storage contract is exact, so a malformed blob is a + /// corruption signal, not a recoverable shape. (Provided so tests and any future server-side reader + /// share one codec with the writer.) + /// + public static OggOpusSeekIndex? FromBytes(ReadOnlySpan bytes) + { + if (bytes.Length < OggOpusConstants.SeekIndexHeaderSize) + return null; + + var totalByteLength = BinaryPrimitives.ReadUInt64LittleEndian(bytes[..8]); + var totalDuration = BinaryPrimitives.ReadDoubleLittleEndian(bytes.Slice(8, 8)); + var count = BinaryPrimitives.ReadUInt32LittleEndian(bytes.Slice(16, 4)); + + var expected = OggOpusConstants.SeekIndexHeaderSize + (long)count * OggOpusConstants.SeekPointSize; + if (bytes.Length < expected) + return null; + + var points = new OpusSeekPoint[count]; + var cursor = OggOpusConstants.SeekIndexHeaderSize; + for (var i = 0; i < count; i++) + { + var granule = BinaryPrimitives.ReadUInt64LittleEndian(bytes.Slice(cursor, 8)); + var offset = BinaryPrimitives.ReadUInt64LittleEndian(bytes.Slice(cursor + 8, 8)); + points[i] = new OpusSeekPoint(granule, offset); + cursor += OggOpusConstants.SeekPointSize; + } + + return new OggOpusSeekIndex(points, totalDuration, totalByteLength); + } +} diff --git a/DeepDrftContent/Processors/Opus/OpusSidecar.cs b/DeepDrftContent/Processors/Opus/OpusSidecar.cs new file mode 100644 index 0000000..07cd594 --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OpusSidecar.cs @@ -0,0 +1,57 @@ +using System.Buffers.Binary; + +namespace DeepDrftContent.Processors.Opus; + +/// +/// The single derived sidecar artifact per track (§3.4a B, recommended design): the Opus setup header +/// (OpusHead + OpusTags) followed by the granule→byte seek index. The client fetches this +/// once on track load and parses it into its OpusSeekData, so it always has both the setup bytes +/// (to prepend to any mid-stream slice) and the accurate seek transfer function before it ever issues a +/// Range fetch — including a window that opens away from byte 0 (UC9). +/// +/// The verbatim OpusHead + OpusTags pages. +/// The bucketed granule→byte seek index. +public sealed record OpusSidecar(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex) +{ + /// + /// Serializes to [uint32 setupHeaderLength][setup-header bytes][seek-index blob]. The + /// length prefix lets the client split the two regions with one read; the seek-index blob carries + /// its own self-describing header (), so it needs no trailing + /// length. + /// + public byte[] ToBytes() + { + var indexBytes = SeekIndex.ToBytes(); + var bytes = new byte[4 + SetupHeaderBytes.Length + indexBytes.Length]; + var span = bytes.AsSpan(); + + BinaryPrimitives.WriteUInt32LittleEndian(span[..4], (uint)SetupHeaderBytes.Length); + SetupHeaderBytes.CopyTo(span.Slice(4)); + indexBytes.CopyTo(span.Slice(4 + SetupHeaderBytes.Length)); + + return bytes; + } + + /// + /// Parses a blob produced by . Returns null on any structural inconsistency + /// (short blob, length prefix that overruns, or an unparseable index) — the format is exact, so a + /// malformed blob is corruption. + /// + public static OpusSidecar? FromBytes(ReadOnlySpan bytes) + { + if (bytes.Length < 4) + return null; + + var setupLength = BinaryPrimitives.ReadUInt32LittleEndian(bytes[..4]); + var indexStart = 4 + (long)setupLength; + if (bytes.Length < indexStart) + return null; + + var setupHeader = bytes.Slice(4, (int)setupLength).ToArray(); + var index = OggOpusSeekIndex.FromBytes(bytes.Slice((int)indexStart)); + if (index is null) + return null; + + return new OpusSidecar(setupHeader, index); + } +} diff --git a/DeepDrftContent/Processors/Opus/OpusTranscodeOptions.cs b/DeepDrftContent/Processors/Opus/OpusTranscodeOptions.cs new file mode 100644 index 0000000..ac81a7a --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OpusTranscodeOptions.cs @@ -0,0 +1,33 @@ +namespace DeepDrftContent.Processors.Opus; + +/// +/// Host-supplied configuration for the Opus transcode. The only operationally significant knob is +/// — the transcode shells out to FFmpeg (libopus), which must be present on the +/// DeepDrftAPI host (see the wave handoff notes). Defaults target Ogg Opus fullband (48 kHz) at 320 kbps, +/// the artifact the spec fixes (§1). +/// +public sealed class OpusTranscodeOptions +{ + /// + /// Path to the ffmpeg executable. Empty/null resolves to "ffmpeg" (found on PATH). Override + /// with an absolute path when the binary is not on the host PATH. + /// + public string FfmpegPath { get; set; } = "ffmpeg"; + + /// Target Opus bitrate in kbps. 320 kbps fullband is the fixed artifact quality (§1). + public int BitrateKbps { get; set; } = 320; + + /// + /// Directory for the transient source/output files the transcode stages. Defaults to the system + /// temp path; the host overrides it to the data-disk upload-staging directory so large files never + /// land on the small RAM-backed /tmp tmpfs (same constraint the upload path already honours). + /// + public string StagingPath { get; set; } = Path.GetTempPath(); + + /// + /// Hard ceiling on a single transcode, in seconds. A run that exceeds it is killed and the track + /// stays lossless-only (C6). Generous by default — a 1 GB mix is CPU-expensive (§3.1) — but bounded + /// so a hung ffmpeg never wedges the background worker. + /// + public int TimeoutSeconds { get; set; } = 3600; +} diff --git a/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs b/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs new file mode 100644 index 0000000..712344c --- /dev/null +++ b/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs @@ -0,0 +1,151 @@ +using DeepDrftContent.Constants; +using DeepDrftContent.FileDatabase.Models; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase; + +namespace DeepDrftContent.Processors.Opus; + +/// +/// Derives and persists a track's low-data Ogg Opus artifacts (Phase 18.1). Mirrors +/// 's derived-artifact lifecycle: compute from the stored source, +/// store in a dedicated vault keyed by EntryKey, regenerable, failure-tolerant. For one track it +/// produces two entries in the vault — the Opus audio bytes and a +/// combined setup-header + seek-index sidecar (§3.4a). Strictly additive: the source tracks vault +/// is never touched, and a failure here leaves the track lossless-only and eligible for backfill (C2/C6). +/// +public sealed class OpusTranscodeService +{ + private readonly FileDb _fileDatabase; + private readonly FfmpegOpusEncoder _encoder; + private readonly OpusTranscodeOptions _options; + private readonly ILogger _logger; + + public OpusTranscodeService( + FileDb fileDatabase, + FfmpegOpusEncoder encoder, + IOptions options, + ILogger logger) + { + _fileDatabase = fileDatabase; + _encoder = encoder; + _options = options.Value; + _logger = logger; + } + + /// + /// Reads the source audio for from the tracks vault, transcodes it + /// to Ogg Opus 320, walks the encoded stream to build the seek index + capture the setup header, and + /// stores the Opus bytes and the sidecar in the vault under the + /// same key. Re-runnable — a second call overwrites the prior artifacts (backfill / replace-audio). + /// Returns false (logged) on any failure; never throws for expected failure modes (C6). The only + /// propagated exception is on genuine shutdown. + /// + public async Task TranscodeAndStoreAsync(string entryKey, CancellationToken ct) + { + var source = await _fileDatabase.LoadResourceAsync(VaultConstants.Tracks, entryKey); + if (source is null) + { + _logger.LogWarning("Opus transcode: no source audio in vault for {EntryKey}; skipping.", entryKey); + return false; + } + + Directory.CreateDirectory(_options.StagingPath); + var sourcePath = Path.Combine(_options.StagingPath, $"opus-src-{Guid.NewGuid():N}{source.Extension}"); + var opusPath = Path.Combine(_options.StagingPath, $"opus-out-{Guid.NewGuid():N}{OggOpusConstants.OpusExtension}"); + + try + { + await File.WriteAllBytesAsync(sourcePath, source.Buffer, ct); + + if (!await _encoder.EncodeAsync(sourcePath, opusPath, ct)) + return false; // encoder already logged the cause + + var opusBytes = await File.ReadAllBytesAsync(opusPath, ct); + + var walk = OggOpusParser.Walk(opusBytes); + if (walk is null) + { + _logger.LogError( + "Opus transcode: ffmpeg produced output but the Ogg stream could not be walked for {EntryKey}; " + + "no artifacts stored.", entryKey); + return false; + } + + await EnsureVaultAsync(); + + var opusBitrate = source.Duration > 0 + ? (int)(opusBytes.Length * 8 / source.Duration / 1000) + : _options.BitrateKbps; + var audioBinary = new AudioBinary(new AudioBinaryParams( + opusBytes, opusBytes.Length, OggOpusConstants.OpusExtension, source.Duration, opusBitrate)); + + var sidecar = new OpusSidecar(walk.SetupHeaderBytes, walk.SeekIndex).ToBytes(); + var sidecarBinary = new MediaBinary(new MediaBinaryParams( + sidecar, sidecar.Length, OggOpusConstants.SidecarExtension)); + + // Store the audio first, then the sidecar. If the sidecar write fails the Opus bytes are + // present but unseekable — treat that as a failed derive (return false) so a backfill re-runs + // it; do not leave a half-derived track that the delivery layer would treat as complete. + var audioStored = await _fileDatabase.RegisterResourceAsync( + VaultConstants.TrackOpus, OpusAudioKey(entryKey), audioBinary); + if (!audioStored) + { + _logger.LogError("Opus transcode: vault write of Opus audio failed for {EntryKey}.", entryKey); + return false; + } + + var sidecarStored = await _fileDatabase.RegisterResourceAsync( + VaultConstants.TrackOpus, OpusSidecarKey(entryKey), sidecarBinary); + if (!sidecarStored) + { + _logger.LogError("Opus transcode: vault write of sidecar failed for {EntryKey}.", entryKey); + return false; + } + + _logger.LogInformation( + "Opus transcode complete for {EntryKey}: {OpusBytes} bytes, {Points} seek points, {Duration:F1}s.", + entryKey, opusBytes.Length, walk.SeekIndex.Points.Count, walk.SeekIndex.TotalDurationSeconds); + return true; + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; + } + catch (Exception ex) + { + _logger.LogError(ex, "Opus transcode failed for {EntryKey}; track stays lossless-only.", entryKey); + return false; + } + finally + { + TryDelete(sourcePath); + TryDelete(opusPath); + } + } + + /// The vault entry key under which a track's Opus audio bytes are stored. + public static string OpusAudioKey(string entryKey) => entryKey; + + /// The vault entry key under which a track's setup-header + seek-index sidecar is stored. + public static string OpusSidecarKey(string entryKey) => $"{entryKey}-sidecar"; + + private async Task EnsureVaultAsync() + { + if (!_fileDatabase.HasVault(VaultConstants.TrackOpus)) + await _fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio); + } + + private void TryDelete(string path) + { + try + { + if (File.Exists(path)) + File.Delete(path); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Opus transcode: failed to delete staging file {Path}.", path); + } + } +} diff --git a/DeepDrftTests/NoOpOpusTranscodeQueue.cs b/DeepDrftTests/NoOpOpusTranscodeQueue.cs new file mode 100644 index 0000000..33a6103 --- /dev/null +++ b/DeepDrftTests/NoOpOpusTranscodeQueue.cs @@ -0,0 +1,16 @@ +using DeepDrftAPI.Services.Opus; + +namespace DeepDrftTests; + +/// +/// Test double for . The background Opus derive is out of scope for the +/// upload/dual-database tests — those assert SQL + source-vault behaviour, which is unchanged by Phase 18 +/// (Opus is strictly additive). Enqueuing is a no-op so the upload path under test never spins up a real +/// transcode. Records the enqueued keys in case a test wants to assert the scheduling contract. +/// +public sealed class NoOpOpusTranscodeQueue : IOpusTranscodeQueue +{ + public List Enqueued { get; } = []; + + public void Enqueue(string entryKey) => Enqueued.Add(entryKey); +} diff --git a/DeepDrftTests/OggOpusParserTests.cs b/DeepDrftTests/OggOpusParserTests.cs new file mode 100644 index 0000000..2a7728f --- /dev/null +++ b/DeepDrftTests/OggOpusParserTests.cs @@ -0,0 +1,304 @@ +using System.Buffers.Binary; +using System.Text; +using DeepDrftContent.Processors.Opus; + +namespace DeepDrftTests; + +/// +/// Coverage for the Phase 18.1 seek-index + setup-header extraction (§3.4a). These exercise the pure +/// Ogg-Opus walker and the sidecar codec over hand-built Ogg streams — no ffmpeg dependency — so the +/// granule→byte mapping, page-boundary snapping, 0.5 s bucketing, clamp totals, and setup-header capture +/// are asserted deterministically. The byte layout mirrors a real Opus stream: an OpusHead page, an +/// OpusTags page, then audio pages each carrying an end granule position at 48 kHz. +/// +[TestFixture] +public class OggOpusParserTests +{ + [Test] + public void Walk_CapturesSetupHeader_AsLeadingOpusHeadAndOpusTagsPagesVerbatim() + { + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + var audio = OggPage(granule: 48000, AudioPacket(64)); + + var stream = Concat(head, tags, audio); + + var walk = OggOpusParser.Walk(stream); + + Assert.That(walk, Is.Not.Null, "A well-formed Ogg Opus stream must walk"); + var expectedSetup = Concat(head, tags); + Assert.That(walk!.SetupHeaderBytes, Is.EqualTo(expectedSetup), + "Setup header must be the OpusHead + OpusTags pages, byte-for-byte, and stop before the first audio page"); + } + + [Test] + public void Walk_FirstSeekPoint_IsTheFirstAudioPageAtItsExactByteOffset() + { + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + var audio = OggPage(granule: 48000, AudioPacket(64)); + + var stream = Concat(head, tags, audio); + var firstAudioOffset = (ulong)(head.Length + tags.Length); + + var walk = OggOpusParser.Walk(stream); + + Assert.That(walk, Is.Not.Null); + var first = walk!.SeekIndex.Points[0]; + Assert.That(first.ByteOffset, Is.EqualTo(firstAudioOffset), + "The first seek point must land on the first audio page's start offset (an exact page boundary)"); + Assert.That(first.GranulePosition, Is.EqualTo(48000UL)); + } + + [Test] + public void Walk_EverySeekOffset_LandsOnARealPageBoundary() + { + // Ten audio pages, ~0.25 s each (12000 samples). Bucketing at 0.5 s means roughly every other + // page is indexed; every indexed offset must still be the start of some OggS page. + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + + var pages = new List { head, tags }; + ulong granule = 0; + for (var i = 0; i < 10; i++) + { + granule += 12000; // 0.25 s at 48 kHz + pages.Add(OggPage(granule, AudioPacket(50 + i))); + } + + var stream = Concat(pages.ToArray()); + var pageOffsets = CollectPageStartOffsets(stream); + + var walk = OggOpusParser.Walk(stream); + Assert.That(walk, Is.Not.Null); + + foreach (var point in walk!.SeekIndex.Points) + { + Assert.That(pageOffsets, Does.Contain(point.ByteOffset), + $"Seek offset {point.ByteOffset} must be a real OggS page start"); + } + } + + [Test] + public void Walk_Bucketing_EmitsRoughlyOneEntryPerHalfSecond() + { + // Twenty audio pages of 0.25 s each = 5 s total. At 0.5 s buckets we expect ~10 entries + // (the first audio page is always taken, then one per crossed half-second boundary). + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + + var pages = new List { head, tags }; + ulong granule = 0; + for (var i = 0; i < 20; i++) + { + granule += 12000; // 0.25 s + pages.Add(OggPage(granule, AudioPacket(40))); + } + + var stream = Concat(pages.ToArray()); + var walk = OggOpusParser.Walk(stream); + + Assert.That(walk, Is.Not.Null); + // 5 s of audio → first point + one per 0.5 s boundary up to 5.0 s. Allow a small tolerance for + // boundary rounding, but it must be far below "one per page" (20) and at least the ~10 buckets. + Assert.That(walk!.SeekIndex.Points.Count, Is.InRange(9, 12), + "Bucketing must coalesce ~0.25 s pages into ~0.5 s index entries, not one per page"); + } + + [Test] + public void Walk_PointsAreStrictlyAscending_InBothGranuleAndOffset() + { + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + + var pages = new List { head, tags }; + ulong granule = 0; + for (var i = 0; i < 12; i++) + { + granule += 24000; // 0.5 s — one index entry per page + pages.Add(OggPage(granule, AudioPacket(30))); + } + + var walk = OggOpusParser.Walk(Concat(pages.ToArray())); + Assert.That(walk, Is.Not.Null); + + var points = walk!.SeekIndex.Points; + for (var i = 1; i < points.Count; i++) + { + Assert.That(points[i].GranulePosition, Is.GreaterThan(points[i - 1].GranulePosition)); + Assert.That(points[i].ByteOffset, Is.GreaterThan(points[i - 1].ByteOffset)); + } + } + + [Test] + public void Walk_ClampValues_ReflectFinalGranuleAndTotalByteLength() + { + var head = OggPage(granule: 0, OpusHeadPacket()); + var tags = OggPage(granule: 0, OpusTagsPacket()); + var a1 = OggPage(granule: 48000, AudioPacket(64)); // 1.0 s + var a2 = OggPage(granule: 144000, AudioPacket(64)); // 3.0 s (final) + + var stream = Concat(head, tags, a1, a2); + + var walk = OggOpusParser.Walk(stream); + Assert.That(walk, Is.Not.Null); + + Assert.That(walk!.SeekIndex.TotalByteLength, Is.EqualTo((ulong)stream.Length), + "Total byte length must equal the full stream length for end-of-stream clamping"); + Assert.That(walk.SeekIndex.TotalDurationSeconds, Is.EqualTo(3.0).Within(1e-9), + "Total duration must derive from the final page's granule position (144000 / 48000 = 3.0 s)"); + } + + [Test] + public void Walk_MalformedStream_ReturnsNull_RatherThanThrowing() + { + var notOgg = Encoding.ASCII.GetBytes("this is not an ogg stream at all"); + Assert.That(OggOpusParser.Walk(notOgg), Is.Null); + + // OpusHead present but no audio pages → no seek points → null (nothing to index). + var headOnly = Concat(OggPage(0, OpusHeadPacket()), OggPage(0, OpusTagsPacket())); + Assert.That(OggOpusParser.Walk(headOnly), Is.Null); + } + + [Test] + public void SeekIndex_RoundTrips_ThroughBinaryEncoding() + { + var points = new[] + { + new OpusSeekPoint(48000, 200), + new OpusSeekPoint(72000, 512), + new OpusSeekPoint(96000, 900), + }; + var index = new OggOpusSeekIndex(points, TotalDurationSeconds: 2.0, TotalByteLength: 1024); + + var restored = OggOpusSeekIndex.FromBytes(index.ToBytes()); + + Assert.That(restored, Is.Not.Null); + Assert.That(restored!.TotalByteLength, Is.EqualTo(1024UL)); + Assert.That(restored.TotalDurationSeconds, Is.EqualTo(2.0)); + Assert.That(restored.Points, Is.EqualTo(points)); + } + + [Test] + public void Sidecar_RoundTrips_PreservingSetupHeaderAndIndex() + { + var setup = Encoding.ASCII.GetBytes("OpusHead-and-OpusTags-bytes-go-here"); + var index = new OggOpusSeekIndex( + new[] { new OpusSeekPoint(48000, 200), new OpusSeekPoint(96000, 700) }, + TotalDurationSeconds: 2.0, TotalByteLength: 800); + var sidecar = new OpusSidecar(setup, index); + + var restored = OpusSidecar.FromBytes(sidecar.ToBytes()); + + Assert.That(restored, Is.Not.Null); + Assert.That(restored!.SetupHeaderBytes, Is.EqualTo(setup), + "The sidecar must preserve the setup header so the client can prepend it to mid-stream slices"); + Assert.That(restored.SeekIndex.Points, Is.EqualTo(index.Points)); + Assert.That(restored.SeekIndex.TotalByteLength, Is.EqualTo(800UL)); + } + + [Test] + public void Sidecar_FromBytes_RejectsTruncatedBlob() + { + Assert.That(OpusSidecar.FromBytes(new byte[2]), Is.Null, "A blob shorter than the length prefix is corruption"); + + // A length prefix that overruns the buffer must be rejected, not over-read. + var bad = new byte[8]; + BinaryPrimitives.WriteUInt32LittleEndian(bad, 9999); + Assert.That(OpusSidecar.FromBytes(bad), Is.Null); + } + + // ---- Ogg stream construction helpers (minimal, single-packet pages) ---- + + // Builds one Ogg page wrapping a single packet payload with the given end granule position. The page + // header layout matches the spec the parser reads: capture "OggS", version, header-type, granulepos, + // serial, sequence, checksum (zeroed — the parser does not verify CRC), page-segments, segment table. + private static byte[] OggPage(ulong granule, byte[] packet) + { + // Lacing: a packet of length L is split into 255-byte segments plus a final < 255 segment. + var segments = new List(); + var remaining = packet.Length; + while (remaining >= 255) + { + segments.Add(255); + remaining -= 255; + } + segments.Add((byte)remaining); + + var header = new byte[OggOpusConstants.OggPageHeaderSize + segments.Count]; + OggOpusConstants.CapturePattern.CopyTo(header); + header[4] = 0; // version + header[5] = 0; // header-type flags + BinaryPrimitives.WriteUInt64LittleEndian(header.AsSpan(OggOpusConstants.GranulePositionOffset, 8), granule); + BinaryPrimitives.WriteUInt32LittleEndian(header.AsSpan(14, 4), 0xDEAD); // serial + BinaryPrimitives.WriteUInt32LittleEndian(header.AsSpan(18, 4), 0); // sequence (unused by parser) + BinaryPrimitives.WriteUInt32LittleEndian(header.AsSpan(22, 4), 0); // checksum (unverified) + header[OggOpusConstants.PageSegmentCountOffset] = (byte)segments.Count; + for (var i = 0; i < segments.Count; i++) + header[OggOpusConstants.OggPageHeaderSize + i] = segments[i]; + + return Concat(header, packet); + } + + private static byte[] OpusHeadPacket() + { + // "OpusHead" + a minimal valid-ish identification header tail (version, channels, pre-skip, etc.). + var tail = new byte[] { 1, 2, 0, 0, 0x80, 0xBB, 0, 0, 0, 0, 0 }; + return Concat(OggOpusConstants.OpusHeadSignature.ToArray(), tail); + } + + private static byte[] OpusTagsPacket() + { + // "OpusTags" + a tiny vendor string region (length-prefixed) + zero user comments. + var vendor = Encoding.ASCII.GetBytes("test"); + var packet = new List(); + packet.AddRange(OggOpusConstants.OpusTagsSignature.ToArray()); + packet.AddRange(BitConverter.GetBytes((uint)vendor.Length)); + packet.AddRange(vendor); + packet.AddRange(BitConverter.GetBytes(0u)); // user comment count + return packet.ToArray(); + } + + private static byte[] AudioPacket(int size) + { + var packet = new byte[size]; + for (var i = 0; i < size; i++) + packet[i] = (byte)(i & 0xFF); + return packet; + } + + private static List CollectPageStartOffsets(byte[] stream) + { + var offsets = new List(); + var span = stream.AsSpan(); + var offset = 0; + while (offset + OggOpusConstants.OggPageHeaderSize <= span.Length) + { + var page = span.Slice(offset); + if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern)) + break; + + var segmentCount = page[OggOpusConstants.PageSegmentCountOffset]; + var payload = 0; + for (var i = 0; i < segmentCount; i++) + payload += page[OggOpusConstants.OggPageHeaderSize + i]; + + offsets.Add((ulong)offset); + offset += OggOpusConstants.OggPageHeaderSize + segmentCount + payload; + } + return offsets; + } + + private static byte[] Concat(params byte[][] parts) + { + var total = parts.Sum(p => p.Length); + var result = new byte[total]; + var cursor = 0; + foreach (var part in parts) + { + part.CopyTo(result, cursor); + cursor += part.Length; + } + return result; + } +} diff --git a/DeepDrftTests/UploadDuplicateDetectionTests.cs b/DeepDrftTests/UploadDuplicateDetectionTests.cs index ab8fceb..97af62e 100644 --- a/DeepDrftTests/UploadDuplicateDetectionTests.cs +++ b/DeepDrftTests/UploadDuplicateDetectionTests.cs @@ -77,6 +77,7 @@ public class UploadDuplicateDetectionTests return new UnifiedTrackService( content, sqlTrackService, fileDatabase!, waveforms, + new NoOpOpusTranscodeQueue(), NullLogger.Instance); } From 6add30a4ff71d281ceec8a8d829d8cd9fb27474d Mon Sep 17 00:00:00 2001 From: daniel-c-harvey Date: Tue, 23 Jun 2026 06:55:31 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20Wave=2018.1=20review=20=E2=80=94=20p?= =?UTF-8?q?re-skip=20subtraction,=20t=3D0=20anchor,=20PreSkip=20in=20sidec?= =?UTF-8?q?ar,=20stderr=20on=20cancel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Processors/Opus/FfmpegOpusEncoder.cs | 2 + .../Processors/Opus/OggOpusConstants.cs | 19 +- .../Processors/Opus/OggOpusParser.cs | 34 +++- .../Processors/Opus/OggOpusSeekIndex.cs | 57 ++++-- .../Processors/Opus/OpusTranscodeService.cs | 3 + DeepDrftTests/OggOpusParserTests.cs | 181 +++++++++++++++--- 6 files changed, 251 insertions(+), 45 deletions(-) diff --git a/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs b/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs index 4e7f7e1..65af701 100644 --- a/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs +++ b/DeepDrftContent/Processors/Opus/FfmpegOpusEncoder.cs @@ -90,11 +90,13 @@ public sealed class FfmpegOpusEncoder catch (OperationCanceledException) when (ct.IsCancellationRequested) { TryKill(process); + await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings throw; // genuine shutdown cancellation — let it propagate } catch (OperationCanceledException) { TryKill(process); + await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings _logger.LogError("Opus transcode: ffmpeg exceeded the {Timeout}s timeout for {Source}.", _options.TimeoutSeconds, sourcePath); return false; diff --git a/DeepDrftContent/Processors/Opus/OggOpusConstants.cs b/DeepDrftContent/Processors/Opus/OggOpusConstants.cs index f6fa431..fbba891 100644 --- a/DeepDrftContent/Processors/Opus/OggOpusConstants.cs +++ b/DeepDrftContent/Processors/Opus/OggOpusConstants.cs @@ -36,8 +36,23 @@ public static class OggOpusConstants /// Sentinel granule position for a page that ends mid-packet (no usable timestamp). public const ulong NoGranulePosition = 0xFFFFFFFFFFFFFFFFUL; - /// Header size of the serialized seek-index blob: totalBytes(8) + duration(8) + count(4). - public const int SeekIndexHeaderSize = 20; + /// + /// Minimum byte length of an OpusHead packet payload to safely read pre_skip. + /// RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) + pre_skip(2) = 12 bytes minimum. + /// + public const int OpusHeadMinSize = 12; + + /// + /// Byte offset of pre_skip within the full OpusHead packet payload (including the + /// magic). RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) = 10 bytes before pre_skip. + /// + public const int OpusHeadPreSkipOffset = 10; + + /// + /// Header size of the serialized seek-index blob: + /// totalBytes(8) + duration(8) + count(4) + preSkip(2) + reserved(2) = 24 bytes. + /// + public const int SeekIndexHeaderSize = 24; /// Size of one serialized seek point: granulepos(8) + byteOffset(8). public const int SeekPointSize = 16; diff --git a/DeepDrftContent/Processors/Opus/OggOpusParser.cs b/DeepDrftContent/Processors/Opus/OggOpusParser.cs index 98452b4..0e4f850 100644 --- a/DeepDrftContent/Processors/Opus/OggOpusParser.cs +++ b/DeepDrftContent/Processors/Opus/OggOpusParser.cs @@ -33,6 +33,7 @@ public static class OggOpusParser var setupHeaderEnd = -1; var sawOpusHead = false; var sawOpusTags = false; + ushort preSkip = 0; var points = new List(); ulong lastGranule = 0; @@ -73,6 +74,15 @@ public static class OggOpusParser { sawOpusHead = true; setupHeaderEnd = offset + pageTotalSize; + + // RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic: + // [0] version (1 byte), [1] channel count (1 byte), + // [2-3] pre_skip (little-endian uint16) ← at packet bytes 10-11 + // pre_skip is the number of decoder samples to discard before presenting audio; + // all granule→time conversions must subtract it (RFC 7845 §4.3). + if (payload.Length >= OggOpusConstants.OpusHeadMinSize) + preSkip = BinaryPrimitives.ReadUInt16LittleEndian( + payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2)); } else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature)) { @@ -87,19 +97,28 @@ public static class OggOpusParser // the byte cursor. if (granule != OggOpusConstants.NoGranulePosition) { - var pageTime = granule / OggOpusConstants.OpusSampleRate; + // RFC 7845 §4.3: presentation time = max(0, granule − preSkip) / 48000. + // Use this corrected time for bucketing so that a stream with pre-skip 3840 (~80 ms) + // does not systematically offset every indexed time by that amount. + var correctedTime = Math.Max(0.0, + (granule - (double)preSkip) / OggOpusConstants.OpusSampleRate); + if (!firstAudioPointTaken) { - points.Add(new OpusSeekPoint(granule, (ulong)offset)); + // Anchor the first seek point at corrected time = 0 by storing the granule as + // preSkip. This guarantees that a binary search for t=0 ("largest entry with + // corrected time ≤ 0") always resolves to the first audio page's byte offset — + // even when the real granule is slightly above preSkip due to encoder lead-in. + points.Add(new OpusSeekPoint(preSkip, (ulong)offset)); firstAudioPointTaken = true; nextBucketTime = OggOpusConstants.SeekBucketSeconds; } - else if (pageTime >= nextBucketTime) + else if (correctedTime >= nextBucketTime) { points.Add(new OpusSeekPoint(granule, (ulong)offset)); // Advance past every bucket this page crossed so a long page does not emit a // backlog of entries; the next bucket is the first boundary strictly after it. - while (nextBucketTime <= pageTime) + while (nextBucketTime <= correctedTime) nextBucketTime += OggOpusConstants.SeekBucketSeconds; } @@ -114,8 +133,11 @@ public static class OggOpusParser return null; var setupHeader = oggBytes[..setupHeaderEnd].ToArray(); - var totalDuration = lastGranule / OggOpusConstants.OpusSampleRate; - var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length); + // RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener + // experiences (the last audio page's corrected time, clamped to ≥ 0). + var totalDuration = Math.Max(0.0, + (lastGranule - (double)preSkip) / OggOpusConstants.OpusSampleRate); + var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length, preSkip); return new OggOpusWalk(setupHeader, index); } diff --git a/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs b/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs index b5bff4c..465a1b5 100644 --- a/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs +++ b/DeepDrftContent/Processors/Opus/OggOpusSeekIndex.cs @@ -4,16 +4,25 @@ namespace DeepDrftContent.Processors.Opus; /// /// A single seek-index entry: an authoritative 48 kHz (Opus granule -/// positions are always sample counts at 48 kHz, so time = granulepos / 48000) paired with the exact -/// byte offset of the Ogg page that carries it. Every is a real page-start -/// boundary, so a Range: bytes={ByteOffset}- fetch lands the decoder Ogg-sync-aligned. +/// positions are always sample counts at 48 kHz) paired with the exact byte offset of the Ogg page that +/// carries it. Every is a real page-start boundary, so a +/// Range: bytes={ByteOffset}- fetch lands the decoder Ogg-sync-aligned. /// +/// +/// Per RFC 7845 §4.3, the PCM presentation time is (granulepos − preSkip) / 48000. The raw +/// is stored here as-is; callers should subtract the containing +/// before converting to a presentation time. Use +/// for the corrected value. +/// /// The page's end granule position (48 kHz sample count). /// The byte offset of the page start in the Opus file. public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOffset) { - /// Time in seconds this granule position represents (granulepos / 48 kHz). - public double TimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate; + /// + /// Raw granule-position-to-time conversion (granulepos / 48 kHz). Does NOT subtract pre-skip — use + /// for the RFC 7845-correct presentation time. + /// + public double RawTimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate; } /// @@ -23,20 +32,38 @@ public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOff /// One entry per 0.5 s of audio (), each snapped to the /// nearest enclosing page start, plus the totals needed to clamp a seek to range. /// -/// Ordered (granulepos, byteOffset) entries, ascending. The first is always the -/// first audio page (offset just past the setup headers). -/// Total stream duration from the final granule position. +/// Ordered (granulepos, byteOffset) entries, ascending. The first entry always +/// has == (corrected time = 0) +/// and points at the first audio page start, ensuring a seek to t=0 always resolves. +/// +/// Pre-skip-corrected total stream duration: max(0, lastGranule − preSkip) / 48000. +/// /// Total Opus file byte length, for clamping a seek past the end. +/// +/// The pre_skip value from the OpusHead identification header (RFC 7845 §5.1). Opus +/// decoders must discard this many samples from the decoded start before presenting audio. The client +/// (wave 18.4) needs this to trim the first decoded buffer; storing it here avoids a re-parse of the +/// Ogg stream at delivery time. +/// public sealed record OggOpusSeekIndex( IReadOnlyList Points, double TotalDurationSeconds, - ulong TotalByteLength) + ulong TotalByteLength, + ushort PreSkip) { + /// + /// Returns the RFC 7845-correct presentation time for a seek point: max(0, granule − preSkip) / 48000. + /// Use this for all time comparisons; raw omits the pre-skip. + /// + public double PresentationTimeSeconds(OpusSeekPoint point) => + Math.Max(0.0, (point.GranulePosition - (double)PreSkip) / OggOpusConstants.OpusSampleRate); + /// /// Serializes the index to the compact little-endian binary blob the sidecar stores. Layout: - /// [uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount] then - /// pointCount × (uint64 granulepos, uint64 byteOffset). Fixed-width records keep the client - /// parse to a single typed-array read. + /// [uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount][uint16 preSkip][uint16 reserved] + /// then pointCount × (uint64 granulepos, uint64 byteOffset). The four-byte preSkip+reserved + /// region pads the header to 24 bytes, keeping the point table 8-byte-aligned. + /// Fixed-width records keep the client parse to a single typed-array read. /// public byte[] ToBytes() { @@ -47,6 +74,8 @@ public sealed record OggOpusSeekIndex( BinaryPrimitives.WriteUInt64LittleEndian(span[..8], TotalByteLength); BinaryPrimitives.WriteDoubleLittleEndian(span.Slice(8, 8), TotalDurationSeconds); BinaryPrimitives.WriteUInt32LittleEndian(span.Slice(16, 4), (uint)Points.Count); + BinaryPrimitives.WriteUInt16LittleEndian(span.Slice(20, 2), PreSkip); + // bytes 22-23: reserved (zero-initialized by array allocation) var cursor = OggOpusConstants.SeekIndexHeaderSize; foreach (var point in Points) @@ -73,6 +102,8 @@ public sealed record OggOpusSeekIndex( var totalByteLength = BinaryPrimitives.ReadUInt64LittleEndian(bytes[..8]); var totalDuration = BinaryPrimitives.ReadDoubleLittleEndian(bytes.Slice(8, 8)); var count = BinaryPrimitives.ReadUInt32LittleEndian(bytes.Slice(16, 4)); + var preSkip = BinaryPrimitives.ReadUInt16LittleEndian(bytes.Slice(20, 2)); + // bytes 22-23: reserved — ignored on read for forward-compatibility var expected = OggOpusConstants.SeekIndexHeaderSize + (long)count * OggOpusConstants.SeekPointSize; if (bytes.Length < expected) @@ -88,6 +119,6 @@ public sealed record OggOpusSeekIndex( cursor += OggOpusConstants.SeekPointSize; } - return new OggOpusSeekIndex(points, totalDuration, totalByteLength); + return new OggOpusSeekIndex(points, totalDuration, totalByteLength, preSkip); } } diff --git a/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs b/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs index 712344c..dba73e9 100644 --- a/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs +++ b/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs @@ -132,6 +132,9 @@ public sealed class OpusTranscodeService private async Task EnsureVaultAsync() { + // The TrackOpus vault is created at host startup (Startup.cs), so this guard is normally a + // no-op for the upload path. It is retained for the backfill path, which may run via a + // standalone CLI or a host that skips vault pre-creation, where the vault might not exist. if (!_fileDatabase.HasVault(VaultConstants.TrackOpus)) await _fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio); } diff --git a/DeepDrftTests/OggOpusParserTests.cs b/DeepDrftTests/OggOpusParserTests.cs index 2a7728f..9d024f9 100644 --- a/DeepDrftTests/OggOpusParserTests.cs +++ b/DeepDrftTests/OggOpusParserTests.cs @@ -7,13 +7,19 @@ namespace DeepDrftTests; /// /// Coverage for the Phase 18.1 seek-index + setup-header extraction (§3.4a). These exercise the pure /// Ogg-Opus walker and the sidecar codec over hand-built Ogg streams — no ffmpeg dependency — so the -/// granule→byte mapping, page-boundary snapping, 0.5 s bucketing, clamp totals, and setup-header capture -/// are asserted deterministically. The byte layout mirrors a real Opus stream: an OpusHead page, an -/// OpusTags page, then audio pages each carrying an end granule position at 48 kHz. +/// granule→byte mapping, pre-skip correction (RFC 7845 §4.3), page-boundary snapping, 0.5 s bucketing, +/// t=0 anchor, clamp totals, and setup-header capture are asserted deterministically. The byte layout +/// mirrors a real Opus stream: an OpusHead page, an OpusTags page, then audio pages each carrying an +/// end granule position at 48 kHz. /// [TestFixture] public class OggOpusParserTests { + // libopus default pre-skip: 312 samples at 48 kHz (≈ 6.5 ms). FFmpeg may use 3840 (~80 ms). + // Using 312 here as a realistic non-zero value that is small enough not to affect the test + // granules (which start at 48000), while still exercising the pre-skip subtraction path. + private const ushort TestPreSkip = 312; + [Test] public void Walk_CapturesSetupHeader_AsLeadingOpusHeadAndOpusTagsPagesVerbatim() { @@ -32,9 +38,11 @@ public class OggOpusParserTests } [Test] - public void Walk_FirstSeekPoint_IsTheFirstAudioPageAtItsExactByteOffset() + public void Walk_FirstSeekPoint_IsAnchoredAtTimeZero_PointingAtFirstAudioPage() { - var head = OggPage(granule: 0, OpusHeadPacket()); + // Use a non-zero pre-skip to verify that the first seek point is explicitly anchored at + // corrected time = 0, not at the raw granule time. + var head = OggPage(granule: 0, OpusHeadPacket(preSkip: TestPreSkip)); var tags = OggPage(granule: 0, OpusTagsPacket()); var audio = OggPage(granule: 48000, AudioPacket(64)); @@ -45,9 +53,87 @@ public class OggOpusParserTests Assert.That(walk, Is.Not.Null); var first = walk!.SeekIndex.Points[0]; + + // The first point's byte offset must be the first audio page start (exact page boundary). Assert.That(first.ByteOffset, Is.EqualTo(firstAudioOffset), "The first seek point must land on the first audio page's start offset (an exact page boundary)"); - Assert.That(first.GranulePosition, Is.EqualTo(48000UL)); + + // The first point's stored granule is clamped to preSkip so corrected presentation time = 0. + // This guarantees a binary search for t=0 always resolves to the first audio page. + Assert.That(walk.SeekIndex.PreSkip, Is.EqualTo(TestPreSkip), + "PreSkip must be parsed from OpusHead and carried into the seek index"); + Assert.That(walk.SeekIndex.PresentationTimeSeconds(first), Is.EqualTo(0.0).Within(1e-12), + "First seek point must have corrected presentation time = 0 so a seek to t=0 always resolves"); + } + + [Test] + public void Walk_PreSkip_IsSubtractedFromGranuleInTimeCalculations() + { + // Pre-skip of 3840 samples (≈ 80 ms, the libopus typical value used by ffmpeg). + // Without the fix, pageTime = 48000 / 48000 = 1.0 s; with fix, (48000 - 3840) / 48000 = 0.92 s. + const ushort preSkip = 3840; + var head = OggPage(granule: 0, OpusHeadPacket(preSkip: preSkip)); + var tags = OggPage(granule: 0, OpusTagsPacket()); + // First audio page at granule 48000 (1.0 s raw; 0.92 s corrected) + var a1 = OggPage(granule: 48000, AudioPacket(64)); + // Second audio page at granule 96000 (2.0 s raw; 1.92 s corrected) + var a2 = OggPage(granule: 96000, AudioPacket(64)); + // Third audio page at granule 144000 (3.0 s raw; 2.92 s corrected) + var a3 = OggPage(granule: 144000, AudioPacket(64)); + + var walk = OggOpusParser.Walk(Concat(head, tags, a1, a2, a3)); + + Assert.That(walk, Is.Not.Null); + var index = walk!.SeekIndex; + + Assert.That(index.PreSkip, Is.EqualTo(preSkip), "PreSkip must be parsed from OpusHead"); + + // TotalDurationSeconds must be pre-skip-corrected: (144000 - 3840) / 48000 = 2.92 s + var expectedDuration = (144000.0 - preSkip) / 48000.0; + Assert.That(index.TotalDurationSeconds, Is.EqualTo(expectedDuration).Within(1e-9), + "TotalDurationSeconds must subtract preSkip (RFC 7845 §4.3), not use raw lastGranule / 48000"); + + // The second indexed point (first real bucket) must have corrected time, not raw time. + // With correctedTime(a2) = 1.92 s and bucket = 0.5 s, it should fall in the 1.5 s bucket. + if (index.Points.Count > 1) + { + var secondPoint = index.Points[1]; + var corrected = index.PresentationTimeSeconds(secondPoint); + Assert.That(corrected, Is.GreaterThan(0.0), + "Non-first indexed points must have positive corrected presentation times"); + Assert.That(secondPoint.RawTimeSeconds, Is.GreaterThan(corrected), + "Raw time must be greater than corrected time when pre-skip > 0"); + } + } + + [Test] + public void Walk_SeekToZero_ResolvesToFirstAudioPageOffset_WithNonZeroPreSkip() + { + // This is the AC9 / Critical-2 regression test: a seek to t=0 must resolve to the first audio + // page's byte offset, not produce "no entry found". With the old code (no t=0 anchor and no + // pre-skip correction), the first indexed point had correctedTime ≈ 0.92 s (for preSkip=3840), + // so a binary search for t=0 would find no entry with time ≤ 0 and fail. + const ushort preSkip = 3840; + var head = OggPage(granule: 0, OpusHeadPacket(preSkip: preSkip)); + var tags = OggPage(granule: 0, OpusTagsPacket()); + var a1 = OggPage(granule: 48000, AudioPacket(64)); + var a2 = OggPage(granule: 96000, AudioPacket(64)); + + var stream = Concat(head, tags, a1, a2); + var firstAudioByteOffset = (ulong)(head.Length + tags.Length); + + var walk = OggOpusParser.Walk(stream); + Assert.That(walk, Is.Not.Null); + + var index = walk!.SeekIndex; + var firstPoint = index.Points[0]; + + // Simulate the binary search: find the largest entry with PresentationTimeSeconds ≤ 0. + // With the fix, the first point has corrected time = 0.0, so it IS found. + Assert.That(index.PresentationTimeSeconds(firstPoint), Is.EqualTo(0.0).Within(1e-12), + "First point corrected time must be exactly 0.0 so binary search for t=0 resolves it"); + Assert.That(firstPoint.ByteOffset, Is.EqualTo(firstAudioByteOffset), + "The t=0 anchor must point at the first audio page's byte offset, not the stream start"); } [Test] @@ -82,8 +168,8 @@ public class OggOpusParserTests [Test] public void Walk_Bucketing_EmitsRoughlyOneEntryPerHalfSecond() { - // Twenty audio pages of 0.25 s each = 5 s total. At 0.5 s buckets we expect ~10 entries - // (the first audio page is always taken, then one per crossed half-second boundary). + // Twenty audio pages of 0.25 s each = 5 s total (zero pre-skip). At 0.5 s buckets: + // first point (anchored at t=0) + one per 0.5 s boundary = 1 + 10 = 11 entries expected. var head = OggPage(granule: 0, OpusHeadPacket()); var tags = OggPage(granule: 0, OpusTagsPacket()); @@ -99,9 +185,9 @@ public class OggOpusParserTests var walk = OggOpusParser.Walk(stream); Assert.That(walk, Is.Not.Null); - // 5 s of audio → first point + one per 0.5 s boundary up to 5.0 s. Allow a small tolerance for - // boundary rounding, but it must be far below "one per page" (20) and at least the ~10 buckets. - Assert.That(walk!.SeekIndex.Points.Count, Is.InRange(9, 12), + // 5 s of audio with 0.5 s buckets: 1 anchor + 10 bucket crossings = 11 entries. + // Accept 10–12 for floating-point boundary tolerance, but must be far below 20 (one per page). + Assert.That(walk!.SeekIndex.Points.Count, Is.InRange(10, 12), "Bucketing must coalesce ~0.25 s pages into ~0.5 s index entries, not one per page"); } @@ -131,12 +217,13 @@ public class OggOpusParserTests } [Test] - public void Walk_ClampValues_ReflectFinalGranuleAndTotalByteLength() + public void Walk_ClampValues_ReflectPreSkipCorrectedDurationAndTotalByteLength() { - var head = OggPage(granule: 0, OpusHeadPacket()); + const ushort preSkip = 312; + var head = OggPage(granule: 0, OpusHeadPacket(preSkip: preSkip)); var tags = OggPage(granule: 0, OpusTagsPacket()); - var a1 = OggPage(granule: 48000, AudioPacket(64)); // 1.0 s - var a2 = OggPage(granule: 144000, AudioPacket(64)); // 3.0 s (final) + var a1 = OggPage(granule: 48000, AudioPacket(64)); // 1.0 s raw; ~0.9935 s corrected + var a2 = OggPage(granule: 144000, AudioPacket(64)); // 3.0 s raw; ~2.9935 s corrected (final) var stream = Concat(head, tags, a1, a2); @@ -145,8 +232,13 @@ public class OggOpusParserTests Assert.That(walk!.SeekIndex.TotalByteLength, Is.EqualTo((ulong)stream.Length), "Total byte length must equal the full stream length for end-of-stream clamping"); - Assert.That(walk.SeekIndex.TotalDurationSeconds, Is.EqualTo(3.0).Within(1e-9), - "Total duration must derive from the final page's granule position (144000 / 48000 = 3.0 s)"); + + var expectedDuration = (144000.0 - preSkip) / 48000.0; + Assert.That(walk.SeekIndex.TotalDurationSeconds, Is.EqualTo(expectedDuration).Within(1e-9), + "TotalDurationSeconds must be pre-skip-corrected: (lastGranule - preSkip) / 48000"); + + Assert.That(walk.SeekIndex.PreSkip, Is.EqualTo(preSkip), + "PreSkip must round-trip through the seek index"); } [Test] @@ -165,27 +257,58 @@ public class OggOpusParserTests { var points = new[] { - new OpusSeekPoint(48000, 200), + new OpusSeekPoint(312, 200), // first point anchored at preSkip new OpusSeekPoint(72000, 512), new OpusSeekPoint(96000, 900), }; - var index = new OggOpusSeekIndex(points, TotalDurationSeconds: 2.0, TotalByteLength: 1024); + var index = new OggOpusSeekIndex(points, TotalDurationSeconds: 2.0, TotalByteLength: 1024, + PreSkip: 312); var restored = OggOpusSeekIndex.FromBytes(index.ToBytes()); Assert.That(restored, Is.Not.Null); Assert.That(restored!.TotalByteLength, Is.EqualTo(1024UL)); Assert.That(restored.TotalDurationSeconds, Is.EqualTo(2.0)); + Assert.That(restored.PreSkip, Is.EqualTo((ushort)312), + "PreSkip must survive the binary round-trip"); Assert.That(restored.Points, Is.EqualTo(points)); } + [Test] + public void SeekIndex_PresentationTimeSeconds_SubtractsPreSkip() + { + const ushort preSkip = 3840; + var point = new OpusSeekPoint(GranulePosition: 48000, ByteOffset: 200); + var index = new OggOpusSeekIndex( + new[] { point }, TotalDurationSeconds: 0.92, TotalByteLength: 500, PreSkip: preSkip); + + var corrected = index.PresentationTimeSeconds(point); + var expected = (48000.0 - preSkip) / 48000.0; // ≈ 0.92 s + + Assert.That(corrected, Is.EqualTo(expected).Within(1e-9), + "PresentationTimeSeconds must return (granule - preSkip) / 48000, not raw granule / 48000"); + } + + [Test] + public void SeekIndex_PresentationTimeSeconds_ClampsToZeroForFirstAnchorPoint() + { + const ushort preSkip = 3840; + // First anchor point: granule stored as preSkip, so corrected time = 0. + var firstPoint = new OpusSeekPoint(GranulePosition: preSkip, ByteOffset: 150); + var index = new OggOpusSeekIndex( + new[] { firstPoint }, TotalDurationSeconds: 2.0, TotalByteLength: 500, PreSkip: preSkip); + + Assert.That(index.PresentationTimeSeconds(firstPoint), Is.EqualTo(0.0).Within(1e-12), + "The t=0 anchor point (granule == preSkip) must yield corrected time = 0.0 exactly"); + } + [Test] public void Sidecar_RoundTrips_PreservingSetupHeaderAndIndex() { var setup = Encoding.ASCII.GetBytes("OpusHead-and-OpusTags-bytes-go-here"); var index = new OggOpusSeekIndex( - new[] { new OpusSeekPoint(48000, 200), new OpusSeekPoint(96000, 700) }, - TotalDurationSeconds: 2.0, TotalByteLength: 800); + new[] { new OpusSeekPoint(312, 200), new OpusSeekPoint(96000, 700) }, + TotalDurationSeconds: 2.0, TotalByteLength: 800, PreSkip: 312); var sidecar = new OpusSidecar(setup, index); var restored = OpusSidecar.FromBytes(sidecar.ToBytes()); @@ -195,6 +318,8 @@ public class OggOpusParserTests "The sidecar must preserve the setup header so the client can prepend it to mid-stream slices"); Assert.That(restored.SeekIndex.Points, Is.EqualTo(index.Points)); Assert.That(restored.SeekIndex.TotalByteLength, Is.EqualTo(800UL)); + Assert.That(restored.SeekIndex.PreSkip, Is.EqualTo((ushort)312), + "PreSkip must survive the sidecar binary round-trip"); } [Test] @@ -240,10 +365,18 @@ public class OggOpusParserTests return Concat(header, packet); } - private static byte[] OpusHeadPacket() + private static byte[] OpusHeadPacket(ushort preSkip = 0) { - // "OpusHead" + a minimal valid-ish identification header tail (version, channels, pre-skip, etc.). - var tail = new byte[] { 1, 2, 0, 0, 0x80, 0xBB, 0, 0, 0, 0, 0 }; + // "OpusHead" + RFC 7845 §5.1 identification header: + // [0] version = 1, [1] channel count = 2, + // [2-3] pre_skip (little-endian uint16), [4-7] input sample rate = 0xBB80 = 48000, + // [8-9] output gain = 0, [10] channel mapping family = 0. + var tail = new byte[11]; + tail[0] = 1; // version + tail[1] = 2; // channels + BinaryPrimitives.WriteUInt16LittleEndian(tail.AsSpan(2, 2), preSkip); // pre_skip + BinaryPrimitives.WriteUInt32LittleEndian(tail.AsSpan(4, 4), 48000); // input sample rate + tail[10] = 0; // channel mapping family return Concat(OggOpusConstants.OpusHeadSignature.ToArray(), tail); }