Merge streaming-overhaul into dev (Opus low-data streaming, windowed streaming, HW-accel-off stabilization)

This commit is contained in:
daniel-c-harvey
2026-06-26 11:14:59 -04:00
97 changed files with 10086 additions and 591 deletions
@@ -28,4 +28,13 @@ public static class VaultConstants
/// The datum resolution is duration-derived (≈333 samples/sec, see <c>WaveformResolution</c>).
/// </summary>
public const string TrackWaveforms = "track-waveforms";
/// <summary>
/// Vault name for the derived low-data Ogg Opus artifacts, keyed by the track's EntryKey (Phase 18,
/// S2). Holds two entries per track: the Opus audio bytes (<c>.opus</c>) and the combined setup-header
/// + granule→byte seek-index sidecar (<c>.opusidx</c>). Both are best-effort derived artifacts —
/// regenerable, and a track without them still plays losslessly. Distinct from the source <c>tracks</c>
/// vault so the source means exactly one thing (mirrors the <c>track-waveforms</c> precedent).
/// </summary>
public const string TrackOpus = "track-opus";
}
@@ -206,6 +206,7 @@ public static class MimeTypeExtensions
{ ".flac", "audio/flac" },
{ ".aac", "audio/aac" },
{ ".ogg", "audio/ogg" },
{ ".opus", "audio/ogg" },
{ ".m4a", "audio/mp4" }
};
@@ -0,0 +1,140 @@
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// Encodes a source audio file (any format the source vault holds — WAV/MP3/FLAC) to Ogg Opus fullband
/// 320 kbps by shelling out to FFmpeg (libopus). FFmpeg is chosen over a managed encoder because it
/// muxes a correct Ogg container with accurate granule positions across every input format — the page
/// structure the seek-index walk depends on — which a raw libopus binding does not provide. The external
/// <c>ffmpeg</c> binary is therefore a host runtime prerequisite (flagged in the wave handoff).
/// </summary>
public sealed class FfmpegOpusEncoder
{
private readonly OpusTranscodeOptions _options;
private readonly ILogger<FfmpegOpusEncoder> _logger;
public FfmpegOpusEncoder(IOptions<OpusTranscodeOptions> options, ILogger<FfmpegOpusEncoder> logger)
{
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Transcodes <paramref name="sourcePath"/> to an Ogg Opus file at <paramref name="destinationPath"/>.
/// Returns true on a clean exit with a non-empty output. Returns false (logged) on a non-zero exit,
/// a timeout, a missing ffmpeg binary, or any process failure — a transcode failure must never throw
/// to the caller (C6); the background worker treats false as "leave the track lossless-only".
/// </summary>
public async Task<bool> EncodeAsync(string sourcePath, string destinationPath, CancellationToken ct)
{
var ffmpeg = string.IsNullOrWhiteSpace(_options.FfmpegPath) ? "ffmpeg" : _options.FfmpegPath;
// -vn drops any cover-art video stream; -map a:0 takes the first audio stream; -ar 48000 forces
// fullband (Opus internally resamples to 48 kHz anyway, but stating it keeps granulepos math
// unambiguous); libopus VBR at the target bitrate; -f ogg for an explicit Ogg container; -y
// overwrites the (pre-created, empty) destination temp file.
var args = new[]
{
"-hide_banner", "-nostdin", "-loglevel", "error",
"-i", sourcePath,
"-vn", "-map", "a:0",
"-c:a", "libopus", "-b:a", $"{_options.BitrateKbps}k",
"-ar", "48000",
"-f", "ogg",
"-y", destinationPath,
};
var psi = new ProcessStartInfo(ffmpeg)
{
RedirectStandardError = true,
RedirectStandardOutput = true,
UseShellExecute = false,
CreateNoWindow = true,
};
foreach (var arg in args)
psi.ArgumentList.Add(arg);
using var process = new Process { StartInfo = psi };
try
{
if (!process.Start())
{
_logger.LogError("Opus transcode: ffmpeg failed to start ({Ffmpeg}).", ffmpeg);
return false;
}
}
catch (Exception ex)
{
// Most commonly a missing binary (Win32Exception "file not found"). This is the ops
// prerequisite failing — log loudly so it is unmistakable in the deploy logs.
_logger.LogError(ex,
"Opus transcode: could not launch ffmpeg ({Ffmpeg}). Is the ffmpeg binary installed on the host?",
ffmpeg);
return false;
}
using var timeout = CancellationTokenSource.CreateLinkedTokenSource(ct);
timeout.CancelAfter(TimeSpan.FromSeconds(_options.TimeoutSeconds));
// Drain stderr concurrently — ffmpeg can block writing diagnostics if the pipe is not read.
var stderrTask = process.StandardError.ReadToEndAsync(timeout.Token);
try
{
await process.WaitForExitAsync(timeout.Token);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
TryKill(process);
await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings
throw; // genuine shutdown cancellation — let it propagate
}
catch (OperationCanceledException)
{
TryKill(process);
await SafeStderr(stderrTask); // observe to avoid unobserved-task warnings
_logger.LogError("Opus transcode: ffmpeg exceeded the {Timeout}s timeout for {Source}.",
_options.TimeoutSeconds, sourcePath);
return false;
}
var stderr = await SafeStderr(stderrTask);
if (process.ExitCode != 0)
{
_logger.LogError("Opus transcode: ffmpeg exited {Code} for {Source}. stderr: {Stderr}",
process.ExitCode, sourcePath, stderr);
return false;
}
if (!File.Exists(destinationPath) || new FileInfo(destinationPath).Length == 0)
{
_logger.LogError("Opus transcode: ffmpeg exited 0 but produced no output for {Source}.", sourcePath);
return false;
}
return true;
}
private void TryKill(Process process)
{
try
{
if (!process.HasExited)
process.Kill(entireProcessTree: true);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Opus transcode: failed to kill timed-out ffmpeg process.");
}
}
private static async Task<string> SafeStderr(Task<string> stderrTask)
{
try { return await stderrTask; }
catch { return "<stderr unavailable>"; }
}
}
@@ -0,0 +1,65 @@
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// Wire-format constants for the Ogg-Opus derived artifacts. Centralised so the seek-index codec,
/// the page walker, and the tests agree on one set of magic numbers.
/// </summary>
public static class OggOpusConstants
{
/// <summary>Opus granule positions are always sample counts at 48 kHz, regardless of input rate.</summary>
public const double OpusSampleRate = 48000.0;
/// <summary>One seek-index entry per this many seconds of audio (OQ7 — 0.5 s buckets).</summary>
public const double SeekBucketSeconds = 0.5;
/// <summary>The Ogg page capture pattern "OggS" — every page starts with these four bytes.</summary>
public static ReadOnlySpan<byte> CapturePattern => "OggS"u8;
/// <summary>Magic signature opening an OpusHead identification header packet.</summary>
public static ReadOnlySpan<byte> OpusHeadSignature => "OpusHead"u8;
/// <summary>Magic signature opening an OpusTags comment header packet.</summary>
public static ReadOnlySpan<byte> OpusTagsSignature => "OpusTags"u8;
/// <summary>
/// Fixed size of an Ogg page header before the segment table: capture(4) + version(1) +
/// header-type(1) + granulepos(8) + serial(4) + sequence(4) + checksum(4) + page-segments(1).
/// </summary>
public const int OggPageHeaderSize = 27;
/// <summary>Byte offset of the 64-bit granule position within an Ogg page header.</summary>
public const int GranulePositionOffset = 6;
/// <summary>Byte offset of the page-segment count (the segment-table length) within the header.</summary>
public const int PageSegmentCountOffset = 26;
/// <summary>Sentinel granule position for a page that ends mid-packet (no usable timestamp).</summary>
public const ulong NoGranulePosition = 0xFFFFFFFFFFFFFFFFUL;
/// <summary>
/// Minimum byte length of an <c>OpusHead</c> packet payload to safely read <c>pre_skip</c>.
/// RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) + pre_skip(2) = 12 bytes minimum.
/// </summary>
public const int OpusHeadMinSize = 12;
/// <summary>
/// Byte offset of <c>pre_skip</c> within the full <c>OpusHead</c> packet payload (including the
/// magic). RFC 7845 §5.1: "OpusHead"(8) + version(1) + channels(1) = 10 bytes before pre_skip.
/// </summary>
public const int OpusHeadPreSkipOffset = 10;
/// <summary>
/// Header size of the serialized seek-index blob:
/// totalBytes(8) + duration(8) + count(4) + preSkip(2) + reserved(2) = 24 bytes.
/// </summary>
public const int SeekIndexHeaderSize = 24;
/// <summary>Size of one serialized seek point: granulepos(8) + byteOffset(8).</summary>
public const int SeekPointSize = 16;
/// <summary>Vault-resource extension for the Opus audio bytes.</summary>
public const string OpusExtension = ".opus";
/// <summary>Vault-resource extension for the combined setup-header + seek-index sidecar.</summary>
public const string SidecarExtension = ".opusidx";
}
@@ -0,0 +1,146 @@
using System.Buffers.Binary;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// The result of walking an encoded Ogg Opus stream once: the captured setup header (the leading
/// <c>OpusHead</c> + <c>OpusTags</c> pages, verbatim) and the bucketed granule→byte seek index. This
/// is everything the sidecar artifact carries (§3.4a) — built at transcode time so delivery never
/// re-walks the stream.
/// </summary>
/// <param name="SetupHeaderBytes">The leading setup pages (OpusHead + OpusTags), exactly as they
/// appear at the start of the stream, ready to prepend to any mid-stream page run before decode.</param>
/// <param name="SeekIndex">The accurate, 0.5 s-bucketed granule→byte transfer function.</param>
public sealed record OggOpusWalk(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex);
/// <summary>
/// Pure Ogg-Opus stream walker. Reads the page structure directly (the <c>OggS</c> capture pattern and
/// the 27-byte page header) to (1) capture the setup-header pages and (2) record, for every audio page,
/// its end granule position and exact byte offset — bucketed to 0.5 s with each bucket boundary snapped
/// to the nearest enclosing page start. No external dependency: the encoder (FFmpeg) produces the bytes;
/// this turns them into the seek artifact deterministically, so it is unit-testable without a codec.
/// </summary>
public static class OggOpusParser
{
/// <summary>
/// Walks <paramref name="oggBytes"/> and produces the setup header + seek index, or null if the
/// bytes are not a recognisable Ogg Opus stream (no setup header, no audio pages, or truncated
/// structure). A null is the caller's signal to treat the transcode as failed and leave the track
/// lossless-only (C6) — it does not throw for malformed input.
/// </summary>
public static OggOpusWalk? Walk(ReadOnlySpan<byte> oggBytes)
{
var setupHeaderEnd = -1;
var sawOpusHead = false;
var sawOpusTags = false;
ushort preSkip = 0;
var points = new List<OpusSeekPoint>();
ulong lastGranule = 0;
var nextBucketTime = 0.0;
var firstAudioPointTaken = false;
var offset = 0;
while (offset + OggOpusConstants.OggPageHeaderSize <= oggBytes.Length)
{
var page = oggBytes.Slice(offset);
if (!page[..4].SequenceEqual(OggOpusConstants.CapturePattern))
{
// Not on a page boundary — the encoder writes contiguous pages, so this means the
// stream is malformed or we mis-stepped. Either way it is unrecoverable here.
return null;
}
var segmentCount = page[OggOpusConstants.PageSegmentCountOffset];
var segmentTableEnd = OggOpusConstants.OggPageHeaderSize + segmentCount;
if (segmentTableEnd > page.Length)
return null; // truncated header
var payloadSize = 0;
for (var i = 0; i < segmentCount; i++)
payloadSize += page[OggOpusConstants.OggPageHeaderSize + i];
var pageTotalSize = segmentTableEnd + payloadSize;
if (pageTotalSize > page.Length)
return null; // truncated payload
var payload = page.Slice(segmentTableEnd, payloadSize);
var granule = BinaryPrimitives.ReadUInt64LittleEndian(
page.Slice(OggOpusConstants.GranulePositionOffset, 8));
// The setup pages carry no audio granule (OpusHead has granulepos 0; OpusTags too). They
// are the leading pages whose payload opens with the Opus magic signatures.
if (!sawOpusHead && StartsWith(payload, OggOpusConstants.OpusHeadSignature))
{
sawOpusHead = true;
setupHeaderEnd = offset + pageTotalSize;
// RFC 7845 §5.1 — OpusHead layout after the 8-byte "OpusHead" magic:
// [0] version (1 byte), [1] channel count (1 byte),
// [2-3] pre_skip (little-endian uint16) ← at packet bytes 10-11
// pre_skip is the number of decoder samples to discard before presenting audio;
// all granule→time conversions must subtract it (RFC 7845 §4.3).
if (payload.Length >= OggOpusConstants.OpusHeadMinSize)
preSkip = BinaryPrimitives.ReadUInt16LittleEndian(
payload.Slice(OggOpusConstants.OpusHeadPreSkipOffset, 2));
}
else if (sawOpusHead && !sawOpusTags && StartsWith(payload, OggOpusConstants.OpusTagsSignature))
{
sawOpusTags = true;
setupHeaderEnd = offset + pageTotalSize;
}
else if (sawOpusHead && sawOpusTags)
{
// Audio page. Record the first audio page unconditionally (the seek anchor at t=0),
// then one entry per 0.5 s bucket. A page with no end-granule (mid-packet continuation,
// granulepos == -1) is skipped for indexing — its time is unknown — but still advances
// the byte cursor.
if (granule != OggOpusConstants.NoGranulePosition)
{
// RFC 7845 §4.3: presentation time = max(0, granule preSkip) / 48000.
// Use this corrected time for bucketing so that a stream with pre-skip 3840 (~80 ms)
// does not systematically offset every indexed time by that amount.
var correctedTime = Math.Max(0.0,
(granule - (double)preSkip) / OggOpusConstants.OpusSampleRate);
if (!firstAudioPointTaken)
{
// Anchor the first seek point at corrected time = 0 by storing the granule as
// preSkip. This guarantees that a binary search for t=0 ("largest entry with
// corrected time ≤ 0") always resolves to the first audio page's byte offset —
// even when the real granule is slightly above preSkip due to encoder lead-in.
points.Add(new OpusSeekPoint(preSkip, (ulong)offset));
firstAudioPointTaken = true;
nextBucketTime = OggOpusConstants.SeekBucketSeconds;
}
else if (correctedTime >= nextBucketTime)
{
points.Add(new OpusSeekPoint(granule, (ulong)offset));
// Advance past every bucket this page crossed so a long page does not emit a
// backlog of entries; the next bucket is the first boundary strictly after it.
while (nextBucketTime <= correctedTime)
nextBucketTime += OggOpusConstants.SeekBucketSeconds;
}
lastGranule = granule;
}
}
offset += pageTotalSize;
}
if (!sawOpusHead || setupHeaderEnd < 0 || points.Count == 0)
return null;
var setupHeader = oggBytes[..setupHeaderEnd].ToArray();
// RFC 7845 §4.3: total duration is also pre-skip-corrected, matching the time a listener
// experiences (the last audio page's corrected time, clamped to ≥ 0).
var totalDuration = Math.Max(0.0,
(lastGranule - (double)preSkip) / OggOpusConstants.OpusSampleRate);
var index = new OggOpusSeekIndex(points, totalDuration, (ulong)oggBytes.Length, preSkip);
return new OggOpusWalk(setupHeader, index);
}
private static bool StartsWith(ReadOnlySpan<byte> payload, ReadOnlySpan<byte> signature) =>
payload.Length >= signature.Length && payload[..signature.Length].SequenceEqual(signature);
}
@@ -0,0 +1,124 @@
using System.Buffers.Binary;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// A single seek-index entry: an authoritative 48 kHz <see cref="GranulePosition"/> (Opus granule
/// positions are always sample counts at 48 kHz) paired with the exact byte offset of the Ogg page that
/// carries it. Every <see cref="ByteOffset"/> is a real page-start boundary, so a
/// <c>Range: bytes={ByteOffset}-</c> fetch lands the decoder Ogg-sync-aligned.
/// </summary>
/// <remarks>
/// Per RFC 7845 §4.3, the PCM presentation time is <c>(granulepos preSkip) / 48000</c>. The raw
/// <see cref="GranulePosition"/> is stored here as-is; callers should subtract the containing
/// <see cref="OggOpusSeekIndex.PreSkip"/> before converting to a presentation time. Use
/// <see cref="OggOpusSeekIndex.PresentationTimeSeconds"/> for the corrected value.
/// </remarks>
/// <param name="GranulePosition">The page's end granule position (48 kHz sample count).</param>
/// <param name="ByteOffset">The byte offset of the page start in the Opus file.</param>
public readonly record struct OpusSeekPoint(ulong GranulePosition, ulong ByteOffset)
{
/// <summary>
/// Raw granule-position-to-time conversion (granulepos / 48 kHz). Does NOT subtract pre-skip — use
/// <see cref="OggOpusSeekIndex.PresentationTimeSeconds"/> for the RFC 7845-correct presentation time.
/// </summary>
public double RawTimeSeconds => GranulePosition / OggOpusConstants.OpusSampleRate;
}
/// <summary>
/// The accurate, precomputed transfer function from seek-time to true file byte offset for one Ogg
/// Opus stream (§3.4a A). Built once at transcode time by walking the encoded stream; the client reads
/// it back and binary-searches <see cref="Points"/> instead of doing inaccurate VBR byte-rate math.
/// One entry per 0.5 s of audio (<see cref="OggOpusConstants.SeekBucketSeconds"/>), each snapped to the
/// nearest enclosing page start, plus the totals needed to clamp a seek to range.
/// </summary>
/// <param name="Points">Ordered (granulepos, byteOffset) entries, ascending. The first entry always
/// has <see cref="OpusSeekPoint.GranulePosition"/> == <paramref name="PreSkip"/> (corrected time = 0)
/// and points at the first audio page start, ensuring a seek to t=0 always resolves.</param>
/// <param name="TotalDurationSeconds">
/// Pre-skip-corrected total stream duration: <c>max(0, lastGranule preSkip) / 48000</c>.
/// </param>
/// <param name="TotalByteLength">Total Opus file byte length, for clamping a seek past the end.</param>
/// <param name="PreSkip">
/// The <c>pre_skip</c> value from the <c>OpusHead</c> identification header (RFC 7845 §5.1). Opus
/// decoders must discard this many samples from the decoded start before presenting audio. The client
/// (wave 18.4) needs this to trim the first decoded buffer; storing it here avoids a re-parse of the
/// Ogg stream at delivery time.
/// </param>
public sealed record OggOpusSeekIndex(
IReadOnlyList<OpusSeekPoint> Points,
double TotalDurationSeconds,
ulong TotalByteLength,
ushort PreSkip)
{
/// <summary>
/// Returns the RFC 7845-correct presentation time for a seek point: <c>max(0, granule preSkip) / 48000</c>.
/// Use this for all time comparisons; raw <see cref="OpusSeekPoint.RawTimeSeconds"/> omits the pre-skip.
/// </summary>
public double PresentationTimeSeconds(OpusSeekPoint point) =>
Math.Max(0.0, (point.GranulePosition - (double)PreSkip) / OggOpusConstants.OpusSampleRate);
/// <summary>
/// Serializes the index to the compact little-endian binary blob the sidecar stores. Layout:
/// <c>[uint64 totalByteLength][double totalDurationSeconds][uint32 pointCount][uint16 preSkip][uint16 reserved]</c>
/// then <c>pointCount × (uint64 granulepos, uint64 byteOffset)</c>. The four-byte preSkip+reserved
/// region pads the header to 24 bytes, keeping the point table 8-byte-aligned.
/// Fixed-width records keep the client parse to a single typed-array read.
/// </summary>
public byte[] ToBytes()
{
var size = OggOpusConstants.SeekIndexHeaderSize + Points.Count * OggOpusConstants.SeekPointSize;
var bytes = new byte[size];
var span = bytes.AsSpan();
BinaryPrimitives.WriteUInt64LittleEndian(span[..8], TotalByteLength);
BinaryPrimitives.WriteDoubleLittleEndian(span.Slice(8, 8), TotalDurationSeconds);
BinaryPrimitives.WriteUInt32LittleEndian(span.Slice(16, 4), (uint)Points.Count);
BinaryPrimitives.WriteUInt16LittleEndian(span.Slice(20, 2), PreSkip);
// bytes 22-23: reserved (zero-initialized by array allocation)
var cursor = OggOpusConstants.SeekIndexHeaderSize;
foreach (var point in Points)
{
BinaryPrimitives.WriteUInt64LittleEndian(span.Slice(cursor, 8), point.GranulePosition);
BinaryPrimitives.WriteUInt64LittleEndian(span.Slice(cursor + 8, 8), point.ByteOffset);
cursor += OggOpusConstants.SeekPointSize;
}
return bytes;
}
/// <summary>
/// Parses a blob produced by <see cref="ToBytes"/>. Returns null if the blob is too short or its
/// declared point count does not fit — the storage contract is exact, so a malformed blob is a
/// corruption signal, not a recoverable shape. (Provided so tests and any future server-side reader
/// share one codec with the writer.)
/// </summary>
public static OggOpusSeekIndex? FromBytes(ReadOnlySpan<byte> bytes)
{
if (bytes.Length < OggOpusConstants.SeekIndexHeaderSize)
return null;
var totalByteLength = BinaryPrimitives.ReadUInt64LittleEndian(bytes[..8]);
var totalDuration = BinaryPrimitives.ReadDoubleLittleEndian(bytes.Slice(8, 8));
var count = BinaryPrimitives.ReadUInt32LittleEndian(bytes.Slice(16, 4));
var preSkip = BinaryPrimitives.ReadUInt16LittleEndian(bytes.Slice(20, 2));
// bytes 22-23: reserved — ignored on read for forward-compatibility
var expected = OggOpusConstants.SeekIndexHeaderSize + (long)count * OggOpusConstants.SeekPointSize;
if (bytes.Length < expected)
return null;
var points = new OpusSeekPoint[count];
var cursor = OggOpusConstants.SeekIndexHeaderSize;
for (var i = 0; i < count; i++)
{
var granule = BinaryPrimitives.ReadUInt64LittleEndian(bytes.Slice(cursor, 8));
var offset = BinaryPrimitives.ReadUInt64LittleEndian(bytes.Slice(cursor + 8, 8));
points[i] = new OpusSeekPoint(granule, offset);
cursor += OggOpusConstants.SeekPointSize;
}
return new OggOpusSeekIndex(points, totalDuration, totalByteLength, preSkip);
}
}
@@ -0,0 +1,57 @@
using System.Buffers.Binary;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// The single derived sidecar artifact per track (§3.4a B, recommended design): the Opus setup header
/// (<c>OpusHead</c> + <c>OpusTags</c>) followed by the granule→byte seek index. The client fetches this
/// once on track load and parses it into its <c>OpusSeekData</c>, so it always has both the setup bytes
/// (to prepend to any mid-stream slice) and the accurate seek transfer function before it ever issues a
/// Range fetch — including a window that opens away from byte 0 (UC9).
/// </summary>
/// <param name="SetupHeaderBytes">The verbatim OpusHead + OpusTags pages.</param>
/// <param name="SeekIndex">The bucketed granule→byte seek index.</param>
public sealed record OpusSidecar(byte[] SetupHeaderBytes, OggOpusSeekIndex SeekIndex)
{
/// <summary>
/// Serializes to <c>[uint32 setupHeaderLength][setup-header bytes][seek-index blob]</c>. The
/// length prefix lets the client split the two regions with one read; the seek-index blob carries
/// its own self-describing header (<see cref="OggOpusSeekIndex.ToBytes"/>), so it needs no trailing
/// length.
/// </summary>
public byte[] ToBytes()
{
var indexBytes = SeekIndex.ToBytes();
var bytes = new byte[4 + SetupHeaderBytes.Length + indexBytes.Length];
var span = bytes.AsSpan();
BinaryPrimitives.WriteUInt32LittleEndian(span[..4], (uint)SetupHeaderBytes.Length);
SetupHeaderBytes.CopyTo(span.Slice(4));
indexBytes.CopyTo(span.Slice(4 + SetupHeaderBytes.Length));
return bytes;
}
/// <summary>
/// Parses a blob produced by <see cref="ToBytes"/>. Returns null on any structural inconsistency
/// (short blob, length prefix that overruns, or an unparseable index) — the format is exact, so a
/// malformed blob is corruption.
/// </summary>
public static OpusSidecar? FromBytes(ReadOnlySpan<byte> bytes)
{
if (bytes.Length < 4)
return null;
var setupLength = BinaryPrimitives.ReadUInt32LittleEndian(bytes[..4]);
var indexStart = 4 + (long)setupLength;
if (bytes.Length < indexStart)
return null;
var setupHeader = bytes.Slice(4, (int)setupLength).ToArray();
var index = OggOpusSeekIndex.FromBytes(bytes.Slice((int)indexStart));
if (index is null)
return null;
return new OpusSidecar(setupHeader, index);
}
}
@@ -0,0 +1,33 @@
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// Host-supplied configuration for the Opus transcode. The only operationally significant knob is
/// <see cref="FfmpegPath"/> — the transcode shells out to FFmpeg (libopus), which must be present on the
/// DeepDrftAPI host (see the wave handoff notes). Defaults target Ogg Opus fullband (48 kHz) at 320 kbps,
/// the artifact the spec fixes (§1).
/// </summary>
public sealed class OpusTranscodeOptions
{
/// <summary>
/// Path to the ffmpeg executable. Empty/null resolves to <c>"ffmpeg"</c> (found on PATH). Override
/// with an absolute path when the binary is not on the host PATH.
/// </summary>
public string FfmpegPath { get; set; } = "ffmpeg";
/// <summary>Target Opus bitrate in kbps. 320 kbps fullband is the fixed artifact quality (§1).</summary>
public int BitrateKbps { get; set; } = 320;
/// <summary>
/// Directory for the transient source/output files the transcode stages. Defaults to the system
/// temp path; the host overrides it to the data-disk upload-staging directory so large files never
/// land on the small RAM-backed <c>/tmp</c> tmpfs (same constraint the upload path already honours).
/// </summary>
public string StagingPath { get; set; } = Path.GetTempPath();
/// <summary>
/// Hard ceiling on a single transcode, in seconds. A run that exceeds it is killed and the track
/// stays lossless-only (C6). Generous by default — a 1 GB mix is CPU-expensive (§3.1) — but bounded
/// so a hung ffmpeg never wedges the background worker.
/// </summary>
public int TimeoutSeconds { get; set; } = 3600;
}
@@ -0,0 +1,154 @@
using DeepDrftContent.Constants;
using DeepDrftContent.FileDatabase.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// Derives and persists a track's low-data Ogg Opus artifacts (Phase 18.1). Mirrors
/// <see cref="WaveformProfileService"/>'s derived-artifact lifecycle: compute from the stored source,
/// store in a dedicated vault keyed by <c>EntryKey</c>, regenerable, failure-tolerant. For one track it
/// produces two entries in the <see cref="VaultConstants.TrackOpus"/> vault — the Opus audio bytes and a
/// combined setup-header + seek-index sidecar (§3.4a). Strictly additive: the source <c>tracks</c> vault
/// is never touched, and a failure here leaves the track lossless-only and eligible for backfill (C2/C6).
/// </summary>
public sealed class OpusTranscodeService
{
private readonly FileDb _fileDatabase;
private readonly FfmpegOpusEncoder _encoder;
private readonly OpusTranscodeOptions _options;
private readonly ILogger<OpusTranscodeService> _logger;
public OpusTranscodeService(
FileDb fileDatabase,
FfmpegOpusEncoder encoder,
IOptions<OpusTranscodeOptions> options,
ILogger<OpusTranscodeService> logger)
{
_fileDatabase = fileDatabase;
_encoder = encoder;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Reads the source audio for <paramref name="entryKey"/> from the <c>tracks</c> vault, transcodes it
/// to Ogg Opus 320, walks the encoded stream to build the seek index + capture the setup header, and
/// stores the Opus bytes and the sidecar in the <see cref="VaultConstants.TrackOpus"/> vault under the
/// same key. Re-runnable — a second call overwrites the prior artifacts (backfill / replace-audio).
/// Returns false (logged) on any failure; never throws for expected failure modes (C6). The only
/// propagated exception is <see cref="OperationCanceledException"/> on genuine shutdown.
/// </summary>
public async Task<bool> TranscodeAndStoreAsync(string entryKey, CancellationToken ct)
{
var source = await _fileDatabase.LoadResourceAsync<AudioBinary>(VaultConstants.Tracks, entryKey);
if (source is null)
{
_logger.LogWarning("Opus transcode: no source audio in vault for {EntryKey}; skipping.", entryKey);
return false;
}
Directory.CreateDirectory(_options.StagingPath);
var sourcePath = Path.Combine(_options.StagingPath, $"opus-src-{Guid.NewGuid():N}{source.Extension}");
var opusPath = Path.Combine(_options.StagingPath, $"opus-out-{Guid.NewGuid():N}{OggOpusConstants.OpusExtension}");
try
{
await File.WriteAllBytesAsync(sourcePath, source.Buffer, ct);
if (!await _encoder.EncodeAsync(sourcePath, opusPath, ct))
return false; // encoder already logged the cause
var opusBytes = await File.ReadAllBytesAsync(opusPath, ct);
var walk = OggOpusParser.Walk(opusBytes);
if (walk is null)
{
_logger.LogError(
"Opus transcode: ffmpeg produced output but the Ogg stream could not be walked for {EntryKey}; " +
"no artifacts stored.", entryKey);
return false;
}
await EnsureVaultAsync();
var opusBitrate = source.Duration > 0
? (int)(opusBytes.Length * 8 / source.Duration / 1000)
: _options.BitrateKbps;
var audioBinary = new AudioBinary(new AudioBinaryParams(
opusBytes, opusBytes.Length, OggOpusConstants.OpusExtension, source.Duration, opusBitrate));
var sidecar = new OpusSidecar(walk.SetupHeaderBytes, walk.SeekIndex).ToBytes();
var sidecarBinary = new MediaBinary(new MediaBinaryParams(
sidecar, sidecar.Length, OggOpusConstants.SidecarExtension));
// Store the audio first, then the sidecar. If the sidecar write fails the Opus bytes are
// present but unseekable — treat that as a failed derive (return false) so a backfill re-runs
// it; do not leave a half-derived track that the delivery layer would treat as complete.
var audioStored = await _fileDatabase.RegisterResourceAsync(
VaultConstants.TrackOpus, OpusAudioKey(entryKey), audioBinary);
if (!audioStored)
{
_logger.LogError("Opus transcode: vault write of Opus audio failed for {EntryKey}.", entryKey);
return false;
}
var sidecarStored = await _fileDatabase.RegisterResourceAsync(
VaultConstants.TrackOpus, OpusSidecarKey(entryKey), sidecarBinary);
if (!sidecarStored)
{
_logger.LogError("Opus transcode: vault write of sidecar failed for {EntryKey}.", entryKey);
return false;
}
_logger.LogInformation(
"Opus transcode complete for {EntryKey}: {OpusBytes} bytes, {Points} seek points, {Duration:F1}s.",
entryKey, opusBytes.Length, walk.SeekIndex.Points.Count, walk.SeekIndex.TotalDurationSeconds);
return true;
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
throw;
}
catch (Exception ex)
{
_logger.LogError(ex, "Opus transcode failed for {EntryKey}; track stays lossless-only.", entryKey);
return false;
}
finally
{
TryDelete(sourcePath);
TryDelete(opusPath);
}
}
/// <summary>The vault entry key under which a track's Opus audio bytes are stored.</summary>
public static string OpusAudioKey(string entryKey) => entryKey;
/// <summary>The vault entry key under which a track's setup-header + seek-index sidecar is stored.</summary>
public static string OpusSidecarKey(string entryKey) => $"{entryKey}-sidecar";
private async Task EnsureVaultAsync()
{
// The TrackOpus vault is created at host startup (Startup.cs), so this guard is normally a
// no-op for the upload path. It is retained for the backfill path, which may run via a
// standalone CLI or a host that skips vault pre-creation, where the vault might not exist.
if (!_fileDatabase.HasVault(VaultConstants.TrackOpus))
await _fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio);
}
private void TryDelete(string path)
{
try
{
if (File.Exists(path))
File.Delete(path);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Opus transcode: failed to delete staging file {Path}.", path);
}
}
}
@@ -0,0 +1,23 @@
using DeepDrftContent.FileDatabase.Models;
using DeepDrftModels.Enums;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// The outcome of resolving a track + requested <see cref="AudioFormat"/> to a concrete artifact
/// (Phase 18.2). Carries the bytes, the content-type that matches <em>what was actually returned</em>,
/// and the format actually served — which may differ from the requested one when the C2 fallback fires
/// (Opus requested, no Opus artifact → the lossless artifact + its content-type). The delivery layer
/// (18.3) sets the response <c>Content-Type</c> from <see cref="ContentType"/> so the eventual decoder
/// picks the right decoder for the bytes it receives, not the bytes the listener asked for.
/// </summary>
/// <param name="Audio">The resolved audio artifact (never null when a resolution succeeds).</param>
/// <param name="ContentType">The MIME type of <paramref name="Audio"/> (e.g. <c>audio/ogg</c> for Opus,
/// or the source's real MIME for lossless).</param>
/// <param name="ResolvedFormat">The format actually returned. Equal to the requested format on a direct
/// hit; <see cref="AudioFormat.Lossless"/> when an Opus request fell back.</param>
public sealed record ResolvedAudio(AudioBinary Audio, string ContentType, AudioFormat ResolvedFormat)
{
/// <summary>True when an Opus request was served the lossless artifact because no Opus existed (C2).</summary>
public bool DidFallBack(AudioFormat requested) => requested != ResolvedFormat;
}
@@ -0,0 +1,110 @@
using DeepDrftContent.Constants;
using DeepDrftContent.FileDatabase.Models;
using DeepDrftModels.Enums;
using Microsoft.Extensions.Logging;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// The server-side format resolution + sidecar lookup seam (Phase 18.2). Given a track's
/// <c>EntryKey</c> and a requested <see cref="AudioFormat"/>, returns the correct audio artifact and the
/// content-type that matches it; given an <c>EntryKey</c>, returns the Opus seek/setup sidecar bytes.
/// Downstream waves call this — 18.3 wires it behind the <c>?format=</c> stream param and serves the
/// sidecar over HTTP; this wave delivers only the seam, not the HTTP surface.
/// <para>
/// Additive and non-breaking (C2): the lossless branch reads the source exactly as the existing stream
/// path does (via <see cref="TrackContentService.GetAudioBinaryAsync"/>), and an Opus request for a track
/// with no Opus artifact falls back to lossless rather than failing. Mirrors the
/// <see cref="WaveformProfileService"/> derived-artifact lookup precedent: read from the dedicated vault,
/// swallow misses to null (FileDatabase convention), let the caller decide.
/// </para>
/// </summary>
public sealed class TrackFormatResolver
{
private readonly FileDb _fileDatabase;
private readonly TrackContentService _trackContentService;
private readonly ILogger<TrackFormatResolver> _logger;
public TrackFormatResolver(
FileDb fileDatabase,
TrackContentService trackContentService,
ILogger<TrackFormatResolver> logger)
{
_fileDatabase = fileDatabase;
_trackContentService = trackContentService;
_logger = logger;
}
/// <summary>
/// Resolves <paramref name="entryKey"/> + <paramref name="requestedFormat"/> to the audio artifact to
/// serve plus its content-type. <see cref="AudioFormat.Lossless"/> resolves the source artifact in the
/// <c>tracks</c> vault with its real MIME (WAV/MP3/FLAC). <see cref="AudioFormat.Opus"/> resolves the
/// derived Opus artifact (<c>audio/ogg</c>) when present, and <strong>falls back to lossless</strong>
/// when it is not (C2). Returns null only when even the lossless source is missing — i.e. the track has
/// no audio at all (an unknown key or a genuinely empty vault), the one case the caller treats as 404.
/// </summary>
public async Task<ResolvedAudio?> ResolveAsync(string entryKey, AudioFormat requestedFormat)
{
if (requestedFormat == AudioFormat.Opus)
{
var opus = await _fileDatabase.LoadResourceAsync<AudioBinary>(
VaultConstants.TrackOpus, OpusTranscodeService.OpusAudioKey(entryKey));
if (opus is not null)
return new ResolvedAudio(opus, MimeTypeExtensions.GetMimeType(opus.Extension), AudioFormat.Opus);
// C2 fallback: no Opus artifact yet (legacy row, not backfilled, or transcode failed). Degrade
// to lossless rather than 404 — Opus is strictly additive; its absence never means "no audio".
_logger.LogInformation(
"Opus requested for {EntryKey} but no Opus artifact exists; falling back to lossless.", entryKey);
}
return await ResolveLosslessAsync(entryKey);
}
/// <summary>
/// Resolves the lossless source artifact and its real MIME — the existing read path, unchanged. Shared
/// by the explicit-lossless branch and the Opus fallback so both produce identical bytes + content-type.
/// </summary>
private async Task<ResolvedAudio?> ResolveLosslessAsync(string entryKey)
{
var source = await _trackContentService.GetAudioBinaryAsync(entryKey);
if (source is null)
return null;
return new ResolvedAudio(source, MimeTypeExtensions.GetMimeType(source.Extension), AudioFormat.Lossless);
}
/// <summary>
/// Returns the Opus setup-header + seek-index sidecar bytes for <paramref name="entryKey"/>, or null
/// when no sidecar is stored (no Opus artifact yet, or an older derive predating the sidecar). 18.3
/// serves these on their own path; 18.4 fetches them once on track load and parses them into the
/// client's <c>OpusSeekData</c>. The bytes are the raw <see cref="OpusSidecar"/> blob
/// (<c>[uint32 setupHeaderLength][setup-header][seek-index]</c>) exactly as 18.1 stored them.
/// </summary>
public async Task<byte[]?> GetOpusSidecarAsync(string entryKey)
{
var sidecar = await _fileDatabase.LoadResourceAsync<MediaBinary>(
VaultConstants.TrackOpus, OpusTranscodeService.OpusSidecarKey(entryKey));
return sidecar?.Buffer;
}
/// <summary>
/// Reports whether <paramref name="entryKey"/> already has a complete Opus derive — both the audio bytes
/// AND the seek/setup sidecar present in the <c>track-opus</c> vault. The Backfill-Opus pass (18.5) uses
/// this to enqueue only tracks that are missing or half-derived (audio without sidecar = unseekable, so
/// treated as incomplete and re-derived). Both halves are required because the transcode stores them in
/// sequence and a sidecar-write failure leaves a track the delivery layer must not treat as Opus-ready.
/// </summary>
public async Task<bool> HasOpusAsync(string entryKey)
{
var audio = await _fileDatabase.LoadResourceAsync<AudioBinary>(
VaultConstants.TrackOpus, OpusTranscodeService.OpusAudioKey(entryKey));
if (audio is null)
return false;
var sidecar = await _fileDatabase.LoadResourceAsync<MediaBinary>(
VaultConstants.TrackOpus, OpusTranscodeService.OpusSidecarKey(entryKey));
return sidecar is not null;
}
}