Files
deepdrft/DeepDrftContent/Processors/Opus/OpusTranscodeService.cs
T
daniel-c-harvey 4351ae04be Stream Opus transcode source and encoded output; removes last store-path OOM
Source read via streamed vault open + bounded staging copy (index-only duration/extension); encoded output walked from a bounded stream (new OggOpusParser.WalkAsync, byte-identical to the buffer oracle) and stored via streaming vault write. Adds parity tests.
2026-06-26 14:06:33 -04:00

190 lines
9.1 KiB
C#

using DeepDrftContent.Constants;
using DeepDrftContent.FileDatabase.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;
namespace DeepDrftContent.Processors.Opus;
/// <summary>
/// Derives and persists a track's low-data Ogg Opus artifacts (Phase 18.1). Mirrors
/// <see cref="WaveformProfileService"/>'s derived-artifact lifecycle: compute from the stored source,
/// store in a dedicated vault keyed by <c>EntryKey</c>, regenerable, failure-tolerant. For one track it
/// produces two entries in the <see cref="VaultConstants.TrackOpus"/> vault — the Opus audio bytes and a
/// combined setup-header + seek-index sidecar (§3.4a). Strictly additive: the source <c>tracks</c> vault
/// is never touched, and a failure here leaves the track lossless-only and eligible for backfill (C2/C6).
/// </summary>
public sealed class OpusTranscodeService
{
private readonly FileDb _fileDatabase;
private readonly TrackContentService _trackContent;
private readonly FfmpegOpusEncoder _encoder;
private readonly OpusTranscodeOptions _options;
private readonly ILogger<OpusTranscodeService> _logger;
public OpusTranscodeService(
FileDb fileDatabase,
TrackContentService trackContent,
FfmpegOpusEncoder encoder,
IOptions<OpusTranscodeOptions> options,
ILogger<OpusTranscodeService> logger)
{
_fileDatabase = fileDatabase;
_trackContent = trackContent;
_encoder = encoder;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Reads the source audio for <paramref name="entryKey"/> from the <c>tracks</c> vault, transcodes it
/// to Ogg Opus 320, walks the encoded stream to build the seek index + capture the setup header, and
/// stores the Opus bytes and the sidecar in the <see cref="VaultConstants.TrackOpus"/> vault under the
/// same key. Re-runnable — a second call overwrites the prior artifacts (backfill / replace-audio).
/// Returns false (logged) on any failure; never throws for expected failure modes (C6). The only
/// propagated exception is <see cref="OperationCanceledException"/> on genuine shutdown.
/// </summary>
public async Task<bool> TranscodeAndStoreAsync(string entryKey, CancellationToken ct)
{
// Read the source extension + duration from the vault index (no body load) and open a streamed
// read over the source bytes — never the whole-buffer AudioBinary. A 92-min mix source is ~970 MB;
// buffering it (and the encoded output below) was the last unconverted store-path OOM violation.
var trackDuration = await _trackContent.GetAudioDurationAsync(entryKey) ?? 0.0;
var sourceMedia = await _trackContent.OpenAudioMediaStreamAsync(entryKey);
if (sourceMedia is null)
{
_logger.LogWarning("Opus transcode: no source audio in vault for {EntryKey}; skipping.", entryKey);
return false;
}
string? sourcePath = null;
string? opusPath = null;
try
{
// Stage the source to disk in bounded chunks so ffmpeg can read it by file path/extension.
// The inner finally disposes the source stream as soon as the copy is done — the read handle
// is not held across the (long) encode — and guarantees disposal even if staging setup throws.
try
{
Directory.CreateDirectory(_options.StagingPath);
sourcePath = Path.Combine(_options.StagingPath, $"opus-src-{Guid.NewGuid():N}{sourceMedia.Extension}");
opusPath = Path.Combine(_options.StagingPath, $"opus-out-{Guid.NewGuid():N}{OggOpusConstants.OpusExtension}");
await using var staging = new FileStream(
sourcePath, FileMode.Create, FileAccess.Write, FileShare.None,
bufferSize: 81920, useAsync: true);
await sourceMedia.Stream.CopyToAsync(staging, bufferSize: 81920, ct);
}
finally
{
await sourceMedia.DisposeAsync();
}
if (!await _encoder.EncodeAsync(sourcePath, opusPath, ct))
return false; // encoder already logged the cause
// Walk the encoded output from a streamed read in a bounded buffer (no whole-file load). The
// seek index and setup header are byte-identical to the buffer walk (parity-tested).
OggOpusWalk? walk;
await using (var opusIn = new FileStream(
opusPath, FileMode.Open, FileAccess.Read, FileShare.Read,
bufferSize: 81920, useAsync: true))
{
walk = await OggOpusParser.WalkAsync(opusIn, ct);
}
if (walk is null)
{
_logger.LogError(
"Opus transcode: ffmpeg produced output but the Ogg stream could not be walked for {EntryKey}; " +
"no artifacts stored.", entryKey);
return false;
}
await EnsureVaultAsync();
// Bitrate from the output file length + duration — both available without buffering the bytes.
var opusLength = new FileInfo(opusPath).Length;
var opusBitrate = trackDuration > 0
? (int)(opusLength * 8 / trackDuration / 1000)
: _options.BitrateKbps;
// Store the audio first, then the sidecar. If the sidecar write fails the Opus bytes are
// present but unseekable — treat that as a failed derive (return false) so a backfill re-runs
// it; do not leave a half-derived track that the delivery layer would treat as complete.
var audioMeta = MetaDataFactory.CreateAudioMetaData(
OpusAudioKey(entryKey), OggOpusConstants.OpusExtension, trackDuration, opusBitrate);
var stagedOpusPath = opusPath;
var audioStored = await _fileDatabase.RegisterResourceStreamingAsync(
VaultConstants.TrackOpus, OpusAudioKey(entryKey), audioMeta,
(destination, token) => AudioStoreStream.CopyFileAsync(stagedOpusPath, destination, token), ct);
if (!audioStored)
{
_logger.LogError("Opus transcode: vault write of Opus audio failed for {EntryKey}.", entryKey);
return false;
}
// The sidecar is the setup header (a few KB) plus the seek index (~16 bytes per 0.5 s bucket);
// it is inherently bounded and already in managed memory, so the whole-buffer write is correct.
var sidecar = new OpusSidecar(walk.SetupHeaderBytes, walk.SeekIndex).ToBytes();
var sidecarBinary = new MediaBinary(new MediaBinaryParams(
sidecar, sidecar.Length, OggOpusConstants.SidecarExtension));
var sidecarStored = await _fileDatabase.RegisterResourceAsync(
VaultConstants.TrackOpus, OpusSidecarKey(entryKey), sidecarBinary);
if (!sidecarStored)
{
_logger.LogError("Opus transcode: vault write of sidecar failed for {EntryKey}.", entryKey);
return false;
}
_logger.LogInformation(
"Opus transcode complete for {EntryKey}: {OpusBytes} bytes, {Points} seek points, {Duration:F1}s.",
entryKey, opusLength, walk.SeekIndex.Points.Count, walk.SeekIndex.TotalDurationSeconds);
return true;
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
throw;
}
catch (Exception ex)
{
_logger.LogError(ex, "Opus transcode failed for {EntryKey}; track stays lossless-only.", entryKey);
return false;
}
finally
{
if (sourcePath is not null)
TryDelete(sourcePath);
if (opusPath is not null)
TryDelete(opusPath);
}
}
/// <summary>The vault entry key under which a track's Opus audio bytes are stored.</summary>
public static string OpusAudioKey(string entryKey) => entryKey;
/// <summary>The vault entry key under which a track's setup-header + seek-index sidecar is stored.</summary>
public static string OpusSidecarKey(string entryKey) => $"{entryKey}-sidecar";
private async Task EnsureVaultAsync()
{
// The TrackOpus vault is created at host startup (Startup.cs), so this guard is normally a
// no-op for the upload path. It is retained for the backfill path, which may run via a
// standalone CLI or a host that skips vault pre-creation, where the vault might not exist.
if (!_fileDatabase.HasVault(VaultConstants.TrackOpus))
await _fileDatabase.CreateVaultAsync(VaultConstants.TrackOpus, MediaVaultType.Audio);
}
private void TryDelete(string path)
{
try
{
if (File.Exists(path))
File.Delete(path);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Opus transcode: failed to delete staging file {Path}.", path);
}
}
}