using DeepDrftContent.Constants;
using DeepDrftContent.FileDatabase.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;

namespace DeepDrftContent.Processors;

/// <summary>
/// Computes a track's waveform loudness profile from its WAV bytes and persists it as a sidecar
/// in the vault, keyed by the track's EntryKey.
/// The profile is the upload-time, off-the-playback-path representation the frontend fetches to
/// render the WaveformSeeker. The loudness measure is injected (<see cref="ILoudnessAlgorithm"/>)
/// so it can be swapped without changing storage or the wire format.
/// </summary>
public class WaveformProfileService
{
    private const string ProfileExtension = ".wfp";

    /// <summary>
    /// Bounded read-buffer size for the streaming PCM pass — the only filesize-independent
    /// allocation on the streaming path (matches the store path's 80 KB copy buffer).
    /// </summary>
    private const int StreamReadBufferSize = 81920;

    private readonly FileDb _fileDatabase;
    private readonly AudioProcessor _audioProcessor;
    private readonly ILoudnessAlgorithm _loudnessAlgorithm;
    private readonly WaveformProfileOptions _options;
    private readonly ILogger<WaveformProfileService> _logger;

    /// <summary>Creates the service from its injected collaborators.</summary>
    /// <param name="fileDatabase">Vault storage the quantized profiles are written to and read from.</param>
    /// <param name="audioProcessor">WAV parser used to locate the PCM data.</param>
    /// <param name="loudnessAlgorithm">Loudness measure used to reduce PCM to a bucketed profile.</param>
    /// <param name="options">Supplies the default bucket count.</param>
    /// <param name="logger">Sink for failure and diagnostic messages.</param>
    public WaveformProfileService(
        FileDb fileDatabase,
        AudioProcessor audioProcessor,
        ILoudnessAlgorithm loudnessAlgorithm,
        IOptions<WaveformProfileOptions> options,
        ILogger<WaveformProfileService> logger)
    {
        _fileDatabase = fileDatabase;
        _audioProcessor = audioProcessor;
        _loudnessAlgorithm = loudnessAlgorithm;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Computes the loudness profile from <paramref name="wavBytes"/> and stores it under
    /// <paramref name="entryKey"/> in <paramref name="vaultName"/> (defaults to
    /// <see cref="VaultConstants.WaveformProfiles"/> when null). Bucket resolution defaults to
    /// <see cref="WaveformProfileOptions.BucketCount"/> (512) when <paramref name="bucketCount"/> is null;
    /// callers pass an explicit count for higher-resolution data — e.g. the per-track high-res datum
    /// derives its count from the audio duration (≈333 samples/sec, see WaveformResolution) so long
    /// tracks are not under-sampled. This service is content-agnostic: it captures however many
    /// buckets it is told to and does not itself decide the count.
    /// </summary>
    /// <param name="wavBytes">The complete WAV file contents.</param>
    /// <param name="entryKey">Key the profile is stored under.</param>
    /// <param name="bucketCount">Number of buckets to compute, or null for the configured default.</param>
    /// <param name="vaultName">Target vault, or null for <see cref="VaultConstants.WaveformProfiles"/>.</param>
    /// <returns>
    /// True when the profile was stored. False (and logged) on any failure — a missing profile is
    /// handled gracefully downstream, so callers on the upload path log-and-continue rather than
    /// failing the upload. Does not throw for expected failure modes; only
    /// <see cref="OperationCanceledException"/> is allowed to propagate.
    /// </returns>
    public async Task<bool> ComputeAndStoreAsync(
        ReadOnlyMemory<byte> wavBytes,
        string entryKey,
        int? bucketCount = null,
        string? vaultName = null)
    {
        var effectiveBucketCount = bucketCount ?? _options.BucketCount;
        var effectiveVaultName = vaultName ?? VaultConstants.WaveformProfiles;

        try
        {
            var pcm = _audioProcessor.TryExtractPcm(wavBytes.Span);
            if (pcm is null)
            {
                _logger.LogWarning(
                    "Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted.",
                    entryKey);
                return false;
            }

            var value = pcm.Value;
            var profile = _loudnessAlgorithm.Compute(
                value.Pcm.Span,
                value.Channels,
                value.SampleRate,
                value.BitsPerSample,
                effectiveBucketCount);

            var stored = await StoreProfileAsync(profile, effectiveVaultName, entryKey);
            if (!stored)
            {
                _logger.LogWarning("Waveform profile vault write failed for {EntryKey}.", entryKey);
                return false;
            }

            return true;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex, "Waveform profile computation failed for {EntryKey}.", entryKey);
            return false;
        }
    }

    /// <summary>
    /// Computes a track's high-resolution loudness datum and stores it in the
    /// <see cref="VaultConstants.TrackWaveforms"/> vault keyed by <paramref name="entryKey"/>. The bucket
    /// count is duration-derived (≈333 samples/sec, clamped — see <see cref="WaveformResolution"/>) so the
    /// datum captures at a constant time resolution regardless of track length. This is the single home
    /// for "the high-res per-track datum" — the upload path, the CMS generate action, and the Mix trigger
    /// all funnel through it, so every track (Mix, Session, Cut) gets an identical datum keyed the same way.
    /// </summary>
    /// <param name="wavBytes">The complete WAV file contents.</param>
    /// <param name="entryKey">Key the datum is stored under.</param>
    /// <param name="durationSeconds">Track duration, used to derive the bucket count.</param>
    /// <returns>
    /// True when stored; false (logged) on any failure, following the same contract as
    /// <see cref="ComputeAndStoreAsync"/>.
    /// </returns>
    public Task<bool> ComputeAndStoreHighResAsync(
        ReadOnlyMemory<byte> wavBytes,
        string entryKey,
        double durationSeconds)
    {
        var bucketCount = WaveformResolution.BucketCountForDuration(durationSeconds);
        return ComputeAndStoreAsync(wavBytes, entryKey, bucketCount, VaultConstants.TrackWaveforms);
    }

    /// <summary>
    /// Streaming counterpart of <see cref="ComputeAndStoreAsync"/>: computes and stores the fixed
    /// 512-bucket player-bar profile by reading the WAV from <paramref name="openWavStream"/> in bounded
    /// chunks, never materializing the whole file in a managed <c>byte[]</c>. Output is
    /// byte-identical to the whole-buffer path for the same WAV.
    /// </summary>
    /// <param name="openWavStream">Opens the WAV stream; may yield null when the entry has no audio.</param>
    /// <param name="entryKey">Key the profile is stored under.</param>
    /// <param name="ct">Cancellation token flowed to the stream open and reads.</param>
    /// <returns>
    /// Tri-state result matching the RemoveResourceAsync idiom so callers can map outcomes precisely:
    /// null = no audio stream available (the entry has no backing audio); false = audio present but no
    /// profile computable (non-WAV / float / padded) or the vault write failed; true = stored.
    /// </returns>
    public Task<bool?> ComputeAndStoreProfileStreamingAsync(
        Func<CancellationToken, Task<Stream?>> openWavStream,
        string entryKey,
        CancellationToken ct = default) =>
        RunStreamingAsync(
            openWavStream,
            entryKey,
            [(_options.BucketCount, VaultConstants.WaveformProfiles)],
            ct);

    /// <summary>
    /// Streaming counterpart of <see cref="ComputeAndStoreHighResAsync"/>: computes and stores the
    /// duration-derived high-res datum (<see cref="VaultConstants.TrackWaveforms"/>) by streaming the WAV
    /// from <paramref name="openWavStream"/>. <paramref name="durationSeconds"/> drives the bucket count
    /// exactly as the whole-buffer path's <c>audio.Duration</c> did — pass the same vault-metadata
    /// duration to keep the stored bytes identical.
    /// </summary>
    /// <param name="openWavStream">Opens the WAV stream; may yield null when the entry has no audio.</param>
    /// <param name="entryKey">Key the datum is stored under.</param>
    /// <param name="durationSeconds">Track duration, used to derive the bucket count.</param>
    /// <param name="ct">Cancellation token flowed to the stream open and reads.</param>
    /// <returns>Tri-state result as in <see cref="ComputeAndStoreProfileStreamingAsync"/>.</returns>
    public Task<bool?> ComputeAndStoreHighResStreamingAsync(
        Func<CancellationToken, Task<Stream?>> openWavStream,
        string entryKey,
        double durationSeconds,
        CancellationToken ct = default) =>
        RunStreamingAsync(
            openWavStream,
            entryKey,
            [(WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms)],
            ct);

    /// <summary>
    /// Computes and stores BOTH datums a track carries — the 512-bucket profile and the duration-derived
    /// high-res datum — from a SINGLE streaming pass over the WAV. One sequential read of the (possibly
    /// ~GB) audio feeds two independent accumulators, so memory stays O(bucket arrays + read buffer) and
    /// disk I/O is halved versus two separate passes. This is the upload / replace-audio hot path. Each
    /// datum's stored bytes are byte-identical to its whole-buffer counterpart.
    /// </summary>
    /// <param name="openWavStream">Opens the WAV stream; may yield null when the entry has no audio.</param>
    /// <param name="entryKey">Key both datums are stored under.</param>
    /// <param name="durationSeconds">Track duration, used to derive the high-res bucket count.</param>
    /// <param name="ct">Cancellation token flowed to the stream open and reads.</param>
    /// <returns>
    /// Tri-state: null = no audio stream; false = not WAV-decodable or a vault write failed;
    /// true = both datums stored. Best-effort callers ignore the result.
    /// </returns>
    public Task<bool?> ComputeAndStoreAllStreamingAsync(
        Func<CancellationToken, Task<Stream?>> openWavStream,
        string entryKey,
        double durationSeconds,
        CancellationToken ct = default) =>
        RunStreamingAsync(
            openWavStream,
            entryKey,
            [
                (_options.BucketCount, VaultConstants.WaveformProfiles),
                (WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms),
            ],
            ct);

    /// <summary>
    /// Core streaming reduction: opens the WAV once, parses its header (bounded), then streams the PCM
    /// data region through one loudness accumulator per requested target, storing each datum. All
    /// targets are computed in the single pass. A failed write for one target does not stop the
    /// remaining targets from being written.
    /// </summary>
    /// <returns>
    /// null when <paramref name="openWavStream"/> yields no stream; false when the PCM stream info
    /// cannot be read, any vault write fails, or a non-cancellation exception is caught; true when
    /// every target was stored.
    /// </returns>
    private async Task<bool?> RunStreamingAsync(
        Func<CancellationToken, Task<Stream?>> openWavStream,
        string entryKey,
        IReadOnlyList<(int BucketCount, string VaultName)> targets,
        CancellationToken ct)
    {
        try
        {
            await using var stream = await openWavStream(ct);
            if (stream is null)
            {
                // No backing audio for this entry — distinct from "present but undecodable".
                return null;
            }

            var info = await _audioProcessor.TryReadPcmStreamInfoAsync(stream, stream.Length, ct);
            if (info is null)
            {
                _logger.LogWarning(
                    "Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted (streaming).",
                    entryKey);
                return false;
            }

            var v = info.Value;
            var accumulators = new ILoudnessAccumulator[targets.Count];
            for (var i = 0; i < targets.Count; i++)
            {
                accumulators[i] = _loudnessAlgorithm.CreateAccumulator(
                    v.DataLength,
                    v.Channels,
                    v.SampleRate,
                    v.BitsPerSample,
                    targets[i].BucketCount);
            }

            var bytesRead = await StreamPcmThroughAsync(stream, v.DataStart, v.DataLength, accumulators, ct);
            if (bytesRead < v.DataLength)
            {
                // The stream ended before the declared data region did. The profile is still stored
                // (best effort), but the shortfall is surfaced instead of being silently swallowed.
                _logger.LogWarning(
                    "Waveform PCM stream for {EntryKey} ended early: read {BytesRead} of {DataLength} expected bytes.",
                    entryKey,
                    bytesRead,
                    v.DataLength);
            }

            _logger.LogInformation(
                "Streaming waveform compute for {EntryKey}: {DataLength} PCM bytes, {TargetCount} datum(s), " +
                "{BufferSize}B read buffer — no whole-file load.",
                entryKey,
                v.DataLength,
                targets.Count,
                StreamReadBufferSize);

            var allStored = true;
            for (var i = 0; i < targets.Count; i++)
            {
                var profile = accumulators[i].Finish();
                var stored = await StoreProfileAsync(profile, targets[i].VaultName, entryKey);
                if (!stored)
                {
                    _logger.LogWarning(
                        "Waveform vault write failed for {EntryKey} in {VaultName}.",
                        entryKey,
                        targets[i].VaultName);
                    allStored = false;
                }
            }

            return allStored;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex, "Streaming waveform computation failed for {EntryKey}.", entryKey);
            return false;
        }
    }

    /// <summary>
    /// Seeks to the PCM data region and streams up to <paramref name="dataLength"/> bytes through each
    /// accumulator in bounded reads. The accumulators carry partial frames internally, so the read
    /// boundaries need not align to frames. Peak memory is one read buffer — independent of file size.
    /// </summary>
    /// <returns>
    /// The number of bytes actually fed to the accumulators. This is less than
    /// <paramref name="dataLength"/> only when the stream reports end-of-stream early.
    /// </returns>
    private static async Task<long> StreamPcmThroughAsync(
        Stream stream,
        long dataStart,
        long dataLength,
        ILoudnessAccumulator[] accumulators,
        CancellationToken ct)
    {
        stream.Seek(dataStart, SeekOrigin.Begin);

        var buffer = new byte[StreamReadBufferSize];
        var remaining = dataLength;
        while (remaining > 0)
        {
            // Never read past the data region: trailing chunks after it are not PCM.
            var want = (int)Math.Min(buffer.Length, remaining);
            var read = await stream.ReadAsync(buffer.AsMemory(0, want), ct);
            if (read == 0)
            {
                // End of stream before the declared data length; the caller reports the shortfall.
                break;
            }

            // The same chunk is handed to every accumulator; the span is only taken after the await.
            var span = buffer.AsSpan(0, read);
            foreach (var accumulator in accumulators)
            {
                accumulator.Add(span);
            }

            remaining -= read;
        }

        return dataLength - remaining;
    }

    /// <summary>
    /// Returns the stored quantized profile bytes for a track from <paramref name="vaultName"/>
    /// (defaults to <see cref="VaultConstants.WaveformProfiles"/> when null), or null if no profile
    /// is stored (existing tracks predate profiling, and computation may have failed). Each byte is
    /// a peak-normalized loudness value in [0, 255].
    /// </summary>
    /// <param name="entryKey">Key the profile was stored under.</param>
    /// <param name="vaultName">Vault to read from, or null for the default profile vault.</param>
    public async Task<byte[]?> GetProfileAsync(string entryKey, string? vaultName = null)
    {
        var binary = await _fileDatabase.LoadResourceAsync(
            vaultName ?? VaultConstants.WaveformProfiles,
            entryKey);
        return binary?.Buffer;
    }

    /// <summary>
    /// Quantizes <paramref name="profile"/> and writes it to <paramref name="vaultName"/> under
    /// <paramref name="entryKey"/>, creating the vault first when it does not exist yet.
    /// </summary>
    /// <returns>The result of the vault write. Callers log a failure in their own wording.</returns>
    private async Task<bool> StoreProfileAsync(double[] profile, string vaultName, string entryKey)
    {
        var quantized = Quantize(profile);
        await EnsureVaultAsync(vaultName);

        var binary = new MediaBinary(new MediaBinaryParams(quantized, quantized.Length, ProfileExtension));
        return await _fileDatabase.RegisterResourceAsync(vaultName, entryKey, binary);
    }

    /// <summary>
    /// Maps each [0, 1] bucket to a [0, 255] byte using a truncating multiply-by-255, so 1.0 maps
    /// to 255. Values outside [0, 1] are clamped and NaN maps to 0, because converting a NaN or
    /// out-of-range <c>double</c> to <c>byte</c> yields an unspecified value in C#. In-range
    /// inputs quantize exactly as an unguarded cast would.
    /// </summary>
    private static byte[] Quantize(double[] profile)
    {
        var bytes = new byte[profile.Length];
        for (var i = 0; i < profile.Length; i++)
        {
            var level = profile[i];

            // Math.Clamp passes NaN through unchanged, so it needs its own branch.
            bytes[i] = double.IsNaN(level)
                ? (byte)0
                : (byte)(Math.Clamp(level, 0.0, 1.0) * 255);
        }

        return bytes;
    }

    /// <summary>Creates <paramref name="vaultName"/> as a media vault if the database does not have it.</summary>
    private async Task EnsureVaultAsync(string vaultName)
    {
        if (!_fileDatabase.HasVault(vaultName))
        {
            await _fileDatabase.CreateVaultAsync(vaultName, MediaVaultType.Media);
        }
    }
}