314 lines
14 KiB
C#
314 lines
14 KiB
C#
using DeepDrftContent.Constants;
|
|
using DeepDrftContent.FileDatabase.Models;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;
|
|
|
|
namespace DeepDrftContent.Processors;
|
|
|
|
/// <summary>
|
|
/// Computes a track's waveform loudness profile from its WAV bytes and persists it as a sidecar
|
|
/// in the <see cref="VaultConstants.WaveformProfiles"/> vault, keyed by the track's EntryKey.
|
|
/// The profile is the upload-time, off-the-playback-path representation the frontend fetches to
|
|
/// render the WaveformSeeker. The loudness measure is injected (<see cref="ILoudnessAlgorithm"/>)
|
|
/// so it can be swapped without changing storage or the wire format.
|
|
/// </summary>
|
|
public class WaveformProfileService
|
|
{
|
|
/// <summary>File extension recorded on every profile binary this service stores.</summary>
private const string ProfileExtension = ".wfp";

/// <summary>
/// Size in bytes (80 KiB) of the read buffer used by the streaming PCM pass. It is the only
/// allocation on the streaming path that does not depend on the audio file's size, and it matches
/// the store path's 80 KB copy buffer.
/// </summary>
private const int StreamReadBufferSize = 80 * 1024;

/// <summary>Vault-backed store that profiles are written to and read from.</summary>
private readonly FileDb _fileDatabase;

/// <summary>WAV parser used to locate and describe the PCM payload.</summary>
private readonly AudioProcessor _audioProcessor;

/// <summary>Injected loudness measure; produces the per-bucket profile values.</summary>
private readonly ILoudnessAlgorithm _loudnessAlgorithm;

/// <summary>Resolved options; supplies the default bucket count.</summary>
private readonly WaveformProfileOptions _options;

private readonly ILogger<WaveformProfileService> _logger;

/// <summary>
/// Wires up the service's collaborators. <paramref name="options"/> is unwrapped once here, so the
/// option values observed by this instance are those present at construction time.
/// </summary>
public WaveformProfileService(
    FileDb fileDatabase,
    AudioProcessor audioProcessor,
    ILoudnessAlgorithm loudnessAlgorithm,
    IOptions<WaveformProfileOptions> options,
    ILogger<WaveformProfileService> logger)
{
    (_fileDatabase, _audioProcessor, _loudnessAlgorithm, _options, _logger) =
        (fileDatabase, audioProcessor, loudnessAlgorithm, options.Value, logger);
}
|
|
|
|
/// <summary>
/// Whole-buffer path: extracts PCM from <paramref name="wavBytes"/>, runs the injected loudness
/// algorithm over it, quantizes the result to bytes and registers it under
/// <paramref name="entryKey"/>. The service is content-agnostic — it captures exactly as many
/// buckets as it is told to and never decides the count itself.
/// </summary>
/// <param name="wavBytes">The complete WAV file contents.</param>
/// <param name="entryKey">Key the profile is stored under (the track's EntryKey).</param>
/// <param name="bucketCount">
/// Number of buckets to compute; <see cref="WaveformProfileOptions.BucketCount"/> is used when null.
/// Callers wanting higher resolution (e.g. the duration-derived per-track datum, see
/// <c>WaveformResolution</c>) pass an explicit count.
/// </param>
/// <param name="vaultName">
/// Destination vault; <see cref="VaultConstants.WaveformProfiles"/> is used when null. The vault is
/// created on demand.
/// </param>
/// <returns>
/// <c>true</c> when the profile was stored. <c>false</c> (after logging) when PCM could not be
/// extracted, the vault write reported failure, or an exception other than
/// <see cref="OperationCanceledException"/> was thrown — a missing profile is tolerated downstream,
/// so upload-path callers log and continue rather than fail the upload.
/// </returns>
public async Task<bool> ComputeAndStoreAsync(
    ReadOnlyMemory<byte> wavBytes,
    string entryKey,
    int? bucketCount = null,
    string? vaultName = null)
{
    var buckets = bucketCount ?? _options.BucketCount;
    var targetVault = vaultName ?? VaultConstants.WaveformProfiles;

    try
    {
        var extracted = _audioProcessor.TryExtractPcm(wavBytes.Span);
        if (extracted is null)
        {
            _logger.LogWarning(
                "Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted.",
                entryKey);
            return false;
        }

        var audio = extracted.Value;
        var levels = Quantize(
            _loudnessAlgorithm.Compute(
                audio.Pcm.Span,
                audio.Channels,
                audio.SampleRate,
                audio.BitsPerSample,
                buckets));

        // Make sure the vault exists before building and registering the payload.
        await EnsureVaultAsync(targetVault);

        var payload = new MediaBinary(new MediaBinaryParams(levels, levels.Length, ProfileExtension));
        var stored = await _fileDatabase.RegisterResourceAsync(targetVault, entryKey, payload);
        if (stored)
        {
            return true;
        }

        _logger.LogWarning("Waveform profile vault write failed for {EntryKey}.", entryKey);
        return false;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Cancellation is allowed to propagate; everything else becomes a logged "false".
        _logger.LogError(ex, "Waveform profile computation failed for {EntryKey}.", entryKey);
        return false;
    }
}
|
|
|
|
/// <summary>
/// Computes a track's high-resolution loudness datum from its full WAV bytes and stores it in the
/// <see cref="VaultConstants.TrackWaveforms"/> vault under <paramref name="entryKey"/>. The bucket
/// count comes from <see cref="WaveformResolution.BucketCountForDuration"/> (≈333 samples/sec,
/// clamped), so the datum keeps a constant time resolution whatever the track length. The upload
/// path, the CMS generate action and the Mix trigger all route through here, giving every track
/// (Mix, Session, Cut) an identical datum keyed the same way.
/// </summary>
/// <param name="wavBytes">The complete WAV file contents.</param>
/// <param name="entryKey">Key the datum is stored under.</param>
/// <param name="durationSeconds">Track duration used to derive the bucket count.</param>
/// <returns>
/// The result of <see cref="ComputeAndStoreAsync"/> for the derived bucket count and the
/// <see cref="VaultConstants.TrackWaveforms"/> vault: <c>true</c> when stored, <c>false</c> (logged)
/// on failure.
/// </returns>
public Task<bool> ComputeAndStoreHighResAsync(
    ReadOnlyMemory<byte> wavBytes,
    string entryKey,
    double durationSeconds) =>
    ComputeAndStoreAsync(
        wavBytes,
        entryKey,
        bucketCount: WaveformResolution.BucketCountForDuration(durationSeconds),
        vaultName: VaultConstants.TrackWaveforms);
|
|
|
|
/// <summary>
/// Streaming counterpart of <see cref="ComputeAndStoreAsync"/> for the player-bar profile: the WAV
/// is read from <paramref name="openWavStream"/> in bounded chunks instead of being materialized in
/// a managed <c>byte[]</c>, and a profile of <see cref="WaveformProfileOptions.BucketCount"/>
/// buckets is stored in <see cref="VaultConstants.WaveformProfiles"/>. Output is intended to be
/// byte-identical to the whole-buffer path for the same WAV.
/// </summary>
/// <param name="openWavStream">Opens the entry's WAV stream; returns null when there is no audio.</param>
/// <param name="entryKey">Key the profile is stored under.</param>
/// <param name="ct">Cancellation token passed to the stream factory and the reads.</param>
/// <returns>
/// Tri-state, matching the <c>RemoveResourceAsync</c> idiom: <c>null</c> — no audio stream was
/// available; <c>false</c> — audio present but no profile computable (non-WAV / float / padded) or
/// the vault write failed; <c>true</c> — stored.
/// </returns>
public Task<bool?> ComputeAndStoreProfileStreamingAsync(
    Func<CancellationToken, Task<Stream?>> openWavStream,
    string entryKey,
    CancellationToken ct = default)
{
    IReadOnlyList<(int BucketCount, string VaultName)> targets =
    [
        (_options.BucketCount, VaultConstants.WaveformProfiles),
    ];

    return RunStreamingAsync(openWavStream, entryKey, targets, ct);
}
|
|
|
|
/// <summary>
/// Streaming counterpart of <see cref="ComputeAndStoreHighResAsync"/>: streams the WAV from
/// <paramref name="openWavStream"/> and stores the duration-derived high-res datum in
/// <see cref="VaultConstants.TrackWaveforms"/>.
/// </summary>
/// <param name="openWavStream">Opens the entry's WAV stream; returns null when there is no audio.</param>
/// <param name="entryKey">Key the datum is stored under.</param>
/// <param name="durationSeconds">
/// Drives the bucket count exactly as the whole-buffer path's <c>audio.Duration</c> did — pass the
/// same vault-metadata duration to keep the stored bytes identical.
/// </param>
/// <param name="ct">Cancellation token passed to the stream factory and the reads.</param>
/// <returns>
/// Tri-state as in <see cref="ComputeAndStoreProfileStreamingAsync"/>: <c>null</c> — no audio
/// stream; <c>false</c> — not computable or the write failed; <c>true</c> — stored.
/// </returns>
public Task<bool?> ComputeAndStoreHighResStreamingAsync(
    Func<CancellationToken, Task<Stream?>> openWavStream,
    string entryKey,
    double durationSeconds,
    CancellationToken ct = default)
{
    IReadOnlyList<(int BucketCount, string VaultName)> targets =
    [
        (WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms),
    ];

    return RunStreamingAsync(openWavStream, entryKey, targets, ct);
}
|
|
|
|
/// <summary>
/// Upload / replace-audio hot path: produces BOTH datums a track carries — the
/// <see cref="WaveformProfileOptions.BucketCount"/>-bucket profile and the duration-derived high-res
/// datum — from a SINGLE streaming pass. One sequential read of the (possibly ~GB) audio feeds two
/// independent accumulators, so memory stays O(bucket arrays + read buffer) and disk I/O is halved
/// compared with two separate passes. Each datum's stored bytes are intended to be byte-identical
/// to its whole-buffer counterpart.
/// </summary>
/// <param name="openWavStream">Opens the entry's WAV stream; returns null when there is no audio.</param>
/// <param name="entryKey">Key both datums are stored under (each in its own vault).</param>
/// <param name="durationSeconds">Track duration used to derive the high-res bucket count.</param>
/// <param name="ct">Cancellation token passed to the stream factory and the reads.</param>
/// <returns>
/// Tri-state: <c>null</c> — no audio stream; <c>false</c> — not WAV-decodable or a vault write
/// failed; <c>true</c> — both datums stored. Best-effort callers ignore the result.
/// </returns>
public Task<bool?> ComputeAndStoreAllStreamingAsync(
    Func<CancellationToken, Task<Stream?>> openWavStream,
    string entryKey,
    double durationSeconds,
    CancellationToken ct = default)
{
    IReadOnlyList<(int BucketCount, string VaultName)> targets =
    [
        (_options.BucketCount, VaultConstants.WaveformProfiles),
        (WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms),
    ];

    return RunStreamingAsync(openWavStream, entryKey, targets, ct);
}
|
|
|
|
/// <summary>
/// Shared streaming reduction behind the public streaming entry points. Opens the WAV once, reads
/// its PCM stream info, then pushes the PCM data region through one loudness accumulator per
/// requested target, so every target is computed in the same pass. Each finished profile is
/// quantized and registered in its target vault under <paramref name="entryKey"/>.
/// </summary>
/// <param name="openWavStream">Opens the WAV stream; a null result means "no backing audio".</param>
/// <param name="entryKey">Key every datum is stored under.</param>
/// <param name="targets">Bucket count and destination vault for each datum to produce.</param>
/// <param name="ct">Cancellation token; cancellation propagates to the caller.</param>
/// <returns>
/// <c>null</c> when <paramref name="openWavStream"/> yields no stream; <c>false</c> when the PCM
/// stream info cannot be read, any vault write reports failure, or an exception other than
/// <see cref="OperationCanceledException"/> is thrown; <c>true</c> when every target was stored.
/// </returns>
private async Task<bool?> RunStreamingAsync(
    Func<CancellationToken, Task<Stream?>> openWavStream,
    string entryKey,
    IReadOnlyList<(int BucketCount, string VaultName)> targets,
    CancellationToken ct)
{
    try
    {
        await using var wav = await openWavStream(ct);
        if (wav is null)
        {
            // No backing audio for this entry — distinct from "present but undecodable".
            return null;
        }

        var header = await _audioProcessor.TryReadPcmStreamInfoAsync(wav, wav.Length, ct);
        if (header is null)
        {
            _logger.LogWarning(
                "Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted (streaming).",
                entryKey);
            return false;
        }

        var pcm = header.Value;

        // One accumulator per target; they all consume the same bytes during the single pass.
        var accumulators = new ILoudnessAccumulator[targets.Count];
        for (var t = 0; t < accumulators.Length; t++)
        {
            accumulators[t] = _loudnessAlgorithm.CreateAccumulator(
                pcm.DataLength, pcm.Channels, pcm.SampleRate, pcm.BitsPerSample, targets[t].BucketCount);
        }

        await StreamPcmThroughAsync(wav, pcm.DataStart, pcm.DataLength, accumulators, ct);

        _logger.LogInformation(
            "Streaming waveform compute for {EntryKey}: {DataLength} PCM bytes, {TargetCount} datum(s), " +
            "{BufferSize}B read buffer — no whole-file load.",
            entryKey, pcm.DataLength, targets.Count, StreamReadBufferSize);

        // A failed write for one target does not stop the remaining targets from being attempted.
        var failedWrites = 0;
        for (var t = 0; t < targets.Count; t++)
        {
            var vault = targets[t].VaultName;
            var levels = Quantize(accumulators[t].Finish());

            await EnsureVaultAsync(vault);
            var payload = new MediaBinary(new MediaBinaryParams(levels, levels.Length, ProfileExtension));
            if (await _fileDatabase.RegisterResourceAsync(vault, entryKey, payload))
            {
                continue;
            }

            _logger.LogWarning(
                "Waveform vault write failed for {EntryKey} in {VaultName}.", entryKey, vault);
            failedWrites++;
        }

        return failedWrites == 0;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Cancellation is allowed to propagate; everything else becomes a logged "false".
        _logger.LogError(ex, "Streaming waveform computation failed for {EntryKey}.", entryKey);
        return false;
    }
}
|
|
|
|
/// <summary>
/// Seeks <paramref name="stream"/> to <paramref name="dataStart"/> and feeds up to
/// <paramref name="dataLength"/> bytes to every accumulator, one bounded read at a time. The
/// accumulators carry partial frames internally, so read boundaries need not align to frames. A
/// single <see cref="StreamReadBufferSize"/>-byte buffer is reused for every read, which keeps peak
/// memory independent of the file size.
/// </summary>
/// <param name="stream">Seekable WAV stream.</param>
/// <param name="dataStart">Absolute offset of the PCM data region.</param>
/// <param name="dataLength">Number of PCM bytes to consume.</param>
/// <param name="accumulators">Accumulators that each receive every chunk, in array order.</param>
/// <param name="ct">Cancellation token passed to each read.</param>
private static async Task StreamPcmThroughAsync(
    Stream stream, long dataStart, long dataLength, ILoudnessAccumulator[] accumulators, CancellationToken ct)
{
    stream.Seek(dataStart, SeekOrigin.Begin);

    var chunk = new byte[StreamReadBufferSize];
    for (var left = dataLength; left > 0;)
    {
        // Never request past the end of the data region, even if the file continues after it.
        var request = (int)Math.Min(chunk.Length, left);
        var got = await stream.ReadAsync(chunk.AsMemory(0, request), ct);
        if (got == 0)
        {
            // NOTE(review): a premature end of stream exits silently, so the accumulators finish
            // with fewer than dataLength bytes — confirm this best-effort behavior is intended.
            return;
        }

        for (var a = 0; a < accumulators.Length; a++)
        {
            accumulators[a].Add(chunk.AsSpan(0, got));
        }

        left -= got;
    }
}
|
|
|
|
/// <summary>
/// Loads the stored quantized profile for a track.
/// </summary>
/// <param name="entryKey">Key the profile was stored under.</param>
/// <param name="vaultName">
/// Vault to read from; <see cref="VaultConstants.WaveformProfiles"/> is used when null.
/// </param>
/// <returns>
/// The profile bytes — each a peak-normalized loudness value in [0, 255] — or null when nothing is
/// stored for the key (existing tracks predate profiling, and computation may have failed).
/// </returns>
public async Task<byte[]?> GetProfileAsync(string entryKey, string? vaultName = null)
{
    var sourceVault = vaultName ?? VaultConstants.WaveformProfiles;
    var stored = await _fileDatabase.LoadResourceAsync<MediaBinary>(sourceVault, entryKey);
    return stored?.Buffer;
}
|
|
|
|
/// <summary>
/// Maps each loudness bucket from [0, 1] to a byte in [0, 255]. In-range values are scaled by 255
/// and truncated (so 1.0 maps to 255 and 0.5 maps to 127), which keeps the stored bytes identical
/// to a plain multiply-and-cast for well-formed profiles.
/// </summary>
/// <remarks>
/// Out-of-range input is saturated explicitly rather than left to the cast: casting an
/// out-of-range or NaN <see cref="double"/> to <see cref="byte"/> in an unchecked context yields an
/// unspecified value that has differed between runtimes and platforms. NaN and anything at or below
/// zero become 0; anything at or above one becomes 255.
/// </remarks>
/// <param name="profile">Per-bucket loudness values, nominally in [0, 1].</param>
/// <returns>A new array of the same length holding the quantized values.</returns>
private static byte[] Quantize(double[] profile)
{
    var bytes = new byte[profile.Length];
    for (var i = 0; i < profile.Length; i++)
    {
        var level = profile[i];

        // NaN compares false against everything, so the negated test sends it to 0 as well.
        if (!(level > 0.0))
        {
            bytes[i] = byte.MinValue;
        }
        else if (level >= 1.0)
        {
            bytes[i] = byte.MaxValue;
        }
        else
        {
            // Strictly inside (0, 1): the product is below 255, so the truncating cast is safe.
            bytes[i] = (byte)(level * 255);
        }
    }

    return bytes;
}
|
|
|
|
/// <summary>
/// Creates <paramref name="vaultName"/> as a <see cref="MediaVaultType.Media"/> vault when the file
/// database does not already report having it; otherwise does nothing.
/// </summary>
private async Task EnsureVaultAsync(string vaultName)
{
    if (_fileDatabase.HasVault(vaultName))
    {
        return;
    }

    await _fileDatabase.CreateVaultAsync(vaultName, MediaVaultType.Media);
}
|
|
}
|