Add server-side waveform loudness profiling on track upload

ILoudnessAlgorithm strategy (RmsLoudnessAlgorithm first impl), WaveformProfileService
stores quantized byte[] sidecar in new MediaFileVault (profiles vault), wired into
UnifiedTrackService.UploadAsync; failure is logged and swallowed. WaveformProfileDto
and WaveformProfileOptions in shared projects.
This commit is contained in:
daniel-c-harvey
2026-06-05 16:38:02 -04:00
parent eed99df0dd
commit fa57861dbf
16 changed files with 544 additions and 10 deletions
@@ -9,4 +9,9 @@ public static class VaultConstants
/// Vault name for storing audio tracks
/// </summary>
public const string Tracks = "tracks";
/// <summary>
/// Vault name for storing waveform loudness profile sidecars, keyed by track EntryKey.
/// </summary>
public const string WaveformProfiles = "waveform-profiles";
}
+1
View File
@@ -12,6 +12,7 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
@@ -219,6 +219,32 @@ public class AudioVault : MediaVault
}
}
/// <summary>
/// Concrete vault for plain <see cref="MediaBinary"/> entries (vault type
/// <see cref="MediaVaultType.Media"/>) — bytes plus an extension, no audio/image-specific
/// metadata. Used for sidecar artifacts such as waveform loudness profiles. The base
/// <see cref="MediaVault"/> already handles Media-typed storage via the registry; this only
/// provides the concrete factory the Image and Audio vaults also provide.
/// </summary>
public class MediaFileVault : MediaVault
{
private MediaFileVault(string rootPath, VaultIndex index, IndexFactoryService? factoryService = null)
: base(rootPath, index, factoryService) { }
public static async Task<MediaFileVault?> FromAsync(string rootPath, IndexFactoryService? factoryService = null)
{
var factory = factoryService ?? new IndexFactoryService();
var index = await factory.LoadOrCreateVaultIndexAsync(rootPath, MediaVaultType.Media);
if (index != null)
{
return new MediaFileVault(rootPath, (VaultIndex)index, factory);
}
return null;
}
}
/// <summary>
/// An open read-only stream over a vault entry plus the extension needed to
/// resolve its MIME type. Caller owns the stream and must dispose it.
@@ -11,6 +11,7 @@ public static class MediaVaultFactory
{
return mediaType switch
{
MediaVaultType.Media => await MediaFileVault.FromAsync(rootPath, factoryService),
MediaVaultType.Image => await ImageVault.FromAsync(rootPath, factoryService),
MediaVaultType.Audio => await AudioVault.FromAsync(rootPath, factoryService),
_ => null
@@ -31,7 +31,8 @@ public class SimpleMediaTypeRegistry : IMediaTypeRegistry
dto => MediaBinary.From(dto),
binary => new MediaBinaryDto(binary),
(key, ext, _) => new MetaData(key, ext),
(binary, meta) => new MediaBinaryParams(binary.Buffer, binary.Size, meta.Extension));
(binary, meta) => new MediaBinaryParams(binary.Buffer, binary.Size, meta.Extension),
async path => await MediaFileVault.FromAsync(path));
RegisterType<ImageBinary, ImageBinaryParams, ImageBinaryDto, ImageMetaData>(
MediaVaultType.Image,
+64 -1
View File
@@ -45,6 +45,55 @@ public class AudioProcessor
}
}
/// <summary>
/// Extracts the raw PCM data region and format parameters from a WAV buffer, reusing the
/// same chunk-walk and validation as metadata extraction. Returns null if the buffer is not
/// a valid PCM WAV (callers treat a null as "no profile computable" and continue) — unlike
/// <see cref="ExtractWavMetadata"/>, this does NOT fall back to synthetic defaults, because a
/// loudness profile over fabricated silence would be misleading.
/// </summary>
public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
{
// Copy the span to an array so the existing array-based parsers can be reused. The PCM
// slice returned is a view over this array (no second copy of the data region).
var bytes = buffer.ToArray();
var validation = ValidateWavStructure(bytes);
if (!validation.IsValid)
{
return null;
}
WavMetadata metadata;
try
{
metadata = ParseWavMetadata(bytes, validation);
ValidateAudioParameters(metadata);
}
catch
{
return null;
}
// Data bytes begin 8 past the "data" chunk id (4 id + 4 size). Clamp the declared size to
// what is actually present — some encoders write a size that overshoots the file.
var dataStart = validation.DataChunkPos + 8;
if (dataStart > bytes.Length)
{
return null;
}
var available = bytes.Length - dataStart;
var dataLength = Math.Min(metadata.DataSize, available);
if (dataLength <= 0)
{
return null;
}
var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Extracts metadata from WAV file buffer with comprehensive validation
/// </summary>
@@ -268,4 +317,18 @@ public class AudioProcessor
public int FmtChunkPos { get; set; }
public int DataChunkPos { get; set; }
}
}
}
/// <summary>
/// The raw PCM sample region of a WAV plus the format parameters needed to interpret it.
/// <see cref="Pcm"/> is a view over the decoded buffer — the data chunk only, header excluded.
/// </summary>
/// <param name="Pcm">The PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(
ReadOnlyMemory<byte> Pcm,
int Channels,
int SampleRate,
int BitsPerSample);
@@ -0,0 +1,23 @@
namespace DeepDrftContent.Processors;
/// <summary>
/// Strategy for reducing a stream of PCM samples to a fixed-length, peak-normalized loudness
/// envelope. Swappable so the loudness measure (RMS today, LUFS later) can change without
/// touching <c>WaveformProfileService</c>, the stored wire format, or the frontend renderer.
/// </summary>
public interface ILoudnessAlgorithm
{
/// <summary>
/// Computes a peak-normalized loudness profile from raw interleaved PCM.
/// </summary>
/// <param name="pcmData">Interleaved, little-endian PCM sample bytes (the WAV data chunk).</param>
/// <param name="channels">Number of interleaved channels; averaged to mono per sample.</param>
/// <param name="sampleRate">Samples per second (unused by RMS but part of the contract for measures that need it).</param>
/// <param name="bitsPerSample">Bit depth (8 unsigned, 16/24/32 signed) used to decode samples.</param>
/// <param name="bucketCount">Number of equal time slices to reduce the signal to.</param>
/// <returns>
/// A <c>double[bucketCount]</c>, each value in [0, 1], peak-normalized so the loudest bucket
/// is 1. All zeros when the signal is silent (peak is 0) or no samples are present.
/// </returns>
double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount);
}
@@ -0,0 +1,138 @@
namespace DeepDrftContent.Processors;
/// <summary>
/// Loudness via root-mean-square amplitude per time bucket. Decodes signed PCM (8-bit unsigned,
/// 16/24/32-bit signed little-endian), averages channels to mono, partitions the frames into
/// equal time slices, takes the RMS of each slice, then peak-normalizes so the loudest bucket is 1.
/// No external audio dependency — operates directly on the WAV data-chunk bytes.
/// </summary>
public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
{
public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
{
if (bucketCount <= 0)
{
throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
}
var result = new double[bucketCount];
if (channels <= 0)
{
return result;
}
var bytesPerSample = bitsPerSample / 8;
if (bytesPerSample <= 0)
{
return result;
}
var bytesPerFrame = bytesPerSample * channels;
var frameCount = pcmData.Length / bytesPerFrame;
if (frameCount == 0)
{
return result;
}
// Sum of squared mono amplitudes and the frame count, per bucket. A frame's bucket is
// determined by its position in the timeline so buckets are equal-duration slices.
var sumSquares = new double[bucketCount];
var counts = new long[bucketCount];
for (var frame = 0; frame < frameCount; frame++)
{
var frameStart = frame * bytesPerFrame;
double channelSum = 0;
for (var ch = 0; ch < channels; ch++)
{
var sampleStart = frameStart + ch * bytesPerSample;
channelSum += ReadSampleNormalized(pcmData, sampleStart, bitsPerSample);
}
var mono = channelSum / channels;
// long math avoids overflow on large files before the divide back into bucket index.
var bucket = (int)((long)frame * bucketCount / frameCount);
if (bucket >= bucketCount)
{
bucket = bucketCount - 1;
}
sumSquares[bucket] += mono * mono;
counts[bucket]++;
}
var peak = 0.0;
for (var i = 0; i < bucketCount; i++)
{
if (counts[i] > 0)
{
result[i] = Math.Sqrt(sumSquares[i] / counts[i]);
if (result[i] > peak)
{
peak = result[i];
}
}
}
if (peak <= 0)
{
// Silence — return all zeros (Array is already zero-initialized).
Array.Clear(result);
return result;
}
for (var i = 0; i < bucketCount; i++)
{
result[i] /= peak;
}
return result;
}
/// <summary>
/// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
/// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
/// </summary>
private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
{
switch (bitsPerSample)
{
case 8:
// Unsigned, midpoint 128.
return (data[offset] - 128) / 128.0;
case 16:
{
short sample = (short)(data[offset] | (data[offset + 1] << 8));
return sample / 32768.0;
}
case 24:
{
// Sign-extend the 24-bit little-endian value into an int.
int raw = data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16);
if ((raw & 0x800000) != 0)
{
raw |= unchecked((int)0xFF000000);
}
return raw / 8388608.0;
}
case 32:
{
int sample = data[offset]
| (data[offset + 1] << 8)
| (data[offset + 2] << 16)
| (data[offset + 3] << 24);
return sample / 2147483648.0;
}
default:
throw new ArgumentOutOfRangeException(
nameof(bitsPerSample), bitsPerSample, "Unsupported PCM bit depth.");
}
}
}
@@ -0,0 +1,11 @@
namespace DeepDrftContent.Processors;
/// <summary>
/// Configuration for waveform loudness profiling. <see cref="BucketCount"/> is the stored
/// resolution — the number of loudness buckets computed and persisted per track, which is also
/// the bar count the frontend WaveformSeeker renders.
/// </summary>
public class WaveformProfileOptions
{
public int BucketCount { get; set; } = 512;
}
@@ -0,0 +1,123 @@
using DeepDrftContent.Constants;
using DeepDrftContent.FileDatabase.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;
namespace DeepDrftContent.Processors;
/// <summary>
/// Computes a track's waveform loudness profile from its WAV bytes and persists it as a sidecar
/// in the <see cref="VaultConstants.WaveformProfiles"/> vault, keyed by the track's EntryKey.
/// The profile is the upload-time, off-the-playback-path representation the frontend fetches to
/// render the WaveformSeeker. The loudness measure is injected (<see cref="ILoudnessAlgorithm"/>)
/// so it can be swapped without changing storage or the wire format.
/// </summary>
public class WaveformProfileService
{
private const string ProfileExtension = ".wfp";
private readonly FileDb _fileDatabase;
private readonly AudioProcessor _audioProcessor;
private readonly ILoudnessAlgorithm _loudnessAlgorithm;
private readonly WaveformProfileOptions _options;
private readonly ILogger<WaveformProfileService> _logger;
public WaveformProfileService(
FileDb fileDatabase,
AudioProcessor audioProcessor,
ILoudnessAlgorithm loudnessAlgorithm,
IOptions<WaveformProfileOptions> options,
ILogger<WaveformProfileService> logger)
{
_fileDatabase = fileDatabase;
_audioProcessor = audioProcessor;
_loudnessAlgorithm = loudnessAlgorithm;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Computes the loudness profile from <paramref name="wavBytes"/> and stores it under
/// <paramref name="entryKey"/>. Returns false (and logs) on any failure — a missing profile
/// is handled gracefully downstream, so callers on the upload path log-and-continue rather
/// than failing the upload. Does not throw for expected failure modes.
/// </summary>
public async Task<bool> ComputeAndStoreAsync(ReadOnlyMemory<byte> wavBytes, string entryKey)
{
try
{
var pcm = _audioProcessor.TryExtractPcm(wavBytes.Span);
if (pcm is null)
{
_logger.LogWarning(
"Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted.",
entryKey);
return false;
}
var value = pcm.Value;
var profile = _loudnessAlgorithm.Compute(
value.Pcm.Span,
value.Channels,
value.SampleRate,
value.BitsPerSample,
_options.BucketCount);
var quantized = Quantize(profile);
await EnsureVaultAsync();
var binary = new MediaBinary(new MediaBinaryParams(quantized, quantized.Length, ProfileExtension));
var stored = await _fileDatabase.RegisterResourceAsync(
VaultConstants.WaveformProfiles, entryKey, binary);
if (!stored)
{
_logger.LogWarning("Waveform profile vault write failed for {EntryKey}.", entryKey);
return false;
}
return true;
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
_logger.LogError(ex, "Waveform profile computation failed for {EntryKey}.", entryKey);
return false;
}
}
/// <summary>
/// Returns the stored quantized profile bytes for a track, or null if no profile is stored
/// (existing tracks predate profiling, and computation may have failed). Each byte is a
/// peak-normalized loudness value in [0, 255].
/// </summary>
public async Task<byte[]?> GetProfileAsync(string entryKey)
{
var binary = await _fileDatabase.LoadResourceAsync<MediaBinary>(
VaultConstants.WaveformProfiles, entryKey);
return binary?.Buffer;
}
/// <summary>
/// Maps each [0, 1] bucket to a [0, 255] byte. 1.0 maps to 255; the multiply-by-255 with a
/// truncating cast keeps every in-range value within a byte without a clamp branch.
/// </summary>
private static byte[] Quantize(double[] profile)
{
var bytes = new byte[profile.Length];
for (var i = 0; i < profile.Length; i++)
{
bytes[i] = (byte)(profile[i] * 255);
}
return bytes;
}
private async Task EnsureVaultAsync()
{
if (!_fileDatabase.HasVault(VaultConstants.WaveformProfiles))
{
await _fileDatabase.CreateVaultAsync(VaultConstants.WaveformProfiles, MediaVaultType.Media);
}
}
}