Add server-side waveform loudness profiling on track upload
ILoudnessAlgorithm strategy (RmsLoudnessAlgorithm first impl), WaveformProfileService stores quantized byte[] sidecar in new MediaFileVault (profiles vault), wired into UnifiedTrackService.UploadAsync; failure is logged and swallowed. WaveformProfileDto and WaveformProfileOptions in shared projects.
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
namespace DeepDrftContent.Processors;
|
||||
|
||||
/// <summary>
/// Root-mean-square loudness profile over equal-duration time buckets. Every PCM frame is decoded
/// (8-bit unsigned, or 16/24/32-bit signed little-endian), its channels are averaged into one mono
/// amplitude, and the frame is assigned to a bucket by its position in the timeline. Each bucket's
/// RMS is finally divided by the largest bucket RMS, so the loudest bucket comes out as 1.
/// Works directly on the raw WAV data-chunk bytes; no external audio library is involved.
/// </summary>
public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
{
    /// <summary>
    /// Computes the peak-normalized RMS loudness of <paramref name="pcmData"/> for each bucket.
    /// </summary>
    /// <param name="pcmData">Interleaved PCM sample bytes. Trailing bytes that do not make up a whole frame are ignored.</param>
    /// <param name="channels">Samples per frame. Zero or negative produces an all-zero result.</param>
    /// <param name="sampleRate">Not read by this implementation.</param>
    /// <param name="bitsPerSample">8, 16, 24 or 32. Values below 8 produce an all-zero result.</param>
    /// <param name="bucketCount">Length of the returned profile; must be positive.</param>
    /// <returns>
    /// An array of <paramref name="bucketCount"/> values: bucket RMS divided by the loudest bucket's RMS.
    /// All zeros when no complete frame is available or the audio is silent. Buckets that receive no
    /// frames (fewer frames than buckets) stay at 0.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bucketCount"/> is not positive, or at least one frame had to be decoded and
    /// <paramref name="bitsPerSample"/> is not one of the supported depths.
    /// </exception>
    public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        if (bucketCount <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
        }

        var profile = new double[bucketCount];

        // Degenerate layouts cannot describe a frame, so the profile stays at zero.
        var sampleWidth = bitsPerSample / 8;
        if (channels <= 0 || sampleWidth <= 0)
        {
            return profile;
        }

        var frameWidth = sampleWidth * channels;
        var totalFrames = pcmData.Length / frameWidth;
        if (totalFrames == 0)
        {
            return profile;
        }

        // Per bucket: running sum of squared mono amplitudes and the number of frames that landed there.
        var squareTotals = new double[bucketCount];
        var frameTallies = new long[bucketCount];

        // Byte position of the next sample; frames are contiguous, so it simply advances.
        var cursor = 0;
        for (var frameIndex = 0; frameIndex < totalFrames; frameIndex++)
        {
            var mixed = 0.0;
            for (var remaining = channels; remaining > 0; remaining--)
            {
                mixed += ReadSampleNormalized(pcmData, cursor, bitsPerSample);
                cursor += sampleWidth;
            }

            var mono = mixed / channels;

            // The product is formed in 64-bit so long recordings cannot overflow before the divide.
            var slot = Math.Min((int)((long)frameIndex * bucketCount / totalFrames), bucketCount - 1);

            squareTotals[slot] += mono * mono;
            frameTallies[slot]++;
        }

        var loudest = 0.0;
        for (var slot = 0; slot < bucketCount; slot++)
        {
            if (frameTallies[slot] <= 0)
            {
                continue;
            }

            var rms = Math.Sqrt(squareTotals[slot] / frameTallies[slot]);
            profile[slot] = rms;
            if (rms > loudest)
            {
                loudest = rms;
            }
        }

        if (loudest <= 0)
        {
            // Pure silence: every bucket RMS is zero, so there is nothing to normalize against.
            Array.Clear(profile);
            return profile;
        }

        foreach (ref var level in profile.AsSpan())
        {
            level /= loudest;
        }

        return profile;
    }

    /// <summary>
    /// Reads the PCM sample that starts at <paramref name="offset"/> and scales it by the bit depth's
    /// full-scale magnitude, giving an amplitude in [-1, 1). 8-bit samples are unsigned with a
    /// midpoint of 128; 16-, 24- and 32-bit samples are signed little-endian.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bitsPerSample"/> is not 8, 16, 24 or 32.
    /// </exception>
    private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
    {
        return bitsPerSample switch
        {
            // Unsigned byte re-centred around its midpoint of 128.
            8 => (data[offset] - 128) / 128.0,

            // The cast to short reinterprets the assembled 16 bits as a signed value.
            16 => (short)(data[offset] | (data[offset + 1] << 8)) / 32768.0,

            // Shift the 24-bit value to the top of the int, then arithmetic-shift back to sign-extend it.
            24 => (((data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16)) << 8) >> 8) / 8388608.0,

            // Four bytes fill the int completely, so the sign bit is already in place.
            32 => (data[offset]
                   | (data[offset + 1] << 8)
                   | (data[offset + 2] << 16)
                   | (data[offset + 3] << 24)) / 2147483648.0,

            _ => throw new ArgumentOutOfRangeException(
                nameof(bitsPerSample), bitsPerSample, "Unsupported PCM bit depth."),
        };
    }
}
|
||||
Reference in New Issue
Block a user