Files
deepdrft/DeepDrftContent/Processors/RmsLoudnessAlgorithm.cs
T
daniel-c-harvey 4e34696719 feat(mix-visualizer): Phase 10 tuning — smooth waveform, bouncy collision, 8 knobs
Smooth the loudness contour (~50 ms envelope at preprocessing + decode-time, plus
smootherstep render reconstruction); retune wax↔waveform collision to bouncy/sub-unity
(no explosion/stuck/jitter); split the bubbles knob into fluid-amount + fluid-viscosity
(cohesion via uniform-only smin/wobble); retune scroll/gravity/heat/width ranges; make
the colour rotation visible and boost OKLab chroma; the controls bar now holds its
layout and hides only its knobs via a Visible parameter.
2026-06-17 05:12:15 -04:00

196 lines
7.2 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
namespace DeepDrftContent.Processors;
/// <summary>
/// Loudness via root-mean-square amplitude per time bucket. Decodes signed PCM (8-bit unsigned,
/// 16/24/32-bit signed little-endian), averages channels to mono, partitions the frames into
/// equal time slices, takes the RMS of each slice, applies a ~50 ms envelope-follower smoothing
/// so the contour reads as a smooth curve rather than a spikey polygon, then peak-normalizes so
/// the loudest bucket is 1. No external audio dependency — operates directly on the WAV data-chunk bytes.
/// </summary>
public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
{
/// <summary>
/// Envelope-follower time constant, seconds. ~50 ms is the spec's smoothing target (Phase 10
/// tuning): long enough to round off the per-bucket RMS spikes into a smooth ribbon contour,
/// short enough that real loudness transients (kicks, drops) still read. Applied as a symmetric
/// (forward+backward) one-pole filter so the smoothing introduces no time lag.
/// </summary>
public const double SmoothingTimeConstantSeconds = 0.05;
public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
{
if (bucketCount <= 0)
{
throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
}
var result = new double[bucketCount];
if (channels <= 0)
{
return result;
}
var bytesPerSample = bitsPerSample / 8;
if (bytesPerSample <= 0)
{
return result;
}
var bytesPerFrame = bytesPerSample * channels;
var frameCount = pcmData.Length / bytesPerFrame;
if (frameCount == 0)
{
return result;
}
// Sum of squared mono amplitudes and the frame count, per bucket. A frame's bucket is
// determined by its position in the timeline so buckets are equal-duration slices.
var sumSquares = new double[bucketCount];
var counts = new long[bucketCount];
for (var frame = 0; frame < frameCount; frame++)
{
var frameStart = frame * bytesPerFrame;
double channelSum = 0;
for (var ch = 0; ch < channels; ch++)
{
var sampleStart = frameStart + ch * bytesPerSample;
channelSum += ReadSampleNormalized(pcmData, sampleStart, bitsPerSample);
}
var mono = channelSum / channels;
// long math avoids overflow on large files before the divide back into bucket index.
var bucket = (int)((long)frame * bucketCount / frameCount);
if (bucket >= bucketCount)
{
bucket = bucketCount - 1;
}
sumSquares[bucket] += mono * mono;
counts[bucket]++;
}
for (var i = 0; i < bucketCount; i++)
{
if (counts[i] > 0)
{
result[i] = Math.Sqrt(sumSquares[i] / counts[i]);
}
}
// Envelope smoothing (~50 ms): round the spikey per-bucket RMS into a smooth contour before
// peak-normalization, so the rendered ribbon reads as a continuous curve, not faceted polygons.
// Each bucket spans (totalSeconds / bucketCount) of audio; the filter coefficient is derived
// from that against the time constant so the smoothing is duration-aware, not a fixed window.
var totalSeconds = (double)frameCount / sampleRate;
var bucketSeconds = totalSeconds / bucketCount;
SmoothEnvelope(result, bucketSeconds);
var peak = 0.0;
for (var i = 0; i < bucketCount; i++)
{
if (result[i] > peak)
{
peak = result[i];
}
}
if (peak <= 0)
{
// Silence — return all zeros (Array is already zero-initialized).
Array.Clear(result);
return result;
}
for (var i = 0; i < bucketCount; i++)
{
result[i] /= peak;
}
return result;
}
/// <summary>
/// Symmetric one-pole envelope smoothing over the per-bucket loudness, in place. A forward pass
/// then a backward pass cancels the single-pole phase lag, so the smoothed contour stays aligned
/// with the audio (no rightward time shift). The coefficient <c>a = exp(bucketSeconds / τ)</c>
/// gives a ~<paramref name="bucketSeconds"/>-relative response targeting the ~50 ms time constant:
/// each bucket blends <c>(1 a)</c> of itself with <c>a</c> of the running envelope. A near-zero
/// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
/// </summary>
private static void SmoothEnvelope(double[] data, double bucketSeconds)
{
if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
{
return;
}
var a = Math.Exp(-bucketSeconds / SmoothingTimeConstantSeconds);
// a→1 means buckets are far finer than τ (heavy smoothing); a→0 means each bucket already
// spans ≫ τ, so smoothing is a no-op. Either extreme is handled by the blend below.
// Forward pass.
var env = data[0];
for (var i = 0; i < data.Length; i++)
{
env = a * env + (1 - a) * data[i];
data[i] = env;
}
// Backward pass (zero-phase): smooth the forward result in reverse so the net lag is zero.
env = data[^1];
for (var i = data.Length - 1; i >= 0; i--)
{
env = a * env + (1 - a) * data[i];
data[i] = env;
}
}
/// <summary>
/// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
/// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
/// </summary>
private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
{
switch (bitsPerSample)
{
case 8:
// Unsigned, midpoint 128.
return (data[offset] - 128) / 128.0;
case 16:
{
short sample = (short)(data[offset] | (data[offset + 1] << 8));
return sample / 32768.0;
}
case 24:
{
// Sign-extend the 24-bit little-endian value into an int.
int raw = data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16);
if ((raw & 0x800000) != 0)
{
raw |= unchecked((int)0xFF000000);
}
return raw / 8388608.0;
}
case 32:
{
int sample = data[offset]
| (data[offset + 1] << 8)
| (data[offset + 2] << 16)
| (data[offset + 3] << 24);
return sample / 2147483648.0;
}
default:
throw new ArgumentOutOfRangeException(
nameof(bitsPerSample), bitsPerSample, "Unsupported PCM bit depth.");
}
}
}