Stream the waveform compute so large uploads no longer buffer the whole file (Wave 2 OOM)

2026-06-25 21:49:11 -04:00
parent aa0b64329f
commit 9347f11ff0
10 changed files with 594 additions and 120 deletions
@@ -18,100 +18,27 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
    /// </summary>
    public const double SmoothingTimeConstantSeconds = 0.005;

+    /// <summary>
+    /// Whole-buffer reduction. Defined in terms of <see cref="CreateAccumulator"/> so the streaming and
+    /// whole-buffer paths share one decode + finalize implementation — byte-identical output by
+    /// construction, not by parallel maintenance. A non-positive bucketCount is rejected by <see cref="CreateAccumulator"/> with an ArgumentOutOfRangeException.
+    /// </summary>
    public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
+    {
+        var accumulator = CreateAccumulator(pcmData.Length, channels, sampleRate, bitsPerSample, bucketCount); // the span length is the total expected byte count
+        accumulator.Add(pcmData); // the whole buffer is delivered as a single chunk
+        return accumulator.Finish();
+    }
+
+    public ILoudnessAccumulator CreateAccumulator(
+        long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        if (bucketCount <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
        }

-        var result = new double[bucketCount];
-
-        if (channels <= 0)
-        {
-            return result;
-        }
-
-        var bytesPerSample = bitsPerSample / 8;
-        if (bytesPerSample <= 0)
-        {
-            return result;
-        }
-
-        var bytesPerFrame = bytesPerSample * channels;
-        var frameCount = pcmData.Length / bytesPerFrame;
-        if (frameCount == 0)
-        {
-            return result;
-        }
-
-        // Sum of squared mono amplitudes and the frame count, per bucket. A frame's bucket is
-        // determined by its position in the timeline so buckets are equal-duration slices.
-        var sumSquares = new double[bucketCount];
-        var counts = new long[bucketCount];
-
-        for (var frame = 0; frame < frameCount; frame++)
-        {
-            var frameStart = frame * bytesPerFrame;
-
-            double channelSum = 0;
-            for (var ch = 0; ch < channels; ch++)
-            {
-                var sampleStart = frameStart + ch * bytesPerSample;
-                channelSum += ReadSampleNormalized(pcmData, sampleStart, bitsPerSample);
-            }
-
-            var mono = channelSum / channels;
-
-            // long math avoids overflow on large files before the divide back into bucket index.
-            var bucket = (int)((long)frame * bucketCount / frameCount);
-            if (bucket >= bucketCount)
-            {
-                bucket = bucketCount - 1;
-            }
-
-            sumSquares[bucket] += mono * mono;
-            counts[bucket]++;
-        }
-
-        for (var i = 0; i < bucketCount; i++)
-        {
-            if (counts[i] > 0)
-            {
-                result[i] = Math.Sqrt(sumSquares[i] / counts[i]);
-            }
-        }
-
-        // Envelope smoothing (~15 ms): round the spikey per-bucket RMS into a smooth contour before
-        // peak-normalization, so the rendered ribbon reads as a continuous curve, not faceted polygons.
-        // Each bucket spans (totalSeconds / bucketCount) of audio; the filter coefficient is derived
-        // from that against the time constant so the smoothing is duration-aware, not a fixed window.
-        var totalSeconds = (double)frameCount / sampleRate;
-        var bucketSeconds = totalSeconds / bucketCount;
-        SmoothEnvelope(result, bucketSeconds);
-
-        var peak = 0.0;
-        for (var i = 0; i < bucketCount; i++)
-        {
-            if (result[i] > peak)
-            {
-                peak = result[i];
-            }
-        }
-
-        if (peak <= 0)
-        {
-            // Silence — return all zeros (Array is already zero-initialized).
-            Array.Clear(result);
-            return result;
-        }
-
-        for (var i = 0; i < bucketCount; i++)
-        {
-            result[i] /= peak;
-        }
-
-        return result;
+        return new RmsLoudnessAccumulator(pcmByteLength, channels, sampleRate, bitsPerSample, bucketCount);
    }

    /// <summary>
@@ -122,7 +49,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
    /// each bucket blends <c>(1 − a)</c> of itself with <c>a</c> of the running envelope. A near-zero
    /// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
    /// </summary>
-    private static void SmoothEnvelope(double[] data, double bucketSeconds)
+    internal static void SmoothEnvelope(double[] data, double bucketSeconds)
    {
        if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
        {
@@ -154,7 +81,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
    /// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
    /// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
    /// </summary>
-    private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
+    internal static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
    {
        switch (bitsPerSample)
        {
@@ -194,3 +121,167 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
        }
    }
 }
+
+/// <summary>
+/// Single-pass RMS accumulator backing <see cref="RmsLoudnessAlgorithm"/>. Frames are fed via
+/// <see cref="Add"/> in arbitrary chunks; a partial frame straddling a chunk boundary is carried in a
+/// one-frame buffer. The per-frame decode, bucket assignment, and per-bucket accumulation are the exact
+/// arithmetic the former whole-buffer loop used, in the same frame order, so the floating-point result
+/// is bit-identical whether the PCM arrives in one span or many. <see cref="Finish"/> applies the same
+/// envelope smoothing and peak-normalization as before. Memory is O(bucketCount + one frame). Bytes supplied beyond the expected frame count are ignored.
+/// </summary>
+public sealed class RmsLoudnessAccumulator : ILoudnessAccumulator
+{
+    private readonly int _channels; // channels per frame; samples are averaged to mono in ProcessFrame
+    private readonly int _sampleRate; // only read in Finish, to derive the per-bucket duration
+    private readonly int _bitsPerSample; // passed through to ReadSampleNormalized
+    private readonly int _bucketCount; // length of the profile returned by Finish
+    private readonly int _bytesPerSample; // bitsPerSample / 8 (integer division)
+    private readonly int _bytesPerFrame; // _bytesPerSample * channels; 0 when the parameters are degenerate
+    private readonly long _frameCount; // expected whole frames: pcmByteLength / _bytesPerFrame; 0 when degenerate
+
+    private readonly double[] _sumSquares; // per-bucket sum of squared mono amplitudes
+    private readonly long[] _counts; // per-bucket number of frames accumulated
+    private readonly byte[] _carry; // leading bytes of a frame split across two Add calls
+    private int _carryLen; // valid bytes currently in _carry (0 = nothing carried)
+    private long _frameIndex; // frames processed so far; also the timeline position used for bucketing
+
+    internal RmsLoudnessAccumulator(long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount) // bucketCount is not validated here; RmsLoudnessAlgorithm.CreateAccumulator rejects bucketCount <= 0 before constructing
+    {
+        _channels = channels;
+        _sampleRate = sampleRate;
+        _bitsPerSample = bitsPerSample;
+        _bucketCount = bucketCount;
+        _sumSquares = new double[bucketCount];
+        _counts = new long[bucketCount];
+
+        // Guards mirror the former whole-buffer Compute exactly: any degenerate parameter leaves
+        // _frameCount at 0, so Add is a no-op and Finish returns the zero-initialized profile.
+        _bytesPerSample = bitsPerSample / 8;
+        if (channels <= 0 || _bytesPerSample <= 0)
+        {
+            _bytesPerFrame = 0;
+            _frameCount = 0;
+            _carry = []; // Add returns immediately when _frameCount is 0, so nothing is ever carried
+            return;
+        }
+
+        _bytesPerFrame = _bytesPerSample * channels;
+        _frameCount = pcmByteLength / _bytesPerFrame; // integer division: a trailing partial frame is not counted
+        _carry = new byte[_bytesPerFrame];
+    }
+
+    public void Add(ReadOnlySpan<byte> pcmChunk) // feeds the next contiguous chunk of PCM bytes; chunk boundaries need not align to frames
+    {
+        if (_frameIndex >= _frameCount)
+        {
+            return; // degenerate input, or every expected frame already consumed
+        }
+
+        var pos = 0; // read offset into pcmChunk
+
+        // Complete a frame carried from the previous chunk first.
+        if (_carryLen > 0)
+        {
+            var need = _bytesPerFrame - _carryLen; // bytes still missing from the carried frame
+            var take = Math.Min(need, pcmChunk.Length); // the chunk may be too short to complete it
+            pcmChunk.Slice(0, take).CopyTo(_carry.AsSpan(_carryLen));
+            _carryLen += take;
+            pos += take;
+
+            if (_carryLen < _bytesPerFrame)
+            {
+                return; // still not a full frame
+            }
+
+            ProcessFrame(_carry);
+            _carryLen = 0;
+            if (_frameIndex >= _frameCount)
+            {
+                return; // that was the last expected frame; the rest of the chunk is ignored
+            }
+        }
+
+        // Whole frames directly from the chunk.
+        while (pos + _bytesPerFrame <= pcmChunk.Length && _frameIndex < _frameCount)
+        {
+            ProcessFrame(pcmChunk.Slice(pos, _bytesPerFrame));
+            pos += _bytesPerFrame;
+        }
+
+        // Stash a trailing partial frame for the next chunk — but only while frames are still expected.
+        // A trailing partial frame on the final chunk is dropped, matching the whole-buffer path.
+        if (_frameIndex < _frameCount && pos < pcmChunk.Length)
+        {
+            var remainder = pcmChunk.Slice(pos);
+            remainder.CopyTo(_carry); // the loop above only stops with frames still expected when less than one frame is left, so this fits
+            _carryLen = remainder.Length;
+        }
+    }
+
+    private void ProcessFrame(ReadOnlySpan<byte> frame) // decodes one full frame, downmixes it to mono, and adds its square to the frame's bucket
+    {
+        double channelSum = 0;
+        for (var ch = 0; ch < _channels; ch++)
+        {
+            channelSum += RmsLoudnessAlgorithm.ReadSampleNormalized(frame, ch * _bytesPerSample, _bitsPerSample);
+        }
+
+        var mono = channelSum / _channels; // arithmetic mean across channels
+
+        // long math avoids overflow on large files before the divide back into bucket index.
+        var bucket = (int)(_frameIndex * _bucketCount / _frameCount); // position in the timeline scaled to [0, _bucketCount)
+        if (bucket >= _bucketCount)
+        {
+            bucket = _bucketCount - 1; // defensive clamp to the last bucket
+        }
+
+        _sumSquares[bucket] += mono * mono;
+        _counts[bucket]++;
+        _frameIndex++;
+    }
+
+    public double[] Finish() // returns the smoothed, peak-normalized per-bucket RMS profile in a newly allocated array
+    {
+        var result = new double[_bucketCount];
+        if (_frameCount == 0)
+        {
+            return result; // degenerate input — all zeros, as the whole-buffer guards returned
+        }
+
+        for (var i = 0; i < _bucketCount; i++)
+        {
+            if (_counts[i] > 0)
+            {
+                result[i] = Math.Sqrt(_sumSquares[i] / _counts[i]); // RMS = sqrt(mean of squares); buckets with no frames stay 0
+            }
+        }
+
+        // Envelope smoothing (~15 ms) then peak-normalization — identical to the whole-buffer finalize. NOTE(review): "~15 ms" does not match SmoothingTimeConstantSeconds = 0.005 declared on RmsLoudnessAlgorithm — confirm which figure is intended.
+        var totalSeconds = (double)_frameCount / _sampleRate; // based on the expected frame count, not the number of frames actually received
+        var bucketSeconds = totalSeconds / _bucketCount;
+        RmsLoudnessAlgorithm.SmoothEnvelope(result, bucketSeconds);
+
+        var peak = 0.0;
+        for (var i = 0; i < _bucketCount; i++)
+        {
+            if (result[i] > peak)
+            {
+                peak = result[i];
+            }
+        }
+
+        if (peak <= 0) // silence: there is no peak to normalize against
+        {
+            Array.Clear(result);
+            return result;
+        }
+
+        for (var i = 0; i < _bucketCount; i++)
+        {
+            result[i] /= peak; // scale so the loudest bucket is exactly 1.0
+        }
+
+        return result;
+    }
+}