Stream the waveform compute so large uploads no longer buffer the whole file (Wave 2 OOM)
This commit is contained in:
@@ -18,100 +18,27 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
||||
/// </summary>
|
||||
public const double SmoothingTimeConstantSeconds = 0.005;
|
||||
|
||||
/// <summary>
/// One-shot loudness profile for a PCM buffer that is already fully in memory. The buffer is handed
/// to the accumulator returned by <see cref="CreateAccumulator"/> as a single chunk, so this path
/// and the chunked path run the same accumulate-and-finish code.
/// </summary>
/// <param name="pcmData">Raw PCM bytes; the span length is passed on as the total PCM byte length.</param>
/// <param name="channels">Channel count, forwarded unchanged to the accumulator.</param>
/// <param name="sampleRate">Sample rate, forwarded unchanged to the accumulator.</param>
/// <param name="bitsPerSample">Bits per sample, forwarded unchanged to the accumulator.</param>
/// <param name="bucketCount">Number of buckets requested for the returned profile.</param>
/// <returns>The array produced by the accumulator's <c>Finish</c> call.</returns>
public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
{
    // A whole buffer is simply the degenerate streaming case: exactly one chunk.
    ILoudnessAccumulator sink = CreateAccumulator(pcmData.Length, channels, sampleRate, bitsPerSample, bucketCount);
    sink.Add(pcmData);

    double[] profile = sink.Finish();
    return profile;
}
|
||||
|
||||
/// <summary>
/// Creates the streaming accumulator that callers feed PCM chunks into. All per-frame work and the
/// final reduction live in <see cref="RmsLoudnessAccumulator"/>; this factory only validates the
/// bucket count up front.
/// </summary>
/// <param name="pcmByteLength">
/// Total number of PCM bytes that will be supplied; the accumulator divides it by the frame size to
/// learn how many frames to expect.
/// </param>
/// <param name="channels">Channel count. Non-positive values are handled by the accumulator (all-zero profile).</param>
/// <param name="sampleRate">Sample rate, used by the accumulator when deriving per-bucket duration.</param>
/// <param name="bitsPerSample">Bits per sample. Values below 8 are handled by the accumulator (all-zero profile).</param>
/// <param name="bucketCount">Number of buckets in the resulting profile; must be positive.</param>
/// <returns>A new <see cref="RmsLoudnessAccumulator"/> configured with the given parameters.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is zero or negative.</exception>
public ILoudnessAccumulator CreateAccumulator(
    long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
{
    // Reject an unusable bucket count here: the accumulator sizes its arrays from it.
    if (bucketCount <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
    }

    return new RmsLoudnessAccumulator(pcmByteLength, channels, sampleRate, bitsPerSample, bucketCount);
}
|
||||
|
||||
/// <summary>
|
||||
@@ -122,7 +49,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
||||
/// each bucket blends <c>(1 − a)</c> of itself with <c>a</c> of the running envelope. A near-zero
|
||||
/// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
|
||||
/// </summary>
|
||||
private static void SmoothEnvelope(double[] data, double bucketSeconds)
|
||||
internal static void SmoothEnvelope(double[] data, double bucketSeconds)
|
||||
{
|
||||
if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
|
||||
{
|
||||
@@ -154,7 +81,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
||||
/// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
|
||||
/// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
|
||||
/// </summary>
|
||||
private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
|
||||
internal static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
|
||||
{
|
||||
switch (bitsPerSample)
|
||||
{
|
||||
@@ -194,3 +121,167 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Streaming RMS reducer behind <see cref="RmsLoudnessAlgorithm"/>. PCM bytes arrive through
/// <see cref="Add"/> in chunks of any size; bytes of a frame that is split across two chunks are
/// parked in a one-frame scratch buffer until the rest shows up. Every frame is decoded, mapped to a
/// bucket by its position in the expected timeline, and folded into that bucket's sum of squares in
/// arrival order. <see cref="Finish"/> turns the sums into per-bucket RMS, smooths the envelope and
/// peak-normalizes. Working memory is two arrays of <c>bucketCount</c> entries plus one frame.
/// </summary>
public sealed class RmsLoudnessAccumulator : ILoudnessAccumulator
{
    private readonly int _channelCount;
    private readonly int _sampleRate;
    private readonly int _bitsPerSample;
    private readonly int _bucketCount;
    private readonly int _sampleBytes;
    private readonly int _frameBytes;
    private readonly long _expectedFrames;

    private readonly double[] _squareSums;
    private readonly long[] _frameTallies;
    private readonly byte[] _pending;
    private int _pendingLength;
    private long _framesSeen;

    /// <summary>
    /// Sets up the per-bucket state and derives the frame geometry from the PCM parameters.
    /// </summary>
    /// <param name="pcmByteLength">Total PCM byte length; divided by the frame size to get the expected frame count.</param>
    /// <param name="channels">Channel count; a non-positive value puts the accumulator in its inert state.</param>
    /// <param name="sampleRate">Sample rate, used in <see cref="Finish"/> to compute seconds per bucket.</param>
    /// <param name="bitsPerSample">Bits per sample; fewer than 8 puts the accumulator in its inert state.</param>
    /// <param name="bucketCount">Length of the per-bucket arrays and of the final profile.</param>
    internal RmsLoudnessAccumulator(long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        _channelCount = channels;
        _sampleRate = sampleRate;
        _bitsPerSample = bitsPerSample;
        _bucketCount = bucketCount;

        _squareSums = new double[bucketCount];
        _frameTallies = new long[bucketCount];

        _sampleBytes = bitsPerSample / 8;

        if (channels > 0 && _sampleBytes > 0)
        {
            _frameBytes = _sampleBytes * channels;
            _expectedFrames = pcmByteLength / _frameBytes;
            _pending = new byte[_frameBytes];
        }
        else
        {
            // Inert state: zero expected frames makes Add ignore all input and Finish return zeros.
            _frameBytes = 0;
            _expectedFrames = 0;
            _pending = [];
        }
    }

    /// <summary>
    /// Consumes the next chunk of PCM bytes. Input is ignored once the expected number of frames has
    /// been processed (which is immediately the case for an inert accumulator).
    /// </summary>
    /// <param name="pcmChunk">The next bytes of the PCM stream; need not be frame-aligned.</param>
    public void Add(ReadOnlySpan<byte> pcmChunk)
    {
        if (_framesSeen >= _expectedFrames)
        {
            return;
        }

        var rest = pcmChunk;

        // Top up a frame left half-finished by the previous chunk before touching whole frames.
        if (_pendingLength > 0)
        {
            var missing = _frameBytes - _pendingLength;
            var borrowed = Math.Min(missing, rest.Length);
            rest[..borrowed].CopyTo(_pending.AsSpan(_pendingLength));
            _pendingLength += borrowed;
            rest = rest[borrowed..];

            if (_pendingLength < _frameBytes)
            {
                // The chunk ran out before the frame was complete; wait for more bytes.
                return;
            }

            Accumulate(_pending);
            _pendingLength = 0;
        }

        // Whole frames are read straight out of the chunk; both loops below stop by themselves once
        // the expected frame count has been reached.
        while (_framesSeen < _expectedFrames && rest.Length >= _frameBytes)
        {
            Accumulate(rest[.._frameBytes]);
            rest = rest[_frameBytes..];
        }

        // Leftover bytes shorter than a frame are parked only while more frames are still expected;
        // otherwise they are dropped.
        if (_framesSeen < _expectedFrames && !rest.IsEmpty)
        {
            rest.CopyTo(_pending);
            _pendingLength = rest.Length;
        }
    }

    /// <summary>
    /// Decodes one frame, averages its channels to mono and adds the squared value to the bucket
    /// selected by the frame's index within the expected frame count.
    /// </summary>
    private void Accumulate(ReadOnlySpan<byte> frame)
    {
        var total = 0.0;
        var offset = 0;
        for (var c = 0; c < _channelCount; c++)
        {
            total += RmsLoudnessAlgorithm.ReadSampleNormalized(frame, offset, _bitsPerSample);
            offset += _sampleBytes;
        }

        var mono = total / _channelCount;

        // The product is computed in 64-bit before the divide; the result is clamped to the last bucket.
        var slot = Math.Min((int)(_framesSeen * _bucketCount / _expectedFrames), _bucketCount - 1);

        _squareSums[slot] += mono * mono;
        _frameTallies[slot]++;
        _framesSeen++;
    }

    /// <summary>
    /// Produces the final profile: per-bucket RMS, passed through
    /// <see cref="RmsLoudnessAlgorithm.SmoothEnvelope"/>, then divided by the largest value.
    /// </summary>
    /// <returns>
    /// An array of <c>bucketCount</c> values. It is all zeros when no frames were expected or when the
    /// peak after smoothing is not positive.
    /// </returns>
    public double[] Finish()
    {
        var profile = new double[_bucketCount];
        if (_expectedFrames == 0)
        {
            return profile;
        }

        for (var b = 0; b < profile.Length; b++)
        {
            var tally = _frameTallies[b];
            if (tally > 0)
            {
                profile[b] = Math.Sqrt(_squareSums[b] / tally);
            }
        }

        // Duration of audio covered by one bucket, handed to the smoothing filter.
        var secondsPerBucket = (double)_expectedFrames / _sampleRate / _bucketCount;
        RmsLoudnessAlgorithm.SmoothEnvelope(profile, secondsPerBucket);

        var loudest = 0.0;
        foreach (var level in profile)
        {
            if (level > loudest)
            {
                loudest = level;
            }
        }

        if (loudest > 0)
        {
            for (var b = 0; b < profile.Length; b++)
            {
                profile[b] /= loudest;
            }
        }
        else
        {
            // No positive peak: report a flat, all-zero profile.
            Array.Clear(profile);
        }

        return profile;
    }
}
|
||||
|
||||
Reference in New Issue
Block a user