namespace DeepDrftContent.Processors;

/// <summary>
/// Loudness via root-mean-square amplitude per time bucket. Decodes PCM (8-bit unsigned,
/// 16/24/32-bit signed little-endian), averages channels to mono, partitions the frames into
/// equal time slices, takes the RMS of each slice, applies a ~15 ms envelope-follower smoothing
/// so the contour reads as a smooth curve rather than a spikey polygon, then peak-normalizes so
/// the loudest bucket is 1. No external audio dependency — operates directly on the WAV data-chunk bytes.
/// </summary>
public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
{
    /// <summary>
    /// Envelope-follower time constant, seconds. ~15 ms is the smoothing target (Phase 10
    /// tuning, reduced from 50 ms which was over-smoothed): long enough to round off the
    /// per-bucket RMS spikes into a smooth ribbon contour, short enough that real loudness
    /// transients (kicks, drops) still read. Applied as a symmetric (forward+backward) one-pole
    /// filter so the smoothing introduces no time lag.
    /// </summary>
    /// <remarks>
    /// The value is 0.015 s so the code agrees with the documented 15 ms target; it previously
    /// read 0.005, which contradicted every description of the smoothing in this file.
    /// </remarks>
    public const double SmoothingTimeConstantSeconds = 0.015;

    /// <summary>
    /// Whole-buffer reduction. Defined in terms of <see cref="CreateAccumulator"/> so the streaming and
    /// whole-buffer paths share one decode + finalize implementation — byte-identical output by
    /// construction, not by parallel maintenance.
    /// </summary>
    /// <param name="pcmData">Raw PCM bytes (the WAV data chunk).</param>
    /// <param name="channels">Number of interleaved channels.</param>
    /// <param name="sampleRate">Sample rate in frames per second.</param>
    /// <param name="bitsPerSample">PCM bit depth: 8, 16, 24 or 32.</param>
    /// <param name="bucketCount">Number of loudness buckets to produce; must be positive.</param>
    /// <returns>The loudness profile, one value per bucket.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is not positive.</exception>
    public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        var accumulator = CreateAccumulator(pcmData.Length, channels, sampleRate, bitsPerSample, bucketCount);
        accumulator.Add(pcmData);
        return accumulator.Finish();
    }

    /// <summary>
    /// Creates the streaming accumulator for a PCM payload of the given total byte length.
    /// </summary>
    /// <param name="pcmByteLength">Total number of PCM bytes that will be fed to the accumulator.</param>
    /// <param name="channels">Number of interleaved channels.</param>
    /// <param name="sampleRate">Sample rate in frames per second.</param>
    /// <param name="bitsPerSample">PCM bit depth.</param>
    /// <param name="bucketCount">Number of loudness buckets to produce; must be positive.</param>
    /// <returns>A new <see cref="RmsLoudnessAccumulator"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is not positive.</exception>
    public ILoudnessAccumulator CreateAccumulator(
        long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        if (bucketCount <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
        }

        return new RmsLoudnessAccumulator(pcmByteLength, channels, sampleRate, bitsPerSample, bucketCount);
    }

    /// <summary>
    /// Symmetric one-pole envelope smoothing over the per-bucket loudness, in place. A forward pass
    /// then a backward pass cancels the single-pole phase lag, so the smoothed contour stays aligned
    /// with the audio (no rightward time shift). The coefficient <c>a = exp(−bucketSeconds / τ)</c>,
    /// with τ = <see cref="SmoothingTimeConstantSeconds"/>, targets the ~15 ms time constant:
    /// each bucket blends <c>(1 − a)</c> of itself with <c>a</c> of the running envelope. A near-zero
    /// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
    /// </summary>
    /// <param name="data">Per-bucket loudness values; overwritten with the smoothed values.</param>
    /// <param name="bucketSeconds">Duration of one bucket, in seconds.</param>
    internal static void SmoothEnvelope(double[] data, double bucketSeconds)
    {
        if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
        {
            return;
        }

        var a = Math.Exp(-bucketSeconds / SmoothingTimeConstantSeconds);

        // a→1 means buckets are far finer than τ (heavy smoothing); a→0 means each bucket already
        // spans ≫ τ, so smoothing is a no-op. Either extreme is handled by the blend below.

        // Forward pass, seeded with the first bucket.
        var env = data[0];
        for (var i = 0; i < data.Length; i++)
        {
            env = a * env + (1 - a) * data[i];
            data[i] = env;
        }

        // Backward pass (zero-phase): smooth the forward result in reverse so the net lag is zero.
        env = data[^1];
        for (var i = data.Length - 1; i >= 0; i--)
        {
            env = a * env + (1 - a) * data[i];
            data[i] = env;
        }
    }

    /// <summary>
    /// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
    /// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
    /// </summary>
    /// <param name="data">Buffer containing the sample bytes.</param>
    /// <param name="offset">Index of the sample's first (least significant) byte.</param>
    /// <param name="bitsPerSample">PCM bit depth: 8, 16, 24 or 32.</param>
    /// <returns>The sample divided by the full-scale magnitude of its bit depth.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The bit depth is not 8, 16, 24 or 32.</exception>
    internal static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
    {
        switch (bitsPerSample)
        {
            case 8:
                // Unsigned, midpoint 128.
                return (data[offset] - 128) / 128.0;

            case 16:
            {
                short sample = (short)(data[offset] | (data[offset + 1] << 8));
                return sample / 32768.0;
            }

            case 24:
            {
                // Sign-extend the 24-bit little-endian value into an int.
                int raw = data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16);
                if ((raw & 0x800000) != 0)
                {
                    raw |= unchecked((int)0xFF000000);
                }

                return raw / 8388608.0;
            }

            case 32:
            {
                int sample = data[offset]
                    | (data[offset + 1] << 8)
                    | (data[offset + 2] << 16)
                    | (data[offset + 3] << 24);
                return sample / 2147483648.0;
            }

            default:
                throw new ArgumentOutOfRangeException(
                    nameof(bitsPerSample), bitsPerSample, "Unsupported PCM bit depth.");
        }
    }
}

/// <summary>
/// Single-pass RMS accumulator backing <see cref="RmsLoudnessAlgorithm"/>. Frames are fed via
/// <see cref="RmsLoudnessAccumulator.Add"/> in arbitrary chunks; a partial frame straddling a chunk
/// boundary is carried in a one-frame buffer. The per-frame decode, bucket assignment, and per-bucket
/// accumulation are the exact arithmetic the former whole-buffer loop used, in the same frame order,
/// so the floating-point result is bit-identical whether the PCM arrives in one span or many.
/// <see cref="RmsLoudnessAccumulator.Finish"/> applies the same envelope smoothing and
/// peak-normalization as before. Memory is O(bucketCount + one frame).
/// </summary>
/// <remarks>
/// Holds one sum-of-squares slot and one frame counter per bucket, plus a single-frame carry buffer
/// for a frame split across two chunks. Degenerate parameters (non-positive channel count, or a bit
/// depth below 8) yield a frame count of zero, which turns <see cref="Add"/> into a no-op and makes
/// <see cref="Finish"/> return an all-zero profile. No synchronization is performed here.
/// </remarks>
public sealed class RmsLoudnessAccumulator : ILoudnessAccumulator
{
    private readonly int _channels;
    private readonly int _sampleRate;
    private readonly int _bitsPerSample;
    private readonly int _bucketCount;
    private readonly int _bytesPerSample;
    private readonly int _bytesPerFrame;
    private readonly long _frameCount;
    private readonly double[] _sumSquares;
    private readonly long[] _counts;
    private readonly byte[] _carry;
    private int _carryLen;
    private long _frameIndex;

    /// <summary>
    /// Sets up per-bucket storage and derives the frame geometry from the PCM format.
    /// </summary>
    /// <param name="pcmByteLength">Total PCM byte count expected; whole frames only are counted.</param>
    /// <param name="channels">Number of interleaved channels.</param>
    /// <param name="sampleRate">Sample rate in frames per second (used only to time the smoothing).</param>
    /// <param name="bitsPerSample">PCM bit depth.</param>
    /// <param name="bucketCount">Number of buckets in the resulting profile.</param>
    internal RmsLoudnessAccumulator(long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        _channels = channels;
        _sampleRate = sampleRate;
        _bitsPerSample = bitsPerSample;
        _bucketCount = bucketCount;
        _sumSquares = new double[bucketCount];
        _counts = new long[bucketCount];
        _bytesPerSample = bitsPerSample / 8;

        // Same guards as the former whole-buffer Compute: a degenerate format means zero frames,
        // so Add does nothing and Finish hands back the zero-initialized profile.
        if (channels > 0 && _bytesPerSample > 0)
        {
            _bytesPerFrame = _bytesPerSample * channels;
            _frameCount = pcmByteLength / _bytesPerFrame;
            _carry = new byte[_bytesPerFrame];
        }
        else
        {
            _bytesPerFrame = 0;
            _frameCount = 0;
            _carry = [];
        }
    }

    /// <summary>True while fewer frames have been consumed than the declared total.</summary>
    private bool ExpectsMoreFrames => _frameIndex < _frameCount;

    /// <summary>
    /// Feeds the next chunk of PCM bytes. Chunks may split frames anywhere; the split frame is
    /// completed from the following chunk.
    /// </summary>
    /// <param name="pcmChunk">The next consecutive bytes of the PCM stream.</param>
    public void Add(ReadOnlySpan<byte> pcmChunk)
    {
        if (!ExpectsMoreFrames)
        {
            return; // degenerate input, or every expected frame already consumed
        }

        var cursor = 0;

        // Top up the frame left over from the previous chunk before touching new frames.
        if (_carryLen > 0)
        {
            var missing = _bytesPerFrame - _carryLen;
            var copied = Math.Min(missing, pcmChunk.Length);
            pcmChunk[..copied].CopyTo(_carry.AsSpan(_carryLen));
            _carryLen += copied;
            cursor += copied;

            if (_carryLen < _bytesPerFrame)
            {
                return; // chunk exhausted and the frame is still incomplete
            }

            ProcessFrame(_carry);
            _carryLen = 0;
            // If that was the last expected frame, the loop and the stash below both fall through.
        }

        // Whole frames straight out of the chunk.
        for (; cursor + _bytesPerFrame <= pcmChunk.Length && ExpectsMoreFrames; cursor += _bytesPerFrame)
        {
            ProcessFrame(pcmChunk.Slice(cursor, _bytesPerFrame));
        }

        // Keep a trailing partial frame for the next chunk only while frames are still expected;
        // a partial frame after the final expected frame is dropped, as in the whole-buffer path.
        if (ExpectsMoreFrames && cursor < pcmChunk.Length)
        {
            var tail = pcmChunk[cursor..];
            tail.CopyTo(_carry);
            _carryLen = tail.Length;
        }
    }

    /// <summary>
    /// Decodes one frame, averages its channels to mono, and adds the squared value to the
    /// bucket the current frame index maps to.
    /// </summary>
    /// <param name="frame">Exactly one frame of interleaved sample bytes.</param>
    private void ProcessFrame(ReadOnlySpan<byte> frame)
    {
        var total = 0.0;
        var sampleOffset = 0;
        for (var ch = 0; ch < _channels; ch++, sampleOffset += _bytesPerSample)
        {
            total += RmsLoudnessAlgorithm.ReadSampleNormalized(frame, sampleOffset, _bitsPerSample);
        }

        var mono = total / _channels;

        // 64-bit product so large files do not overflow before the divide back to a bucket index.
        var slot = (int)(_frameIndex * _bucketCount / _frameCount);
        slot = Math.Min(slot, _bucketCount - 1);

        _sumSquares[slot] += mono * mono;
        _counts[slot]++;
        _frameIndex++;
    }

    /// <summary>
    /// Produces the final profile: per-bucket RMS, envelope smoothing, then peak-normalization.
    /// </summary>
    /// <returns>
    /// One value per bucket, scaled so the largest is 1; all zeros when there were no frames
    /// or the smoothed peak is not positive.
    /// </returns>
    public double[] Finish()
    {
        var profile = new double[_bucketCount];
        if (_frameCount == 0)
        {
            return profile; // degenerate input — all zeros, as the whole-buffer guards returned
        }

        for (var b = 0; b < _bucketCount; b++)
        {
            var n = _counts[b];
            if (n > 0)
            {
                profile[b] = Math.Sqrt(_sumSquares[b] / n);
            }
        }

        // Envelope smoothing (~15 ms) then peak-normalization — identical to the whole-buffer finalize.
        var totalSeconds = (double)_frameCount / _sampleRate;
        RmsLoudnessAlgorithm.SmoothEnvelope(profile, totalSeconds / _bucketCount);

        var peak = 0.0;
        foreach (var value in profile)
        {
            if (value > peak)
            {
                peak = value;
            }
        }

        if (peak <= 0)
        {
            Array.Clear(profile);
            return profile;
        }

        for (var b = 0; b < _bucketCount; b++)
        {
            profile[b] /= peak;
        }

        return profile;
    }
}