namespace DeepDrftContent.Processors;

/// <summary>
/// Loudness via root-mean-square amplitude per time bucket. Decodes integer PCM (8-bit unsigned,
/// 16/24/32-bit signed little-endian), averages channels to mono, partitions the frames into
/// equal time slices, takes the RMS of each slice, then peak-normalizes so the loudest bucket is 1.
/// No external audio dependency — operates directly on the WAV data-chunk bytes.
/// </summary>
public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
{
    /// <summary>
    /// Computes a peak-normalized RMS loudness envelope with <paramref name="bucketCount"/> entries.
    /// </summary>
    /// <param name="pcmData">Interleaved PCM sample bytes. A trailing partial frame is ignored.</param>
    /// <param name="channels">Number of interleaved channels; values &lt;= 0 yield an all-zero result.</param>
    /// <param name="sampleRate">
    /// Not used by this algorithm: buckets are assigned by frame index, which already gives
    /// equal-duration slices.
    /// </param>
    /// <param name="bitsPerSample">
    /// PCM bit depth. 8, 16, 24 and 32 are decoded; values below 8 yield an all-zero result.
    /// </param>
    /// <param name="bucketCount">Number of time slices to produce. Must be positive.</param>
    /// <returns>
    /// An array of length <paramref name="bucketCount"/>. Each entry is the RMS of the mono signal
    /// in that slice divided by the largest RMS, so the loudest bucket is 1. Buckets that contain
    /// no frames (more buckets than frames) stay 0, and silent or empty input returns all zeros.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bucketCount"/> is not positive, or <paramref name="bitsPerSample"/> is an
    /// unsupported depth of at least 8 bits and there is at least one frame to decode.
    /// </exception>
    public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
    {
        if (bucketCount <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
        }

        var result = new double[bucketCount];

        var bytesPerSample = bitsPerSample / 8;
        if (channels <= 0 || bytesPerSample <= 0)
        {
            return result;
        }

        // 64-bit product: bytesPerSample * channels can wrap a 32-bit int for pathological header
        // values, which previously produced a zero or negative frame size.
        var bytesPerFrame = (long)bytesPerSample * channels;
        var frameCount = (int)(pcmData.Length / bytesPerFrame);
        if (frameCount == 0)
        {
            return result;
        }

        // frameCount > 0 implies bytesPerFrame <= pcmData.Length, so the narrowing is safe.
        var frameSize = (int)bytesPerFrame;

        // Frame f belongs to bucket floor(f * bucketCount / frameCount). Equivalently, bucket b owns
        // frames [ceil(b * frameCount / bucketCount), ceil((b + 1) * frameCount / bucketCount)).
        // Walking bucket by bucket keeps that exact mapping while doing the 64-bit divide once per
        // bucket instead of once per frame.
        var peak = 0.0;
        var firstFrame = 0;
        for (var bucket = 0; bucket < bucketCount; bucket++)
        {
            var endFrame = (int)(((long)(bucket + 1) * frameCount + bucketCount - 1) / bucketCount);

            if (endFrame > firstFrame)
            {
                var sumSquares = 0.0;
                for (var frame = firstFrame; frame < endFrame; frame++)
                {
                    var sampleStart = frame * frameSize;
                    var channelSum = 0.0;
                    for (var ch = 0; ch < channels; ch++)
                    {
                        channelSum += ReadSampleNormalized(pcmData, sampleStart, bitsPerSample);
                        sampleStart += bytesPerSample;
                    }

                    var mono = channelSum / channels;
                    sumSquares += mono * mono;
                }

                var rms = Math.Sqrt(sumSquares / (endFrame - firstFrame));
                result[bucket] = rms;
                if (rms > peak)
                {
                    peak = rms;
                }
            }

            firstFrame = endFrame;
        }

        // Silence leaves peak at 0 and every entry already 0, so only scale when there is signal.
        if (peak > 0)
        {
            for (var i = 0; i < bucketCount; i++)
            {
                result[i] /= peak;
            }
        }

        return result;
    }

    /// <summary>
    /// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1).
    /// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
    /// </summary>
    /// <param name="data">PCM bytes containing the sample.</param>
    /// <param name="offset">Index of the sample's first (least significant) byte.</param>
    /// <param name="bitsPerSample">Bit depth of the sample: 8, 16, 24 or 32.</param>
    /// <returns>The sample value divided by the magnitude of the most negative representable value.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bitsPerSample"/> is not one of the supported depths.
    /// </exception>
    private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
    {
        switch (bitsPerSample)
        {
            case 8:
                // Unsigned, midpoint 128.
                return (data[offset] - 128) / 128.0;

            case 16:
                return System.Buffers.Binary.BinaryPrimitives.ReadInt16LittleEndian(data.Slice(offset)) / 32768.0;

            case 24:
            {
                // Pack the three bytes into the top of an int, then arithmetic-shift back down so
                // the 24-bit sign bit is extended through the upper byte.
                var raw = ((data[offset] << 8) | (data[offset + 1] << 16) | (data[offset + 2] << 24)) >> 8;
                return raw / 8388608.0;
            }

            case 32:
                return System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(data.Slice(offset)) / 2147483648.0;

            default:
                throw new ArgumentOutOfRangeException(
                    nameof(bitsPerSample), bitsPerSample, "Unsupported PCM bit depth.");
        }
    }
}