Stream the waveform compute so large uploads no longer buffer the whole file (Wave 2 OOM)

This commit is contained in:
daniel-c-harvey
2026-06-25 21:49:11 -04:00
parent aa0b64329f
commit 9347f11ff0
10 changed files with 594 additions and 120 deletions
@@ -288,6 +288,105 @@ public class AudioProcessor
return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Reads only the WAV header region from <paramref name="stream"/> (a bounded window, never the
/// audio body) and returns where the PCM data region begins, how long it is, and the format
/// parameters needed to decode it — the streaming counterpart of <see cref="TryExtractPcm"/>. The
/// data length is clamped against <paramref name="totalFileLength"/> (the true backing-file size),
/// so the caller streams exactly the present PCM. Returns null for the same inputs
/// <see cref="TryExtractPcm"/> rejects — non-WAV bytes (mp3/flac), float, and padded-container
/// EXTENSIBLE — so the caller treats null as "no profile computable" and continues gracefully.
///
/// <paramref name="stream"/> must be positioned at the start; on return its position is past the
/// header window (the caller seeks to <c>DataStart</c> before streaming the body). No whole-file
/// buffer is allocated — peak memory is the bounded header window.
/// </summary>
public async Task<WavPcmStreamInfo?> TryReadPcmStreamInfoAsync(
Stream stream, long totalFileLength, CancellationToken cancellationToken = default)
{
var window = await ReadWavHeaderWindowAsync(stream, cancellationToken);
if (window is null)
{
return null;
}
var validation = ValidateWavStructure(window);
if (!validation.IsValid || validation.IsFloat)
{
return null;
}
WavMetadata metadata;
try
{
metadata = ParseWavMetadata(window, validation);
ValidateAudioParameters(metadata);
if (metadata.IsPaddedContainer)
{
return null;
}
}
catch
{
return null;
}
long dataStart = validation.DataChunkPos + 8;
if (dataStart > totalFileLength)
{
return null;
}
var available = totalFileLength - dataStart;
var dataLength = Math.Min((long)metadata.DataSize, available);
if (dataLength <= 0)
{
return null;
}
return new WavPcmStreamInfo(
dataStart, dataLength, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Reads enough of <paramref name="stream"/> to contain the fmt chunk and the data chunk's 8-byte
/// header, growing in 64 KB steps until the data chunk is locatable or EOF / the
/// <see cref="HeaderWindowCap"/> is reached. Bails after the first read when the bytes are not a
/// RIFF/WAVE container, so a non-WAV stream (mp3/flac) costs one read, not the full cap. Returns
/// null only when nothing could be read.
/// </summary>
private static async Task<byte[]?> ReadWavHeaderWindowAsync(Stream stream, CancellationToken ct)
{
using var ms = new MemoryStream();
var buffer = new byte[HeaderWindowStep];
while (ms.Length < HeaderWindowCap)
{
var read = await stream.ReadAsync(buffer, ct);
if (read == 0)
break;
ms.Write(buffer, 0, read);
var soFar = ms.ToArray();
// Early-out for non-WAV input: once at least the 12-byte RIFF/WAVE preamble is in hand,
// a missing signature means this will never be a WAV — stop rather than read to the cap.
if (soFar.Length >= 12 && !HasRiffWaveSignature(soFar))
return soFar;
// FindChunk returns -1 until the data chunk header is fully in the window; on a normal
// file it sits within the first 64 KB so this loop runs exactly once.
if (FindChunk(soFar, "data") >= 0)
return soFar;
}
return ms.Length > 0 ? ms.ToArray() : null;
}
private static bool HasRiffWaveSignature(byte[] buffer) =>
buffer.Length >= 12
&& System.Text.Encoding.ASCII.GetString(buffer, 0, 4) == "RIFF"
&& System.Text.Encoding.ASCII.GetString(buffer, 8, 4) == "WAVE";
/// <summary>
/// Extracts metadata from WAV file buffer with comprehensive validation
/// </summary>
@@ -698,4 +797,21 @@ public readonly record struct PcmData(
ReadOnlyMemory<byte> Pcm,
int Channels,
int SampleRate,
int BitsPerSample);
/// <summary>
/// Where a WAV's PCM data region lives and how to decode it, without the bytes themselves — the
/// streaming counterpart of <see cref="PcmData"/>. The caller seeks to <see cref="DataStart"/> and
/// streams exactly <see cref="DataLength"/> bytes through a loudness accumulator.
/// </summary>
/// <param name="DataStart">Absolute byte offset of the first PCM sample (past the data chunk header).</param>
/// <param name="DataLength">PCM region length in bytes, clamped to what the backing file actually holds.</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct WavPcmStreamInfo(
long DataStart,
long DataLength,
int Channels,
int SampleRate,
int BitsPerSample);