Stream the waveform compute so large uploads no longer buffer the whole file (Wave 2 OOM)

This commit is contained in:
daniel-c-harvey
2026-06-25 21:49:11 -04:00
parent aa0b64329f
commit 9347f11ff0
10 changed files with 594 additions and 120 deletions
+15 -6
View File
@@ -143,8 +143,9 @@ public class UnifiedReleaseService
return Result.CreateFailResult(MixHasNoTrackMessage);
}
var audio = await _trackContentService.GetAudioBinaryAsync(entryKey);
if (audio is null)
// Duration from the vault index metadata (no body load); its absence means no vault audio.
var duration = await _trackContentService.GetAudioDurationAsync(entryKey);
if (duration is null)
{
_logger.LogWarning("TriggerMixWaveform: no audio in vault for {EntryKey} (release {ReleaseId})", entryKey, releaseId);
return Result.CreateFailResult(MixTrackNoAudioMessage);
@@ -152,10 +153,18 @@ public class UnifiedReleaseService
// Duration-derived, constant-time-resolution capture (≈333 samples/sec) so long mixes are not
// under-sampled by a fixed bucket count — see WaveformResolution / spec §F. Same per-track
// high-res datum every track now carries (phase-12 §5).
var computed = await _waveformProfileService.ComputeAndStoreHighResAsync(
audio.Buffer, entryKey, audio.Duration);
if (!computed)
// high-res datum every track now carries (phase-12 §5). Streamed from the vault in bounded
// chunks (Wave 2): a ~GB mix is never buffered whole. The result is tri-state: null = entry
// vanished after the metadata read; false = uncomputable / write failed; true = success.
var computed = await _waveformProfileService.ComputeAndStoreHighResStreamingAsync(
_ => _trackContentService.OpenAudioStreamAsync(entryKey), entryKey, duration.Value, ct);
if (computed is null)
{
_logger.LogWarning("TriggerMixWaveform: no audio in vault for {EntryKey} (release {ReleaseId})", entryKey, releaseId);
return Result.CreateFailResult(MixTrackNoAudioMessage);
}
if (computed is false)
{
_logger.LogError("TriggerMixWaveform: waveform computation/storage failed for {EntryKey}", entryKey);
return Result.CreateFailResult("Failed to compute the Mix waveform.");
+16 -12
View File
@@ -279,8 +279,8 @@ public class UnifiedTrackService
// The old waveform no longer matches the new bytes. Regenerate both datums in place, keyed by
// the same EntryKey (the re-run overwrites the stale data). The store path no longer hands back
// a buffer, so the waveform compute re-reads the freshly stored audio from the vault — the same
// path the upload uses. That re-read is whole-file (Wave 2, still unbounded by design); the
// store itself is now streamed. Best-effort throughout: a datum failure never fails the replace.
// path the upload uses. That re-read is now a bounded streaming pass (Wave 2); neither the store
// nor the compute holds the whole file. Best-effort throughout: a datum failure never fails the replace.
await TryStoreWaveformDatumsAsync(entryKey, ct);
// Write the new duration to SQL. The vault bytes are already swapped, so this is the
@@ -302,15 +302,16 @@ public class UnifiedTrackService
// Compute and store both waveform datums for a freshly uploaded track: the fixed 512-bucket profile
// the player-bar seeker consumes, and the duration-derived high-res datum the lava visualizer
// consumes (phase-12 §5 — every track now carries one, computed at upload). Both source the same
// audio: read it back from the vault once (the authoritative parsed duration + the stored buffer)
// rather than re-reading and re-parsing the temp file. Best-effort throughout — never fails upload.
// consumes (phase-12 §5 — every track now carries one, computed at upload). Both are reduced in a
// SINGLE streaming pass over the vault audio (Wave 2): the duration comes from the vault index
// metadata (no body load) and the PCM is streamed in bounded chunks through two accumulators, so a
// ~GB mix never lands its whole body in a managed byte[]. Best-effort throughout — never fails upload.
private async Task TryStoreWaveformDatumsAsync(string entryKey, CancellationToken ct)
{
try
{
var audio = await _contentTrackContentService.GetAudioBinaryAsync(entryKey);
if (audio is null)
var duration = await _contentTrackContentService.GetAudioDurationAsync(entryKey);
if (duration is null)
{
_logger.LogWarning(
"Waveform datum step: no audio in vault for {EntryKey} immediately after store; skipping.",
@@ -318,8 +319,8 @@ public class UnifiedTrackService
return;
}
await _waveformProfileService.ComputeAndStoreAsync(audio.Buffer, entryKey);
await _waveformProfileService.ComputeAndStoreHighResAsync(audio.Buffer, entryKey, audio.Duration);
await _waveformProfileService.ComputeAndStoreAllStreamingAsync(
_ => _contentTrackContentService.OpenAudioStreamAsync(entryKey), entryKey, duration.Value, ct);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
@@ -350,8 +351,11 @@ public class UnifiedTrackService
{
ct.ThrowIfCancellationRequested();
var audio = await _contentTrackContentService.GetAudioBinaryAsync(track.EntryKey);
if (audio is null)
// Read the duration from the vault index metadata (no audio body load) — the same value the
// processor wrote at upload. Bounds this admin path too (Wave 2): a backfill over a catalogue
// of long mixes no longer pulls each whole file into memory just to read its runtime.
var duration = await _contentTrackContentService.GetAudioDurationAsync(track.EntryKey);
if (duration is null)
{
_logger.LogWarning("BackfillDurationsAsync: no vault audio for {EntryKey} (track {Id}); skipping.",
track.EntryKey, track.Id);
@@ -359,7 +363,7 @@ public class UnifiedTrackService
continue;
}
var write = await _sqlTrackService.UpdateDuration(track.Id, audio.Duration, ct);
var write = await _sqlTrackService.UpdateDuration(track.Id, duration.Value, ct);
if (!write.Success)
{
var error = write.Messages.FirstOrDefault()?.Message ?? "Unknown error";