Merge Wave 2: stream waveform compute to bound large-upload memory into dev
This commit is contained in:
@@ -112,10 +112,10 @@ public class ReleaseController : ControllerBase
|
|||||||
}
|
}
|
||||||
|
|
||||||
// POST api/release/{id}/mix/waveform ([ApiKeyAuthorize], no body)
|
// POST api/release/{id}/mix/waveform ([ApiKeyAuthorize], no body)
|
||||||
// Server-side trigger: fetch the Mix's track audio from the vault, compute a duration-derived high-res
|
// Server-side trigger: stream the Mix's track audio from the vault, compute a duration-derived
|
||||||
// waveform via ComputeAndStoreHighResAsync, store it in the track-waveforms vault, and set
|
// high-res waveform, store it in the track-waveforms vault, and set MixMetadata.WaveformEntryKey.
|
||||||
// MixMetadata.WaveformEntryKey. 404 when the release is missing or has no stored audio; 500 on
|
// 404 when the release is missing or has no stored audio; 500 on compute/storage failure. Declared
|
||||||
// compute/storage failure. Declared before "{id:long}".
|
// before "{id:long}".
|
||||||
[ApiKeyAuthorize]
|
[ApiKeyAuthorize]
|
||||||
[HttpPost("{id:long}/mix/waveform")]
|
[HttpPost("{id:long}/mix/waveform")]
|
||||||
public async Task<ActionResult> GenerateMixWaveform(long id, CancellationToken ct = default)
|
public async Task<ActionResult> GenerateMixWaveform(long id, CancellationToken ct = default)
|
||||||
|
|||||||
@@ -756,15 +756,18 @@ public class TrackController : ControllerBase
|
|||||||
[HttpPost("{trackId}/waveform")]
|
[HttpPost("{trackId}/waveform")]
|
||||||
public async Task<ActionResult> GenerateWaveform(string trackId)
|
public async Task<ActionResult> GenerateWaveform(string trackId)
|
||||||
{
|
{
|
||||||
var audio = await _trackContentService.GetAudioBinaryAsync(trackId);
|
// Streaming compute (Wave 2): the WAV is read from the vault in bounded chunks, never buffered
|
||||||
if (audio is null)
|
// whole. Tri-state: null = no vault audio (404), false = present but uncomputable / write failed
|
||||||
|
// (500), true = stored.
|
||||||
|
var stored = await _waveformProfileService.ComputeAndStoreProfileStreamingAsync(
|
||||||
|
_ => _trackContentService.OpenAudioStreamAsync(trackId), trackId, HttpContext.RequestAborted);
|
||||||
|
if (stored is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning("GenerateWaveform: no audio in vault for {TrackId}", trackId);
|
_logger.LogWarning("GenerateWaveform: no audio in vault for {TrackId}", trackId);
|
||||||
return NotFound();
|
return NotFound();
|
||||||
}
|
}
|
||||||
|
|
||||||
var stored = await _waveformProfileService.ComputeAndStoreAsync(audio.Buffer, trackId);
|
if (stored is false)
|
||||||
if (!stored)
|
|
||||||
{
|
{
|
||||||
_logger.LogError("GenerateWaveform: profile computation/storage failed for {TrackId}", trackId);
|
_logger.LogError("GenerateWaveform: profile computation/storage failed for {TrackId}", trackId);
|
||||||
return StatusCode(500, "Failed to generate waveform profile.");
|
return StatusCode(500, "Failed to generate waveform profile.");
|
||||||
@@ -784,16 +787,27 @@ public class TrackController : ControllerBase
|
|||||||
[HttpPost("{trackId}/waveform/high-res")]
|
[HttpPost("{trackId}/waveform/high-res")]
|
||||||
public async Task<ActionResult> GenerateHighResWaveform(string trackId)
|
public async Task<ActionResult> GenerateHighResWaveform(string trackId)
|
||||||
{
|
{
|
||||||
var audio = await _trackContentService.GetAudioBinaryAsync(trackId);
|
// The high-res bucket count is duration-derived. Read the duration from the vault index metadata
|
||||||
if (audio is null)
|
// (no body load); its absence means the track has no vault audio → 404.
|
||||||
|
var duration = await _trackContentService.GetAudioDurationAsync(trackId);
|
||||||
|
if (duration is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning("GenerateHighResWaveform: no audio in vault for {TrackId}", trackId);
|
_logger.LogWarning("GenerateHighResWaveform: no audio in vault for {TrackId}", trackId);
|
||||||
return NotFound();
|
return NotFound();
|
||||||
}
|
}
|
||||||
|
|
||||||
var stored = await _waveformProfileService.ComputeAndStoreHighResAsync(
|
// Streaming compute (Wave 2): bounded read of the vault WAV. Tri-state mapping as in
|
||||||
audio.Buffer, trackId, audio.Duration);
|
// GenerateWaveform — null (entry vanished between the metadata read and the compute) → 404.
|
||||||
if (!stored)
|
var stored = await _waveformProfileService.ComputeAndStoreHighResStreamingAsync(
|
||||||
|
_ => _trackContentService.OpenAudioStreamAsync(trackId), trackId, duration.Value,
|
||||||
|
HttpContext.RequestAborted);
|
||||||
|
if (stored is null)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("GenerateHighResWaveform: no audio in vault for {TrackId}", trackId);
|
||||||
|
return NotFound();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stored is false)
|
||||||
{
|
{
|
||||||
_logger.LogError("GenerateHighResWaveform: computation/storage failed for {TrackId}", trackId);
|
_logger.LogError("GenerateHighResWaveform: computation/storage failed for {TrackId}", trackId);
|
||||||
return StatusCode(500, "Failed to generate high-res waveform datum.");
|
return StatusCode(500, "Failed to generate high-res waveform datum.");
|
||||||
|
|||||||
@@ -143,8 +143,9 @@ public class UnifiedReleaseService
|
|||||||
return Result.CreateFailResult(MixHasNoTrackMessage);
|
return Result.CreateFailResult(MixHasNoTrackMessage);
|
||||||
}
|
}
|
||||||
|
|
||||||
var audio = await _trackContentService.GetAudioBinaryAsync(entryKey);
|
// Duration from the vault index metadata (no body load); its absence means no vault audio.
|
||||||
if (audio is null)
|
var duration = await _trackContentService.GetAudioDurationAsync(entryKey);
|
||||||
|
if (duration is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning("TriggerMixWaveform: no audio in vault for {EntryKey} (release {ReleaseId})", entryKey, releaseId);
|
_logger.LogWarning("TriggerMixWaveform: no audio in vault for {EntryKey} (release {ReleaseId})", entryKey, releaseId);
|
||||||
return Result.CreateFailResult(MixTrackNoAudioMessage);
|
return Result.CreateFailResult(MixTrackNoAudioMessage);
|
||||||
@@ -152,10 +153,18 @@ public class UnifiedReleaseService
|
|||||||
|
|
||||||
// Duration-derived, constant-time-resolution capture (≈333 samples/sec) so long mixes are not
|
// Duration-derived, constant-time-resolution capture (≈333 samples/sec) so long mixes are not
|
||||||
// under-sampled by a fixed bucket count — see WaveformResolution / spec §F. Same per-track
|
// under-sampled by a fixed bucket count — see WaveformResolution / spec §F. Same per-track
|
||||||
// high-res datum every track now carries (phase-12 §5).
|
// high-res datum every track now carries (phase-12 §5). Streamed from the vault in bounded
|
||||||
var computed = await _waveformProfileService.ComputeAndStoreHighResAsync(
|
// chunks (Wave 2): a ~GB mix is never buffered whole. Tri-state — null = entry vanished after
|
||||||
audio.Buffer, entryKey, audio.Duration);
|
// the metadata read; false = uncomputable / write failed.
|
||||||
if (!computed)
|
var computed = await _waveformProfileService.ComputeAndStoreHighResStreamingAsync(
|
||||||
|
_ => _trackContentService.OpenAudioStreamAsync(entryKey), entryKey, duration.Value, ct);
|
||||||
|
if (computed is null)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("TriggerMixWaveform: no audio in vault for {EntryKey} (release {ReleaseId})", entryKey, releaseId);
|
||||||
|
return Result.CreateFailResult(MixTrackNoAudioMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (computed is false)
|
||||||
{
|
{
|
||||||
_logger.LogError("TriggerMixWaveform: waveform computation/storage failed for {EntryKey}", entryKey);
|
_logger.LogError("TriggerMixWaveform: waveform computation/storage failed for {EntryKey}", entryKey);
|
||||||
return Result.CreateFailResult("Failed to compute the Mix waveform.");
|
return Result.CreateFailResult("Failed to compute the Mix waveform.");
|
||||||
|
|||||||
@@ -279,8 +279,8 @@ public class UnifiedTrackService
|
|||||||
// The old waveform no longer matches the new bytes. Regenerate both datums in place, keyed by
|
// The old waveform no longer matches the new bytes. Regenerate both datums in place, keyed by
|
||||||
// the same EntryKey (the re-run overwrites the stale data). The store path no longer hands back
|
// the same EntryKey (the re-run overwrites the stale data). The store path no longer hands back
|
||||||
// a buffer, so the waveform compute re-reads the freshly stored audio from the vault — the same
|
// a buffer, so the waveform compute re-reads the freshly stored audio from the vault — the same
|
||||||
// path the upload uses. That re-read is whole-file (Wave 2, still unbounded by design); the
|
// path the upload uses. That re-read is now a bounded streaming pass (Wave 2); neither the store
|
||||||
// store itself is now streamed. Best-effort throughout: a datum failure never fails the replace.
|
// nor the compute holds the whole file. Best-effort throughout: a datum failure never fails the replace.
|
||||||
await TryStoreWaveformDatumsAsync(entryKey, ct);
|
await TryStoreWaveformDatumsAsync(entryKey, ct);
|
||||||
|
|
||||||
// Write the new duration to SQL. The vault bytes are already swapped, so this is the
|
// Write the new duration to SQL. The vault bytes are already swapped, so this is the
|
||||||
@@ -302,15 +302,16 @@ public class UnifiedTrackService
|
|||||||
|
|
||||||
// Compute and store both waveform datums for a freshly uploaded track: the fixed 512-bucket profile
|
// Compute and store both waveform datums for a freshly uploaded track: the fixed 512-bucket profile
|
||||||
// the player-bar seeker consumes, and the duration-derived high-res datum the lava visualizer
|
// the player-bar seeker consumes, and the duration-derived high-res datum the lava visualizer
|
||||||
// consumes (phase-12 §5 — every track now carries one, computed at upload). Both source the same
|
// consumes (phase-12 §5 — every track now carries one, computed at upload). Both are reduced in a
|
||||||
// audio: read it back from the vault once (the authoritative parsed duration + the stored buffer)
|
// SINGLE streaming pass over the vault audio (Wave 2): the duration comes from the vault index
|
||||||
// rather than re-reading and re-parsing the temp file. Best-effort throughout — never fails upload.
|
// metadata (no body load) and the PCM is streamed in bounded chunks through two accumulators, so a
|
||||||
|
// ~GB mix never lands its whole body in a managed byte[]. Best-effort throughout — never fails upload.
|
||||||
private async Task TryStoreWaveformDatumsAsync(string entryKey, CancellationToken ct)
|
private async Task TryStoreWaveformDatumsAsync(string entryKey, CancellationToken ct)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var audio = await _contentTrackContentService.GetAudioBinaryAsync(entryKey);
|
var duration = await _contentTrackContentService.GetAudioDurationAsync(entryKey);
|
||||||
if (audio is null)
|
if (duration is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning(
|
_logger.LogWarning(
|
||||||
"Waveform datum step: no audio in vault for {EntryKey} immediately after store; skipping.",
|
"Waveform datum step: no audio in vault for {EntryKey} immediately after store; skipping.",
|
||||||
@@ -318,8 +319,8 @@ public class UnifiedTrackService
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
await _waveformProfileService.ComputeAndStoreAsync(audio.Buffer, entryKey);
|
await _waveformProfileService.ComputeAndStoreAllStreamingAsync(
|
||||||
await _waveformProfileService.ComputeAndStoreHighResAsync(audio.Buffer, entryKey, audio.Duration);
|
_ => _contentTrackContentService.OpenAudioStreamAsync(entryKey), entryKey, duration.Value, ct);
|
||||||
}
|
}
|
||||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||||
{
|
{
|
||||||
@@ -350,8 +351,11 @@ public class UnifiedTrackService
|
|||||||
{
|
{
|
||||||
ct.ThrowIfCancellationRequested();
|
ct.ThrowIfCancellationRequested();
|
||||||
|
|
||||||
var audio = await _contentTrackContentService.GetAudioBinaryAsync(track.EntryKey);
|
// Read the duration from the vault index metadata (no audio body load) — the same value the
|
||||||
if (audio is null)
|
// processor wrote at upload. Bounds this admin path too (Wave 2): a backfill over a catalogue
|
||||||
|
// of long mixes no longer pulls each whole file into memory just to read its runtime.
|
||||||
|
var duration = await _contentTrackContentService.GetAudioDurationAsync(track.EntryKey);
|
||||||
|
if (duration is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning("BackfillDurationsAsync: no vault audio for {EntryKey} (track {Id}); skipping.",
|
_logger.LogWarning("BackfillDurationsAsync: no vault audio for {EntryKey} (track {Id}); skipping.",
|
||||||
track.EntryKey, track.Id);
|
track.EntryKey, track.Id);
|
||||||
@@ -359,7 +363,7 @@ public class UnifiedTrackService
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
var write = await _sqlTrackService.UpdateDuration(track.Id, audio.Duration, ct);
|
var write = await _sqlTrackService.UpdateDuration(track.Id, duration.Value, ct);
|
||||||
if (!write.Success)
|
if (!write.Success)
|
||||||
{
|
{
|
||||||
var error = write.Messages.FirstOrDefault()?.Message ?? "Unknown error";
|
var error = write.Messages.FirstOrDefault()?.Message ?? "Unknown error";
|
||||||
|
|||||||
@@ -288,6 +288,105 @@ public class AudioProcessor
|
|||||||
return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
|
return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Reads only the WAV header region from <paramref name="stream"/> (a bounded window, never the
|
||||||
|
/// audio body) and returns where the PCM data region begins, how long it is, and the format
|
||||||
|
/// parameters needed to decode it — the streaming counterpart of <see cref="TryExtractPcm"/>. The
|
||||||
|
/// data length is clamped against <paramref name="totalFileLength"/> (the true backing-file size),
|
||||||
|
/// so the caller streams exactly the present PCM. Returns null for the same inputs
|
||||||
|
/// <see cref="TryExtractPcm"/> rejects — non-WAV bytes (mp3/flac), float, and padded-container
|
||||||
|
/// EXTENSIBLE — so the caller treats null as "no profile computable" and continues gracefully.
|
||||||
|
///
|
||||||
|
/// <paramref name="stream"/> must be positioned at the start; on return its position is past the
|
||||||
|
/// header window (the caller seeks to <c>DataStart</c> before streaming the body). No whole-file
|
||||||
|
/// buffer is allocated — peak memory is the bounded header window.
|
||||||
|
/// </summary>
|
||||||
|
public async Task<WavPcmStreamInfo?> TryReadPcmStreamInfoAsync(
|
||||||
|
Stream stream, long totalFileLength, CancellationToken cancellationToken = default)
|
||||||
|
{
|
||||||
|
var window = await ReadWavHeaderWindowAsync(stream, cancellationToken);
|
||||||
|
if (window is null)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var validation = ValidateWavStructure(window);
|
||||||
|
if (!validation.IsValid || validation.IsFloat)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
WavMetadata metadata;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
metadata = ParseWavMetadata(window, validation);
|
||||||
|
ValidateAudioParameters(metadata);
|
||||||
|
if (metadata.IsPaddedContainer)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
long dataStart = validation.DataChunkPos + 8;
|
||||||
|
if (dataStart > totalFileLength)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var available = totalFileLength - dataStart;
|
||||||
|
var dataLength = Math.Min((long)metadata.DataSize, available);
|
||||||
|
if (dataLength <= 0)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new WavPcmStreamInfo(
|
||||||
|
dataStart, dataLength, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Reads enough of <paramref name="stream"/> to contain the fmt chunk and the data chunk's 8-byte
|
||||||
|
/// header, growing in 64 KB steps until the data chunk is locatable or EOF / the
|
||||||
|
/// <see cref="HeaderWindowCap"/> is reached. Bails after the first read when the bytes are not a
|
||||||
|
/// RIFF/WAVE container, so a non-WAV stream (mp3/flac) costs one read, not the full cap. Returns
|
||||||
|
/// null only when nothing could be read.
|
||||||
|
/// </summary>
|
||||||
|
private static async Task<byte[]?> ReadWavHeaderWindowAsync(Stream stream, CancellationToken ct)
|
||||||
|
{
|
||||||
|
using var ms = new MemoryStream();
|
||||||
|
var buffer = new byte[HeaderWindowStep];
|
||||||
|
while (ms.Length < HeaderWindowCap)
|
||||||
|
{
|
||||||
|
var read = await stream.ReadAsync(buffer, ct);
|
||||||
|
if (read == 0)
|
||||||
|
break;
|
||||||
|
ms.Write(buffer, 0, read);
|
||||||
|
|
||||||
|
var soFar = ms.ToArray();
|
||||||
|
|
||||||
|
// Early-out for non-WAV input: once at least the 12-byte RIFF/WAVE preamble is in hand,
|
||||||
|
// a missing signature means this will never be a WAV — stop rather than read to the cap.
|
||||||
|
if (soFar.Length >= 12 && !HasRiffWaveSignature(soFar))
|
||||||
|
return soFar;
|
||||||
|
|
||||||
|
// FindChunk returns -1 until the data chunk header is fully in the window; on a normal
|
||||||
|
// file it sits within the first 64 KB so this loop runs exactly once.
|
||||||
|
if (FindChunk(soFar, "data") >= 0)
|
||||||
|
return soFar;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ms.Length > 0 ? ms.ToArray() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static bool HasRiffWaveSignature(byte[] buffer) =>
|
||||||
|
buffer.Length >= 12
|
||||||
|
&& System.Text.Encoding.ASCII.GetString(buffer, 0, 4) == "RIFF"
|
||||||
|
&& System.Text.Encoding.ASCII.GetString(buffer, 8, 4) == "WAVE";
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Extracts metadata from WAV file buffer with comprehensive validation
|
/// Extracts metadata from WAV file buffer with comprehensive validation
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -698,4 +797,21 @@ public readonly record struct PcmData(
|
|||||||
ReadOnlyMemory<byte> Pcm,
|
ReadOnlyMemory<byte> Pcm,
|
||||||
int Channels,
|
int Channels,
|
||||||
int SampleRate,
|
int SampleRate,
|
||||||
|
int BitsPerSample);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Where a WAV's PCM data region lives and how to decode it, without the bytes themselves — the
|
||||||
|
/// streaming counterpart of <see cref="PcmData"/>. The caller seeks to <see cref="DataStart"/> and
|
||||||
|
/// streams exactly <see cref="DataLength"/> bytes through a loudness accumulator.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="DataStart">Absolute byte offset of the first PCM sample (past the data chunk header).</param>
|
||||||
|
/// <param name="DataLength">PCM region length in bytes, clamped to what the backing file actually holds.</param>
|
||||||
|
/// <param name="Channels">Number of interleaved channels.</param>
|
||||||
|
/// <param name="SampleRate">Samples per second.</param>
|
||||||
|
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
|
||||||
|
public readonly record struct WavPcmStreamInfo(
|
||||||
|
long DataStart,
|
||||||
|
long DataLength,
|
||||||
|
int Channels,
|
||||||
|
int SampleRate,
|
||||||
int BitsPerSample);
|
int BitsPerSample);
|
||||||
@@ -20,4 +20,46 @@ public interface ILoudnessAlgorithm
|
|||||||
/// is 1. All zeros when the signal is silent (peak is 0) or no samples are present.
|
/// is 1. All zeros when the signal is silent (peak is 0) or no samples are present.
|
||||||
/// </returns>
|
/// </returns>
|
||||||
double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount);
|
double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Creates a stateful accumulator that reduces the same loudness profile from PCM fed in bounded
|
||||||
|
/// chunks rather than from one contiguous buffer. The streaming waveform path uses this so a long
|
||||||
|
/// track's PCM is never materialized whole in a managed <c>byte[]</c>. The accumulator's output is
|
||||||
|
/// byte-identical to <see cref="Compute"/> for the same total PCM, because <see cref="Compute"/> is
|
||||||
|
/// itself defined in terms of one — the single source of truth for the loudness reduction.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="pcmByteLength">
|
||||||
|
/// Total length of the PCM data region in bytes. Required up front because the bucket each frame
|
||||||
|
/// lands in is derived from the frame's position relative to the total frame count.
|
||||||
|
/// </param>
|
||||||
|
/// <param name="channels">Number of interleaved channels; averaged to mono per frame.</param>
|
||||||
|
/// <param name="sampleRate">Samples per second (used for the envelope-smoothing time base).</param>
|
||||||
|
/// <param name="bitsPerSample">Bit depth (8 unsigned, 16/24/32 signed) used to decode samples.</param>
|
||||||
|
/// <param name="bucketCount">Number of equal time slices to reduce the signal to.</param>
|
||||||
|
ILoudnessAccumulator CreateAccumulator(
|
||||||
|
long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Stateful, single-pass reducer for one loudness profile. Frames are fed via <see cref="Add"/> in
|
||||||
|
/// arbitrary (non-frame-aligned) chunks — a partial frame straddling a chunk boundary is carried
|
||||||
|
/// internally — and <see cref="Finish"/> emits the peak-normalized <c>double[bucketCount]</c>. Not
|
||||||
|
/// thread-safe; feed one stream sequentially. Reusable across the same stream's chunks only, not
|
||||||
|
/// across streams.
|
||||||
|
/// </summary>
|
||||||
|
public interface ILoudnessAccumulator
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Feeds the next run of PCM bytes (interleaved, little-endian). Need not be frame-aligned; bytes
|
||||||
|
/// that do not complete a frame are retained until the next call. Bytes past the total frame count
|
||||||
|
/// declared at construction are ignored, matching the whole-buffer path's trailing-partial-frame drop.
|
||||||
|
/// </summary>
|
||||||
|
void Add(ReadOnlySpan<byte> pcmChunk);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Finalizes and returns the peak-normalized loudness profile (<c>double[bucketCount]</c>, each in
|
||||||
|
/// [0, 1]). All zeros for silence or a degenerate (no-frame) input. Call once, after the last
|
||||||
|
/// <see cref="Add"/>.
|
||||||
|
/// </summary>
|
||||||
|
double[] Finish();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,100 +18,27 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public const double SmoothingTimeConstantSeconds = 0.005;
|
public const double SmoothingTimeConstantSeconds = 0.005;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Whole-buffer reduction. Defined in terms of <see cref="CreateAccumulator"/> so the streaming and
|
||||||
|
/// whole-buffer paths share one decode + finalize implementation — byte-identical output by
|
||||||
|
/// construction, not by parallel maintenance.
|
||||||
|
/// </summary>
|
||||||
public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
|
public double[] Compute(ReadOnlySpan<byte> pcmData, int channels, int sampleRate, int bitsPerSample, int bucketCount)
|
||||||
|
{
|
||||||
|
var accumulator = CreateAccumulator(pcmData.Length, channels, sampleRate, bitsPerSample, bucketCount);
|
||||||
|
accumulator.Add(pcmData);
|
||||||
|
return accumulator.Finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
public ILoudnessAccumulator CreateAccumulator(
|
||||||
|
long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
|
||||||
{
|
{
|
||||||
if (bucketCount <= 0)
|
if (bucketCount <= 0)
|
||||||
{
|
{
|
||||||
throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
|
throw new ArgumentOutOfRangeException(nameof(bucketCount), "Bucket count must be positive.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var result = new double[bucketCount];
|
return new RmsLoudnessAccumulator(pcmByteLength, channels, sampleRate, bitsPerSample, bucketCount);
|
||||||
|
|
||||||
if (channels <= 0)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
var bytesPerSample = bitsPerSample / 8;
|
|
||||||
if (bytesPerSample <= 0)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
var bytesPerFrame = bytesPerSample * channels;
|
|
||||||
var frameCount = pcmData.Length / bytesPerFrame;
|
|
||||||
if (frameCount == 0)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sum of squared mono amplitudes and the frame count, per bucket. A frame's bucket is
|
|
||||||
// determined by its position in the timeline so buckets are equal-duration slices.
|
|
||||||
var sumSquares = new double[bucketCount];
|
|
||||||
var counts = new long[bucketCount];
|
|
||||||
|
|
||||||
for (var frame = 0; frame < frameCount; frame++)
|
|
||||||
{
|
|
||||||
var frameStart = frame * bytesPerFrame;
|
|
||||||
|
|
||||||
double channelSum = 0;
|
|
||||||
for (var ch = 0; ch < channels; ch++)
|
|
||||||
{
|
|
||||||
var sampleStart = frameStart + ch * bytesPerSample;
|
|
||||||
channelSum += ReadSampleNormalized(pcmData, sampleStart, bitsPerSample);
|
|
||||||
}
|
|
||||||
|
|
||||||
var mono = channelSum / channels;
|
|
||||||
|
|
||||||
// long math avoids overflow on large files before the divide back into bucket index.
|
|
||||||
var bucket = (int)((long)frame * bucketCount / frameCount);
|
|
||||||
if (bucket >= bucketCount)
|
|
||||||
{
|
|
||||||
bucket = bucketCount - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
sumSquares[bucket] += mono * mono;
|
|
||||||
counts[bucket]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (var i = 0; i < bucketCount; i++)
|
|
||||||
{
|
|
||||||
if (counts[i] > 0)
|
|
||||||
{
|
|
||||||
result[i] = Math.Sqrt(sumSquares[i] / counts[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Envelope smoothing (~15 ms): round the spikey per-bucket RMS into a smooth contour before
|
|
||||||
// peak-normalization, so the rendered ribbon reads as a continuous curve, not faceted polygons.
|
|
||||||
// Each bucket spans (totalSeconds / bucketCount) of audio; the filter coefficient is derived
|
|
||||||
// from that against the time constant so the smoothing is duration-aware, not a fixed window.
|
|
||||||
var totalSeconds = (double)frameCount / sampleRate;
|
|
||||||
var bucketSeconds = totalSeconds / bucketCount;
|
|
||||||
SmoothEnvelope(result, bucketSeconds);
|
|
||||||
|
|
||||||
var peak = 0.0;
|
|
||||||
for (var i = 0; i < bucketCount; i++)
|
|
||||||
{
|
|
||||||
if (result[i] > peak)
|
|
||||||
{
|
|
||||||
peak = result[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (peak <= 0)
|
|
||||||
{
|
|
||||||
// Silence — return all zeros (Array is already zero-initialized).
|
|
||||||
Array.Clear(result);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (var i = 0; i < bucketCount; i++)
|
|
||||||
{
|
|
||||||
result[i] /= peak;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -122,7 +49,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
|||||||
/// each bucket blends <c>(1 − a)</c> of itself with <c>a</c> of the running envelope. A near-zero
|
/// each bucket blends <c>(1 − a)</c> of itself with <c>a</c> of the running envelope. A near-zero
|
||||||
/// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
|
/// or non-finite bucket duration leaves the data untouched (nothing to smooth meaningfully).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private static void SmoothEnvelope(double[] data, double bucketSeconds)
|
internal static void SmoothEnvelope(double[] data, double bucketSeconds)
|
||||||
{
|
{
|
||||||
if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
|
if (data.Length < 2 || bucketSeconds <= 0 || !double.IsFinite(bucketSeconds))
|
||||||
{
|
{
|
||||||
@@ -154,7 +81,7 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
|||||||
/// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
|
/// Decodes one PCM sample at <paramref name="offset"/> to a normalized amplitude in [-1, 1].
|
||||||
/// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
|
/// 8-bit is unsigned (0..255, centered at 128); 16/24/32-bit are signed little-endian.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
|
internal static double ReadSampleNormalized(ReadOnlySpan<byte> data, int offset, int bitsPerSample)
|
||||||
{
|
{
|
||||||
switch (bitsPerSample)
|
switch (bitsPerSample)
|
||||||
{
|
{
|
||||||
@@ -194,3 +121,167 @@ public class RmsLoudnessAlgorithm : ILoudnessAlgorithm
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Single-pass RMS accumulator backing <see cref="RmsLoudnessAlgorithm"/>. Frames are fed via
|
||||||
|
/// <see cref="Add"/> in arbitrary chunks; a partial frame straddling a chunk boundary is carried in a
|
||||||
|
/// one-frame buffer. The per-frame decode, bucket assignment, and per-bucket accumulation are the exact
|
||||||
|
/// arithmetic the former whole-buffer loop used, in the same frame order, so the floating-point result
|
||||||
|
/// is bit-identical whether the PCM arrives in one span or many. <see cref="Finish"/> applies the same
|
||||||
|
/// envelope smoothing and peak-normalization as before. Memory is O(bucketCount + one frame).
|
||||||
|
/// </summary>
|
||||||
|
public sealed class RmsLoudnessAccumulator : ILoudnessAccumulator
|
||||||
|
{
|
||||||
|
private readonly int _channels;
|
||||||
|
private readonly int _sampleRate;
|
||||||
|
private readonly int _bitsPerSample;
|
||||||
|
private readonly int _bucketCount;
|
||||||
|
private readonly int _bytesPerSample;
|
||||||
|
private readonly int _bytesPerFrame;
|
||||||
|
private readonly long _frameCount;
|
||||||
|
|
||||||
|
private readonly double[] _sumSquares;
|
||||||
|
private readonly long[] _counts;
|
||||||
|
private readonly byte[] _carry;
|
||||||
|
private int _carryLen;
|
||||||
|
private long _frameIndex;
|
||||||
|
|
||||||
|
internal RmsLoudnessAccumulator(long pcmByteLength, int channels, int sampleRate, int bitsPerSample, int bucketCount)
|
||||||
|
{
|
||||||
|
_channels = channels;
|
||||||
|
_sampleRate = sampleRate;
|
||||||
|
_bitsPerSample = bitsPerSample;
|
||||||
|
_bucketCount = bucketCount;
|
||||||
|
_sumSquares = new double[bucketCount];
|
||||||
|
_counts = new long[bucketCount];
|
||||||
|
|
||||||
|
// Guards mirror the former whole-buffer Compute exactly: any degenerate parameter leaves
|
||||||
|
// _frameCount at 0, so Add is a no-op and Finish returns the zero-initialized profile.
|
||||||
|
_bytesPerSample = bitsPerSample / 8;
|
||||||
|
if (channels <= 0 || _bytesPerSample <= 0)
|
||||||
|
{
|
||||||
|
_bytesPerFrame = 0;
|
||||||
|
_frameCount = 0;
|
||||||
|
_carry = [];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
_bytesPerFrame = _bytesPerSample * channels;
|
||||||
|
_frameCount = pcmByteLength / _bytesPerFrame;
|
||||||
|
_carry = new byte[_bytesPerFrame];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Add(ReadOnlySpan<byte> pcmChunk)
|
||||||
|
{
|
||||||
|
if (_frameIndex >= _frameCount)
|
||||||
|
{
|
||||||
|
return; // degenerate input, or every expected frame already consumed
|
||||||
|
}
|
||||||
|
|
||||||
|
var pos = 0;
|
||||||
|
|
||||||
|
// Complete a frame carried from the previous chunk first.
|
||||||
|
if (_carryLen > 0)
|
||||||
|
{
|
||||||
|
var need = _bytesPerFrame - _carryLen;
|
||||||
|
var take = Math.Min(need, pcmChunk.Length);
|
||||||
|
pcmChunk.Slice(0, take).CopyTo(_carry.AsSpan(_carryLen));
|
||||||
|
_carryLen += take;
|
||||||
|
pos += take;
|
||||||
|
|
||||||
|
if (_carryLen < _bytesPerFrame)
|
||||||
|
{
|
||||||
|
return; // still not a full frame
|
||||||
|
}
|
||||||
|
|
||||||
|
ProcessFrame(_carry);
|
||||||
|
_carryLen = 0;
|
||||||
|
if (_frameIndex >= _frameCount)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whole frames directly from the chunk.
|
||||||
|
while (pos + _bytesPerFrame <= pcmChunk.Length && _frameIndex < _frameCount)
|
||||||
|
{
|
||||||
|
ProcessFrame(pcmChunk.Slice(pos, _bytesPerFrame));
|
||||||
|
pos += _bytesPerFrame;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stash a trailing partial frame for the next chunk — but only while frames are still expected.
|
||||||
|
// A trailing partial frame on the final chunk is dropped, matching the whole-buffer path.
|
||||||
|
if (_frameIndex < _frameCount && pos < pcmChunk.Length)
|
||||||
|
{
|
||||||
|
var remainder = pcmChunk.Slice(pos);
|
||||||
|
remainder.CopyTo(_carry);
|
||||||
|
_carryLen = remainder.Length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ProcessFrame(ReadOnlySpan<byte> frame)
|
||||||
|
{
|
||||||
|
double channelSum = 0;
|
||||||
|
for (var ch = 0; ch < _channels; ch++)
|
||||||
|
{
|
||||||
|
channelSum += RmsLoudnessAlgorithm.ReadSampleNormalized(frame, ch * _bytesPerSample, _bitsPerSample);
|
||||||
|
}
|
||||||
|
|
||||||
|
var mono = channelSum / _channels;
|
||||||
|
|
||||||
|
// long math avoids overflow on large files before the divide back into bucket index.
|
||||||
|
var bucket = (int)(_frameIndex * _bucketCount / _frameCount);
|
||||||
|
if (bucket >= _bucketCount)
|
||||||
|
{
|
||||||
|
bucket = _bucketCount - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
_sumSquares[bucket] += mono * mono;
|
||||||
|
_counts[bucket]++;
|
||||||
|
_frameIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double[] Finish()
|
||||||
|
{
|
||||||
|
var result = new double[_bucketCount];
|
||||||
|
if (_frameCount == 0)
|
||||||
|
{
|
||||||
|
return result; // degenerate input — all zeros, as the whole-buffer guards returned
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var i = 0; i < _bucketCount; i++)
|
||||||
|
{
|
||||||
|
if (_counts[i] > 0)
|
||||||
|
{
|
||||||
|
result[i] = Math.Sqrt(_sumSquares[i] / _counts[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Envelope smoothing (~15 ms) then peak-normalization — identical to the whole-buffer finalize.
|
||||||
|
var totalSeconds = (double)_frameCount / _sampleRate;
|
||||||
|
var bucketSeconds = totalSeconds / _bucketCount;
|
||||||
|
RmsLoudnessAlgorithm.SmoothEnvelope(result, bucketSeconds);
|
||||||
|
|
||||||
|
var peak = 0.0;
|
||||||
|
for (var i = 0; i < _bucketCount; i++)
|
||||||
|
{
|
||||||
|
if (result[i] > peak)
|
||||||
|
{
|
||||||
|
peak = result[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (peak <= 0)
|
||||||
|
{
|
||||||
|
Array.Clear(result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var i = 0; i < _bucketCount; i++)
|
||||||
|
{
|
||||||
|
result[i] /= peak;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,6 +17,10 @@ public class WaveformProfileService
|
|||||||
{
|
{
|
||||||
private const string ProfileExtension = ".wfp";
|
private const string ProfileExtension = ".wfp";
|
||||||
|
|
||||||
|
/// <summary>Bounded read-buffer size for the streaming PCM pass — the only filesize-independent
|
||||||
|
/// allocation on the streaming path (matches the store path's 80 KB copy buffer).</summary>
|
||||||
|
private const int StreamReadBufferSize = 81920;
|
||||||
|
|
||||||
private readonly FileDb _fileDatabase;
|
private readonly FileDb _fileDatabase;
|
||||||
private readonly AudioProcessor _audioProcessor;
|
private readonly AudioProcessor _audioProcessor;
|
||||||
private readonly ILoudnessAlgorithm _loudnessAlgorithm;
|
private readonly ILoudnessAlgorithm _loudnessAlgorithm;
|
||||||
@@ -117,6 +121,161 @@ public class WaveformProfileService
|
|||||||
return ComputeAndStoreAsync(wavBytes, entryKey, bucketCount, VaultConstants.TrackWaveforms);
|
return ComputeAndStoreAsync(wavBytes, entryKey, bucketCount, VaultConstants.TrackWaveforms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Streaming counterpart of <see cref="ComputeAndStoreAsync"/>: computes and stores the fixed
|
||||||
|
/// 512-bucket player-bar profile by reading the WAV from <paramref name="openWavStream"/> in bounded
|
||||||
|
/// chunks, never materializing the whole file in a managed <c>byte[]</c>. Tri-state result matches
|
||||||
|
/// the <c>RemoveResourceAsync</c> idiom so callers can map outcomes precisely: <c>null</c> = no audio
|
||||||
|
/// stream available (the entry has no backing audio); <c>false</c> = audio present but no profile
|
||||||
|
/// computable (non-WAV / float / padded) or the vault write failed; <c>true</c> = stored. Output is
|
||||||
|
/// byte-identical to the whole-buffer path for the same WAV.
|
||||||
|
/// </summary>
|
||||||
|
public Task<bool?> ComputeAndStoreProfileStreamingAsync(
|
||||||
|
Func<CancellationToken, Task<Stream?>> openWavStream,
|
||||||
|
string entryKey,
|
||||||
|
CancellationToken ct = default) =>
|
||||||
|
RunStreamingAsync(
|
||||||
|
openWavStream, entryKey,
|
||||||
|
[(_options.BucketCount, VaultConstants.WaveformProfiles)], ct);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Streaming counterpart of <see cref="ComputeAndStoreHighResAsync"/>: computes and stores the
|
||||||
|
/// duration-derived high-res datum (<see cref="VaultConstants.TrackWaveforms"/>) by streaming the WAV
|
||||||
|
/// from <paramref name="openWavStream"/>. <paramref name="durationSeconds"/> drives the bucket count
|
||||||
|
/// exactly as the whole-buffer path's <c>audio.Duration</c> did — pass the same vault-metadata
|
||||||
|
/// duration to keep the stored bytes identical. Tri-state result as in
|
||||||
|
/// <see cref="ComputeAndStoreProfileStreamingAsync"/>.
|
||||||
|
/// </summary>
|
||||||
|
public Task<bool?> ComputeAndStoreHighResStreamingAsync(
|
||||||
|
Func<CancellationToken, Task<Stream?>> openWavStream,
|
||||||
|
string entryKey,
|
||||||
|
double durationSeconds,
|
||||||
|
CancellationToken ct = default) =>
|
||||||
|
RunStreamingAsync(
|
||||||
|
openWavStream, entryKey,
|
||||||
|
[(WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms)], ct);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Computes and stores BOTH datums a track carries — the 512-bucket profile and the duration-derived
|
||||||
|
/// high-res datum — from a SINGLE streaming pass over the WAV. One sequential read of the (possibly
|
||||||
|
/// ~GB) audio feeds two independent accumulators, so memory stays O(bucket arrays + read buffer) and
|
||||||
|
/// disk I/O is halved versus two separate passes. This is the upload / replace-audio hot path. Each
|
||||||
|
/// datum's stored bytes are byte-identical to its whole-buffer counterpart. Tri-state: <c>null</c> =
|
||||||
|
/// no audio stream; <c>false</c> = not WAV-decodable or a vault write failed; <c>true</c> = both
|
||||||
|
/// datums stored. Best-effort callers ignore the result.
|
||||||
|
/// </summary>
|
||||||
|
public Task<bool?> ComputeAndStoreAllStreamingAsync(
|
||||||
|
Func<CancellationToken, Task<Stream?>> openWavStream,
|
||||||
|
string entryKey,
|
||||||
|
double durationSeconds,
|
||||||
|
CancellationToken ct = default) =>
|
||||||
|
RunStreamingAsync(
|
||||||
|
openWavStream, entryKey,
|
||||||
|
[
|
||||||
|
(_options.BucketCount, VaultConstants.WaveformProfiles),
|
||||||
|
(WaveformResolution.BucketCountForDuration(durationSeconds), VaultConstants.TrackWaveforms),
|
||||||
|
],
|
||||||
|
ct);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Core streaming reduction: opens the WAV once, parses its header (bounded), then streams the PCM
|
||||||
|
/// data region through one loudness accumulator per requested target, storing each datum. All
|
||||||
|
/// targets are computed in the single pass. See the tri-state contract on the public wrappers.
|
||||||
|
/// </summary>
|
||||||
|
private async Task<bool?> RunStreamingAsync(
|
||||||
|
Func<CancellationToken, Task<Stream?>> openWavStream,
|
||||||
|
string entryKey,
|
||||||
|
IReadOnlyList<(int BucketCount, string VaultName)> targets,
|
||||||
|
CancellationToken ct)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await using var stream = await openWavStream(ct);
|
||||||
|
if (stream is null)
|
||||||
|
{
|
||||||
|
// No backing audio for this entry — distinct from "present but undecodable".
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var info = await _audioProcessor.TryReadPcmStreamInfoAsync(stream, stream.Length, ct);
|
||||||
|
if (info is null)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Waveform profile not computed for {EntryKey}: WAV PCM could not be extracted (streaming).",
|
||||||
|
entryKey);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
var v = info.Value;
|
||||||
|
var accumulators = new ILoudnessAccumulator[targets.Count];
|
||||||
|
for (var i = 0; i < targets.Count; i++)
|
||||||
|
{
|
||||||
|
accumulators[i] = _loudnessAlgorithm.CreateAccumulator(
|
||||||
|
v.DataLength, v.Channels, v.SampleRate, v.BitsPerSample, targets[i].BucketCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
await StreamPcmThroughAsync(stream, v.DataStart, v.DataLength, accumulators, ct);
|
||||||
|
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Streaming waveform compute for {EntryKey}: {DataLength} PCM bytes, {TargetCount} datum(s), " +
|
||||||
|
"{BufferSize}B read buffer — no whole-file load.",
|
||||||
|
entryKey, v.DataLength, targets.Count, StreamReadBufferSize);
|
||||||
|
|
||||||
|
var allStored = true;
|
||||||
|
for (var i = 0; i < targets.Count; i++)
|
||||||
|
{
|
||||||
|
var profile = accumulators[i].Finish();
|
||||||
|
var quantized = Quantize(profile);
|
||||||
|
|
||||||
|
await EnsureVaultAsync(targets[i].VaultName);
|
||||||
|
var binary = new MediaBinary(new MediaBinaryParams(quantized, quantized.Length, ProfileExtension));
|
||||||
|
var stored = await _fileDatabase.RegisterResourceAsync(targets[i].VaultName, entryKey, binary);
|
||||||
|
if (!stored)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Waveform vault write failed for {EntryKey} in {VaultName}.", entryKey, targets[i].VaultName);
|
||||||
|
allStored = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return allStored;
|
||||||
|
}
|
||||||
|
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||||
|
{
|
||||||
|
_logger.LogError(ex, "Streaming waveform computation failed for {EntryKey}.", entryKey);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Seeks to the PCM data region and streams exactly <paramref name="dataLength"/> bytes through each
|
||||||
|
/// accumulator in bounded reads. The accumulators carry partial frames internally, so the read
|
||||||
|
/// boundaries need not align to frames. Peak memory is one read buffer — independent of file size.
|
||||||
|
/// </summary>
|
||||||
|
private static async Task StreamPcmThroughAsync(
|
||||||
|
Stream stream, long dataStart, long dataLength, ILoudnessAccumulator[] accumulators, CancellationToken ct)
|
||||||
|
{
|
||||||
|
stream.Seek(dataStart, SeekOrigin.Begin);
|
||||||
|
|
||||||
|
var buffer = new byte[StreamReadBufferSize];
|
||||||
|
var remaining = dataLength;
|
||||||
|
while (remaining > 0)
|
||||||
|
{
|
||||||
|
var want = (int)Math.Min(buffer.Length, remaining);
|
||||||
|
var read = await stream.ReadAsync(buffer.AsMemory(0, want), ct);
|
||||||
|
if (read == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
var span = buffer.AsSpan(0, read);
|
||||||
|
foreach (var accumulator in accumulators)
|
||||||
|
{
|
||||||
|
accumulator.Add(span);
|
||||||
|
}
|
||||||
|
|
||||||
|
remaining -= read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Returns the stored quantized profile bytes for a track from <paramref name="vaultName"/>
|
/// Returns the stored quantized profile bytes for a track from <paramref name="vaultName"/>
|
||||||
/// (defaults to <see cref="VaultConstants.WaveformProfiles"/> when null), or null if no profile
|
/// (defaults to <see cref="VaultConstants.WaveformProfiles"/> when null), or null if no profile
|
||||||
|
|||||||
@@ -201,6 +201,45 @@ public class TrackContentService
|
|||||||
return await _fileDatabase.LoadResourceAsync<AudioBinary>(VaultConstants.Tracks, trackId);
|
return await _fileDatabase.LoadResourceAsync<AudioBinary>(VaultConstants.Tracks, trackId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Opens a read-only, seekable stream over a track's vault audio, or null if the entry has no
|
||||||
|
/// backing file. The caller owns the stream and must dispose it. Unlike <see cref="GetAudioBinaryAsync"/>
|
||||||
|
/// this never buffers the whole file — it is the source for the streaming waveform compute. Follows
|
||||||
|
/// the FileDatabase swallow-and-return-null contract.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="trackId">Track ID (EntryKey)</param>
|
||||||
|
public async Task<Stream?> OpenAudioStreamAsync(string trackId)
|
||||||
|
{
|
||||||
|
var vault = _fileDatabase.GetVault(VaultConstants.Tracks);
|
||||||
|
if (vault is null)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var media = await vault.GetEntryStreamAsync(trackId);
|
||||||
|
return media?.Stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Reads a track's stored audio duration from the vault index metadata WITHOUT loading the audio
|
||||||
|
/// body — the cheap counterpart of <c>GetAudioBinaryAsync(...).Duration</c>. Returns null if the
|
||||||
|
/// entry is unknown or carries no audio metadata. The streaming high-res waveform path uses this to
|
||||||
|
/// derive the duration-based bucket count, matching the value the whole-buffer path read off
|
||||||
|
/// <see cref="AudioBinary.Duration"/> so the stored datum is byte-identical.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="trackId">Track ID (EntryKey)</param>
|
||||||
|
public async Task<double?> GetAudioDurationAsync(string trackId)
|
||||||
|
{
|
||||||
|
var vault = _fileDatabase.GetVault(VaultConstants.Tracks);
|
||||||
|
if (vault is null)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var metaData = await vault.GetEntryMetadata(trackId);
|
||||||
|
return metaData is AudioMetaData audio ? audio.Duration : null;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Checks if FileDatabase is available and tracks vault exists
|
/// Checks if FileDatabase is available and tracks vault exists
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user