using System.Text;
using DeepDrftContent.Constants;
using DeepDrftContent.Processors;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using FileDb = DeepDrftContent.FileDatabase.Services.FileDatabase;

namespace DeepDrftTests;

/// <summary>
/// Wave 2 parity tests for the streaming waveform compute. The single most important property: the
/// streaming path (bounded reads, never the whole file in a managed byte[]) must store bytes IDENTICAL
/// to the prior whole-buffer path for the same WAV — for both the 512-bucket profile and the
/// duration-derived high-res datum. These tests compute the profile both ways over the same WAV and
/// assert byte-equality, then cover the bounded-memory guarantee, the mp3/flac graceful-null, and the
/// sample-alignment edges (fewer samples than buckets; a data region whose length is not a whole
/// multiple of the frame size).
/// </summary>
[TestFixture]
public class WaveformStreamingParityTests
{
    /// <summary>
    /// Largest byte count a single read request may ask for in the bounded-memory test.
    /// NOTE(review): presumably mirrors the service's streaming read buffer size — confirm against
    /// the service implementation.
    /// </summary>
    private const int MaxBoundedReadBytes = 81920;

    private string _testDir = string.Empty;

    /// <summary>Creates a unique temporary directory for each test's file database.</summary>
    [SetUp]
    public void SetUp()
    {
        _testDir = Path.Combine(
            Path.GetTempPath(),
            "WaveformStreamingParityTests",
            Guid.NewGuid().ToString());
        Directory.CreateDirectory(_testDir);
    }

    /// <summary>Removes the per-test directory; cleanup failures never fail a test.</summary>
    [TearDown]
    public void TearDown()
    {
        try
        {
            Directory.Delete(_testDir, recursive: true);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best-effort cleanup — a locked or already-removed temp directory is not a test failure.
        }
    }

    /// <summary>Builds the service under test with default options and a no-op logger.</summary>
    private static WaveformProfileService CreateService(FileDb fileDatabase) =>
        new(
            fileDatabase,
            new AudioProcessor(),
            new RmsLoudnessAlgorithm(),
            Options.Create(new WaveformProfileOptions()),
            NullLogger<WaveformProfileService>.Instance);

    /// <summary>
    /// Opens the file database in the per-test directory, asserts that it opened, and wraps it in
    /// the service under test.
    /// </summary>
    private async Task<WaveformProfileService> CreateServiceAsync()
    {
        var db = await FileDb.FromAsync(_testDir);
        Assert.That(db, Is.Not.Null, "The test file database could not be opened");
        return CreateService(db!);
    }

    // ----- Parity: 512-bucket profile -----

    [Test]
    public async Task ProfileStreaming_16BitStereo_ByteIdenticalToWholeBuffer()
    {
        var service = await CreateServiceAsync();
        var wav = BuildPcmWav(sampleRate: 44100, channels: 2, bitsPerSample: 16, frames: 50_000);

        var (reference, streaming) = await ComputeProfileBothWaysAsync(service, wav);

        Assert.That(streaming, Is.Not.Null);
        Assert.That(streaming, Is.EqualTo(reference).AsCollection,
            "Streaming 512-bucket profile must be byte-identical to the whole-buffer computation");
    }

    [Test]
    public async Task ProfileStreaming_24BitStereo_NormalizedShape_ByteIdenticalToWholeBuffer()
    {
        // 24-bit standard PCM is exactly what the store path emits when it normalizes a 24-in-32 / float
        // EXTENSIBLE source. bytesPerFrame = 6 exercises odd alignment across the bounded read boundary.
        var service = await CreateServiceAsync();
        var wav = BuildPcmWav(sampleRate: 48000, channels: 2, bitsPerSample: 24, frames: 40_000);

        var (reference, streaming) = await ComputeProfileBothWaysAsync(service, wav);

        Assert.That(streaming, Is.EqualTo(reference).AsCollection,
            "Streaming profile for a 24-bit normalized-shape WAV must be byte-identical");
    }

    // ----- Parity: high-res datum -----

    [Test]
    public async Task HighResStreaming_16BitStereo_ByteIdenticalToWholeBuffer()
    {
        var service = await CreateServiceAsync();
        const double duration = 7.0; // > floor so the bucket count is genuinely duration-derived
        var wav = BuildPcmWav(
            sampleRate: 44100, channels: 2, bitsPerSample: 16, frames: (int)(44100 * duration));

        await service.ComputeAndStoreHighResAsync(wav, "ref", duration);
        var reference = await service.GetProfileAsync("ref", VaultConstants.TrackWaveforms);

        var stored = await service.ComputeAndStoreHighResStreamingAsync(
            _ => Task.FromResult<Stream?>(new MemoryStream(wav)), "stream", duration);
        var streaming = await service.GetProfileAsync("stream", VaultConstants.TrackWaveforms);

        Assert.That(stored, Is.True);
        // Guard against a vacuous pass: null == null would otherwise satisfy the parity assertion.
        Assert.That(reference, Is.Not.Null, "Whole-buffer high-res reference must exist");
        Assert.That(streaming, Is.EqualTo(reference).AsCollection,
            "Streaming high-res datum must be byte-identical to the whole-buffer computation");
    }

    [Test]
    public async Task AllStreaming_HotPath_BothDatumsByteIdenticalToWholeBuffer()
    {
        // The upload/replace hot path computes BOTH datums in one streaming pass. Each must match its
        // whole-buffer counterpart exactly.
        var service = await CreateServiceAsync();
        const double duration = 5.0;
        var wav = BuildPcmWav(
            sampleRate: 44100, channels: 2, bitsPerSample: 16, frames: (int)(44100 * duration));

        await service.ComputeAndStoreAsync(wav, "ref");
        await service.ComputeAndStoreHighResAsync(wav, "ref", duration);
        var refProfile = await service.GetProfileAsync("ref");
        var refHighRes = await service.GetProfileAsync("ref", VaultConstants.TrackWaveforms);

        var stored = await service.ComputeAndStoreAllStreamingAsync(
            _ => Task.FromResult<Stream?>(new MemoryStream(wav)), "stream", duration);
        var streamProfile = await service.GetProfileAsync("stream");
        var streamHighRes = await service.GetProfileAsync("stream", VaultConstants.TrackWaveforms);

        Assert.Multiple(() =>
        {
            Assert.That(stored, Is.True);
            // Guard against vacuous passes where both sides of a parity check are null.
            Assert.That(refProfile, Is.Not.Null, "Whole-buffer 512-bucket reference must exist");
            Assert.That(refHighRes, Is.Not.Null, "Whole-buffer high-res reference must exist");
            Assert.That(streamProfile, Is.EqualTo(refProfile).AsCollection, "512-bucket profile parity");
            Assert.That(streamHighRes, Is.EqualTo(refHighRes).AsCollection, "high-res datum parity");
        });
    }

    // ----- Bounded memory -----

    [Test]
    public async Task ProfileStreaming_LargeWav_ReadsInBoundedChunks_NeverWholeFile()
    {
        var service = await CreateServiceAsync();

        // 720,000 bytes of PCM — well past any single bounded read, so the consumer is forced to chunk.
        var wav = BuildPcmWav(sampleRate: 44100, channels: 2, bitsPerSample: 16, frames: 180_000);
        var counter = new MaxReadTrackingStream(wav);

        var stored = await service.ComputeAndStoreProfileStreamingAsync(
            _ => Task.FromResult<Stream?>(counter), "big");

        Assert.Multiple(() =>
        {
            Assert.That(stored, Is.True, "A large, only-chunk-readable WAV must compute successfully");
            Assert.That(counter.MaxSingleRead, Is.LessThanOrEqualTo(MaxBoundedReadBytes),
                "No single read may request the whole file — peak buffer is bounded, not O(filesize)");
            Assert.That(counter.ReadCallCount, Is.GreaterThan(1),
                "A file larger than one buffer must be drained across multiple bounded reads");
        });
    }

    // ----- mp3 / flac graceful-null -----

    [Test]
    public async Task ProfileStreaming_NonWavBytes_WritesNoProfile_ReturnsFalse()
    {
        var service = await CreateServiceAsync();

        // Stand-in for mp3/flac: present audio bytes that are not a RIFF/WAVE container.
        var notWav = Encoding.ASCII.GetBytes("ID3not-a-wav-file-payload-goes-here-and-on-and-on");

        var stored = await service.ComputeAndStoreProfileStreamingAsync(
            _ => Task.FromResult<Stream?>(new MemoryStream(notWav)), "mp3-like");
        // Awaited up front rather than blocking on .Result inside the assertion block.
        var profile = await service.GetProfileAsync("mp3-like");

        Assert.Multiple(() =>
        {
            Assert.That(stored, Is.False, "Non-WAV audio yields no profile (graceful), not a crash");
            Assert.That(profile, Is.Null, "No profile must be stored");
        });
    }

    [Test]
    public async Task ProfileStreaming_NoAudioStream_ReturnsNull()
    {
        var service = await CreateServiceAsync();

        var stored = await service.ComputeAndStoreProfileStreamingAsync(
            _ => Task.FromResult<Stream?>(null), "missing");

        Assert.That(stored, Is.Null,
            "No backing audio stream is the tri-state null (maps to 404 upstream)");
    }

    // ----- Sample-alignment edges -----

    [Test]
    public async Task ProfileStreaming_FewerSamplesThanBuckets_ByteIdenticalToWholeBuffer()
    {
        var service = await CreateServiceAsync();

        // 200 frames into 512 buckets — most buckets get zero or one frame; exercises the empty-bucket
        // and single-frame branches identically on both paths.
        var wav = BuildPcmWav(sampleRate: 44100, channels: 2, bitsPerSample: 16, frames: 200);

        var (reference, streaming) = await ComputeProfileBothWaysAsync(service, wav);

        Assert.That(streaming, Is.EqualTo(reference).AsCollection,
            "A WAV with fewer frames than buckets must produce byte-identical output");
    }

    [Test]
    public async Task ProfileStreaming_DataNotWholeMultipleOfFrame_DropsPartialFrameIdentically()
    {
        var service = await CreateServiceAsync();

        // Declare a data region with a trailing partial frame (blockAlign-1 extra bytes). Both paths must
        // clamp/drop the partial frame the same way, leaving identical output.
        var wav = BuildPcmWav(
            sampleRate: 44100,
            channels: 2,
            bitsPerSample: 16,
            frames: 12_345,
            trailingPartialBytes: 3 /* blockAlign for 16-bit stereo is 4 → a 3-byte partial frame */);

        var (reference, streaming) = await ComputeProfileBothWaysAsync(service, wav);

        Assert.That(streaming, Is.EqualTo(reference).AsCollection,
            "A non-frame-aligned data length must drop the trailing partial frame identically on both paths");
    }

    // ----- helpers -----

    /// <summary>
    /// Computes the 512-bucket profile for <paramref name="wav"/> through the whole-buffer path (stored
    /// under "ref") and the streaming path (stored under "stream"), then reads both back. Asserts that
    /// the reference exists and that the streaming compute reported success.
    /// </summary>
    private static async Task<(byte[]? Reference, byte[]? Streaming)> ComputeProfileBothWaysAsync(
        WaveformProfileService service, byte[] wav)
    {
        await service.ComputeAndStoreAsync(wav, "ref");
        var reference = await service.GetProfileAsync("ref");
        // Without this, a parity assertion could pass vacuously when both profiles are null.
        Assert.That(reference, Is.Not.Null, "Whole-buffer compute should store a reference profile");

        var stored = await service.ComputeAndStoreProfileStreamingAsync(
            _ => Task.FromResult<Stream?>(new MemoryStream(wav)), "stream");
        Assert.That(stored, Is.True, "Streaming compute should succeed for a decodable WAV");

        var streaming = await service.GetProfileAsync("stream");
        return (reference, streaming);
    }

    /// <summary>
    /// Builds a standard-PCM WAV with a deterministic broadband (non-silent) data region so the loudness
    /// reduction and peak-normalization are genuinely exercised.
    /// <paramref name="trailingPartialBytes"/> appends bytes that do not complete a frame and includes
    /// them in the declared data size, to force the partial-frame-drop path.
    /// </summary>
    /// <param name="sampleRate">Sample rate written to the fmt chunk, in Hz.</param>
    /// <param name="channels">Channel count written to the fmt chunk.</param>
    /// <param name="bitsPerSample">Bit depth written to the fmt chunk; bytes per sample is this / 8.</param>
    /// <param name="frames">Number of whole frames in the data region.</param>
    /// <param name="trailingPartialBytes">Extra bytes appended after the last whole frame.</param>
    /// <returns>The complete WAV file: a 44-byte header followed by the data region.</returns>
    private static byte[] BuildPcmWav(
        int sampleRate,
        ushort channels,
        ushort bitsPerSample,
        int frames,
        int trailingPartialBytes = 0)
    {
        var blockAlign = (ushort)(channels * (bitsPerSample / 8));
        var byteRate = (uint)(sampleRate * blockAlign);
        var dataLength = frames * blockAlign + trailingPartialBytes;
        var data = new byte[dataLength];

        // Deterministic LCG fill — a broadband signal, identical on every run, non-degenerate per bucket.
        uint state = 0x12345678u;
        for (var i = 0; i < data.Length; i++)
        {
            state = state * 1664525u + 1013904223u;
            data[i] = (byte)(state >> 24);
        }

        using var ms = new MemoryStream();
        using var w = new BinaryWriter(ms, Encoding.ASCII, leaveOpen: true);

        // RIFF header: the declared size covers everything after the first 8 bytes (36 + data).
        w.Write(Encoding.ASCII.GetBytes("RIFF"));
        w.Write((uint)(36 + data.Length));
        w.Write(Encoding.ASCII.GetBytes("WAVE"));

        // fmt chunk: 16-byte PCM format description.
        w.Write(Encoding.ASCII.GetBytes("fmt "));
        w.Write(16u);
        w.Write((ushort)1); // PCM
        w.Write(channels);
        w.Write((uint)sampleRate);
        w.Write(byteRate);
        w.Write(blockAlign);
        w.Write(bitsPerSample);

        // data chunk: the declared length deliberately includes any trailing partial frame.
        w.Write(Encoding.ASCII.GetBytes("data"));
        w.Write((uint)data.Length);
        w.Write(data);
        w.Flush();

        return ms.ToArray();
    }

    /// <summary>
    /// A seekable read-only stream over a byte buffer that records the largest single read length
    /// requested and how many reads occurred — the assertion mechanism for "the consumer drains in
    /// bounded chunks, never asking for the whole file at once".
    /// </summary>
    private sealed class MaxReadTrackingStream : Stream
    {
        private readonly MemoryStream _inner;

        public MaxReadTrackingStream(byte[] bytes) =>
            _inner = new MemoryStream(bytes, writable: false);

        /// <summary>Largest byte count requested by any single read call so far.</summary>
        public int MaxSingleRead { get; private set; }

        /// <summary>Total number of read calls observed, across all read overloads.</summary>
        public int ReadCallCount { get; private set; }

        /// <summary>Records one read request of <paramref name="count"/> bytes.</summary>
        private void Track(int count)
        {
            ReadCallCount++;
            if (count > MaxSingleRead)
            {
                MaxSingleRead = count;
            }
        }

        // Every read overload is overridden so a request is tracked once, by requested size (not by
        // bytes actually returned), whichever API the consumer uses.
        public override int Read(byte[] buffer, int offset, int count)
        {
            Track(count);
            return _inner.Read(buffer, offset, count);
        }

        public override int Read(Span<byte> buffer)
        {
            Track(buffer.Length);
            return _inner.Read(buffer);
        }

        public override ValueTask<int> ReadAsync(
            Memory<byte> buffer, CancellationToken cancellationToken = default)
        {
            Track(buffer.Length);
            return _inner.ReadAsync(buffer, cancellationToken);
        }

        public override Task<int> ReadAsync(
            byte[] buffer, int offset, int count, CancellationToken cancellationToken)
        {
            Track(count);
            return _inner.ReadAsync(buffer, offset, count, cancellationToken);
        }

        public override bool CanRead => true;

        public override bool CanSeek => true;

        public override bool CanWrite => false;

        public override long Length => _inner.Length;

        public override long Position
        {
            get => _inner.Position;
            set => _inner.Position = value;
        }

        public override long Seek(long offset, SeekOrigin origin) => _inner.Seek(offset, origin);

        public override void Flush()
        {
        }

        public override void SetLength(long value) => throw new NotSupportedException();

        public override void Write(byte[] buffer, int offset, int count) =>
            throw new NotSupportedException();

        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                _inner.Dispose();
            }

            base.Dispose(disposing);
        }
    }
}