From eddbb00cd970206a26cde8850398459467317416 Mon Sep 17 00:00:00 2001 From: daniel-c-harvey Date: Wed, 10 Jun 2026 20:04:55 -0400 Subject: [PATCH] feat(audio): accept EXTENSIBLE IEEE-float and padded 24-in-32 WAV Convert float to 24-bit PCM and repack padded containers on normalize; vault still stores standard PCM. --- DeepDrftContent/Processors/AudioProcessor.cs | 179 +++++++++++++-- DeepDrftTests/AudioProcessorTests.cs | 227 +++++++++++++++++++ 2 files changed, 384 insertions(+), 22 deletions(-) create mode 100644 DeepDrftTests/AudioProcessorTests.cs diff --git a/DeepDrftContent/Processors/AudioProcessor.cs b/DeepDrftContent/Processors/AudioProcessor.cs index fce5fae..7a0d919 100644 --- a/DeepDrftContent/Processors/AudioProcessor.cs +++ b/DeepDrftContent/Processors/AudioProcessor.cs @@ -69,11 +69,23 @@ public class AudioProcessor return null; } + // Float and padded-container EXTENSIBLE require a sample-level transform to become integer PCM. + // TryExtractPcm feeds loudness analysis, not storage, and must not hand back float bytes + // mislabeled as integer PCM — out of scope here, so treat them as "no profile computable". + if (validation.IsFloat) + { + return null; + } + WavMetadata metadata; try { metadata = ParseWavMetadata(bytes, validation); ValidateAudioParameters(metadata); + if (metadata.IsPaddedContainer) + { + return null; + } } catch { @@ -162,10 +174,12 @@ public class AudioProcessor } // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE - // (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is - // then byte-identical to standard PCM and we normalize it downstream. + // (0xFFFE) is accepted when its SubFormat GUID indicates PCM (0x0001) or IEEE float + // (0x0003). PCM sample data is byte-identical to standard PCM; float data is converted to + // 24-bit PCM downstream. Either way the vault only ever holds standard PCM. var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8); var isExtensible = false; + var isFloat = false; if (audioFormat == 0xFFFE) { // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes. @@ -180,14 +194,23 @@ public class AudioProcessor } // SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its - // first two bytes are the little-endian format tag; 0x0001 == WAVE_FORMAT_PCM. + // first two bytes are the little-endian format tag: 0x0001 == WAVE_FORMAT_PCM, + // 0x0003 == WAVE_FORMAT_IEEE_FLOAT. var subFormatPos = fmtChunkPos + 8 + 24; - if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00) + var subFormatTag = BitConverter.ToUInt16(buffer, subFormatPos); + if (subFormatTag == 0x0001) { - return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE SubFormat is not PCM" }; + isExtensible = true; + } + else if (subFormatTag == 0x0003) + { + isExtensible = true; + isFloat = true; + } + else + { + return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE SubFormat is neither PCM nor IEEE float" }; } - - isExtensible = true; } else if (audioFormat != 1) { @@ -206,7 +229,8 @@ public class AudioProcessor IsValid = true, FmtChunkPos = fmtChunkPos, DataChunkPos = dataChunkPos, - IsExtensible = isExtensible + IsExtensible = isExtensible, + IsFloat = isFloat }; } @@ -224,13 +248,19 @@ public class AudioProcessor // For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in // wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24) - // but the valid bits are authoritative for the normalized header and metadata. - // Note: padded-container EXTENSIBLE (e.g. 24-bit valid in a 32-bit container) is not yet - // supported — the mismatched BlockAlign will cause ValidateAudioParameters to throw and fall - // back to defaults. This is an accepted gap as of this fix. + // but the valid bits are authoritative for the normalized header and metadata. When they + // differ (e.g. 24-bit valid in a 32-bit container) we keep the container width separately so + // ValidateAudioParameters can reconcile against the header BlockAlign and NormalizeToStandardPcm + // can re-pack the padded frames. + var containerBitsPerSample = 0; if (validation.IsExtensible) { - bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18); + var validBits = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18); + if (validBits != bitsPerSample) + { + containerBitsPerSample = bitsPerSample; + } + bitsPerSample = validBits; } var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0; @@ -243,10 +273,12 @@ public class AudioProcessor SampleRate = (int)sampleRate, Channels = channels, BitsPerSample = bitsPerSample, + ContainerBitsPerSample = containerBitsPerSample, BlockAlign = blockAlign, DataSize = (int)dataSize, DataChunkPos = validation.DataChunkPos, - IsExtensible = validation.IsExtensible + IsExtensible = validation.IsExtensible, + IsFloat = validation.IsFloat }; } @@ -273,7 +305,11 @@ public class AudioProcessor throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}"); } - var expectedBlockAlign = metadata.Channels * (metadata.BitsPerSample / 8); + // The header BlockAlign reflects the container width, not the valid bit depth. For a padded + // EXTENSIBLE container (e.g. 24-in-32) the container width is authoritative for this check; + // NormalizeToStandardPcm re-packs the frames down to the valid depth afterwards. + var blockAlignBits = metadata.IsPaddedContainer ? metadata.ContainerBitsPerSample : metadata.BitsPerSample; + var expectedBlockAlign = metadata.Channels * (blockAlignBits / 8); if (metadata.BlockAlign != expectedBlockAlign) { throw new InvalidDataException($"Invalid block align: expected {expectedBlockAlign}, got {metadata.BlockAlign}"); @@ -281,21 +317,49 @@ public class AudioProcessor } /// - /// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1). - /// The sample bytes are copied verbatim — EXTENSIBLE-PCM data is byte-identical to standard PCM — - /// only the header is replaced, so the vault stores a format the streaming pipeline already handles. + /// Rebuilds an EXTENSIBLE WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1) + /// so the vault only ever holds a format the streaming pipeline already handles. Three source + /// shapes are normalized: + /// + /// EXTENSIBLE-PCM (depth == container): sample bytes are byte-identical to standard PCM and + /// copied verbatim; only the header is replaced. + /// IEEE float: 32-bit float samples are converted to 24-bit signed integer PCM. + /// Padded container (e.g. 24-in-32): the padding/sign-extension bytes are stripped, keeping + /// the lowest valid bytes per sample. + /// + /// The output header always reports the valid bit depth (). /// private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata) { // Clamp the declared data size to what is actually present; some encoders overshoot. var dataStart = metadata.DataChunkPos + 8; var available = buffer.Length - dataStart; - var dataSize = Math.Min(metadata.DataSize, available); + var srcDataSize = Math.Min(metadata.DataSize, available); + byte[] dataBytes; + int outBitsPerSample; + if (metadata.IsFloat) + { + dataBytes = ConvertFloatTo24BitPcm(buffer, dataStart, srcDataSize); + outBitsPerSample = 24; + } + else if (metadata.IsPaddedContainer) + { + dataBytes = RepackPaddedContainer(buffer, dataStart, srcDataSize, metadata.ContainerBitsPerSample, metadata.BitsPerSample); + outBitsPerSample = metadata.BitsPerSample; + } + else + { + dataBytes = new byte[srcDataSize]; + Array.Copy(buffer, dataStart, dataBytes, 0, srcDataSize); + outBitsPerSample = metadata.BitsPerSample; + } + + var dataSize = dataBytes.Length; const int headerSize = 44; var result = new byte[headerSize + dataSize]; - var blockAlign = (ushort)(metadata.Channels * (metadata.BitsPerSample / 8)); + var blockAlign = (ushort)(metadata.Channels * (outBitsPerSample / 8)); var byteRate = (uint)(metadata.SampleRate * blockAlign); // RIFF header @@ -311,17 +375,70 @@ public class AudioProcessor BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24); BitConverter.GetBytes(byteRate).CopyTo(result, 28); BitConverter.GetBytes(blockAlign).CopyTo(result, 32); - BitConverter.GetBytes((ushort)metadata.BitsPerSample).CopyTo(result, 34); + BitConverter.GetBytes((ushort)outBitsPerSample).CopyTo(result, 34); // data chunk System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36); BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40); - Array.Copy(buffer, dataStart, result, headerSize, dataSize); + Array.Copy(dataBytes, 0, result, headerSize, dataSize); return result; } + /// + /// Converts 32-bit little-endian IEEE float samples (range [-1.0, 1.0]) to 24-bit signed PCM. + /// Each 4-byte source sample becomes 3 little-endian output bytes; output size is 3/4 of input. + /// Trailing bytes that do not form a complete 4-byte sample are ignored. + /// + private static byte[] ConvertFloatTo24BitPcm(byte[] buffer, int dataStart, int dataSize) + { + var sampleCount = dataSize / 4; + var output = new byte[sampleCount * 3]; + + for (int i = 0; i < sampleCount; i++) + { + var sample = BitConverter.ToSingle(buffer, dataStart + i * 4); + var value = (int)(sample * 8388607.0); + value = Math.Clamp(value, -8388608, 8388607); + + var o = i * 3; + output[o] = (byte)(value & 0xFF); + output[o + 1] = (byte)((value >> 8) & 0xFF); + output[o + 2] = (byte)((value >> 16) & 0xFF); + } + + return output; + } + + /// + /// Strips container padding from a padded-container EXTENSIBLE WAV (e.g. 24-bit valid samples + /// stored in 32-bit containers), keeping only the lowest bytes of + /// each little-endian sample. Output size is (validBits/containerBits) of input. + /// Trailing bytes that do not form a complete container sample are ignored. + /// + private static byte[] RepackPaddedContainer(byte[] buffer, int dataStart, int dataSize, int containerBits, int validBits) + { + var containerBytes = containerBits / 8; + var validBytes = validBits / 8; + var sampleCount = dataSize / containerBytes; + var output = new byte[sampleCount * validBytes]; + + for (int i = 0; i < sampleCount; i++) + { + var src = dataStart + i * containerBytes; + var dst = i * validBytes; + // Little-endian: the valid sample occupies the low bytes; the upper bytes are padding / + // sign extension and are discarded. + for (int b = 0; b < validBytes; b++) + { + output[dst + b] = buffer[src + b]; + } + } + + return output; + } + /// /// Returns default WAV metadata for fallback scenarios /// @@ -389,11 +506,26 @@ public class AudioProcessor public int Bitrate { get; set; } public int SampleRate { get; set; } public int Channels { get; set; } + + /// The valid sample depth — for EXTENSIBLE, wValidBitsPerSample. public int BitsPerSample { get; set; } + + /// + /// The container sample width for a padded EXTENSIBLE WAV whose valid depth is narrower + /// (e.g. 32 for a 24-in-32 file). Zero when the container matches the valid depth. + /// + public int ContainerBitsPerSample { get; set; } + public int BlockAlign { get; set; } public int DataSize { get; set; } public int DataChunkPos { get; set; } public bool IsExtensible { get; set; } + + /// True when the SubFormat is IEEE float (converted to 24-bit PCM on normalization). + public bool IsFloat { get; set; } + + /// True when valid samples are stored in a wider container that must be re-packed. + public bool IsPaddedContainer => ContainerBitsPerSample != 0 && ContainerBitsPerSample != BitsPerSample; } /// @@ -406,6 +538,9 @@ public class AudioProcessor public int FmtChunkPos { get; set; } public int DataChunkPos { get; set; } public bool IsExtensible { get; set; } + + /// True when the EXTENSIBLE SubFormat is IEEE float rather than PCM. + public bool IsFloat { get; set; } } } diff --git a/DeepDrftTests/AudioProcessorTests.cs b/DeepDrftTests/AudioProcessorTests.cs new file mode 100644 index 0000000..0fd5223 --- /dev/null +++ b/DeepDrftTests/AudioProcessorTests.cs @@ -0,0 +1,227 @@ +using System.Text; +using DeepDrftContent.Processors; + +namespace DeepDrftTests; + +/// +/// Tests for WAV format handling — standard PCM, +/// EXTENSIBLE-PCM, EXTENSIBLE IEEE float, padded 24-in-32, and the default-fallback paths. +/// +[TestFixture] +public class AudioProcessorTests +{ + private const ushort WaveFormatPcm = 0x0001; + private const ushort WaveFormatExtensible = 0xFFFE; + + private string _testDir = string.Empty; + + [SetUp] + public void SetUp() + { + _testDir = Path.Combine(Path.GetTempPath(), "AudioProcessorTests", Guid.NewGuid().ToString()); + Directory.CreateDirectory(_testDir); + } + + [TearDown] + public void TearDown() + { + try { Directory.Delete(_testDir, recursive: true); } + catch { /* Best-effort cleanup — ignore failures */ } + } + + [Test] + public async Task StandardPcm_RoundTripsUnchanged() + { + var path = await WriteWavAsync(BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 16, audioFormat: WaveFormatPcm)); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(audio!.Duration, Is.GreaterThan(0.0)); + Assert.That(audio.Bitrate, Is.GreaterThan(0)); + } + + [Test] + public async Task ExtensiblePcm_NormalizesToStandardHeader() + { + var subFormat = SubFormatGuid(WaveFormatPcm); + var wav = BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 16, audioFormat: WaveFormatExtensible, + subFormatGuid: subFormat, validBitsPerSample: 16); + var path = await WriteWavAsync(wav); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(audio!.Duration, Is.GreaterThan(0.0)); + Assert.That(audio.Bitrate, Is.GreaterThan(0)); + Assert.That(ReadFmtAudioFormat(audio.Buffer), Is.EqualTo(WaveFormatPcm), "Stored buffer must be standard PCM"); + } + + [Test] + public async Task ExtensibleIeeeFloat_AcceptedAndConverted() + { + // Two stereo frames of 32-bit float samples (range [-1.0, 1.0]). + var samples = FloatBytes(0.5f, -0.5f, 1.0f, -1.0f); + var subFormat = SubFormatGuid(0x0003); // WAVE_FORMAT_IEEE_FLOAT + var wav = BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 32, audioFormat: WaveFormatExtensible, + sampleData: samples, subFormatGuid: subFormat, validBitsPerSample: 32); + var path = await WriteWavAsync(wav); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(ReadFmtBitsPerSample(audio!.Buffer), Is.EqualTo(16 + 8), "Float must convert to 24-bit PCM"); + Assert.That(ReadFmtAudioFormat(audio.Buffer), Is.EqualTo(WaveFormatPcm)); + // 4 float samples (4 bytes each) → 4 PCM samples (3 bytes each) = 12 data bytes after the 44-byte header. + Assert.That(audio.Buffer.Length, Is.EqualTo(44 + 12)); + } + + [Test] + public async Task ExtensiblePadded24in32_AcceptedAndRepacked() + { + // Two stereo frames; each sample is a 24-bit value stored in a 32-bit little-endian container. + var samples = Padded24In32Bytes(0x123456, unchecked((int)0xFFEDCBA9), 0x000001, unchecked((int)0xFF800000)); + var subFormat = SubFormatGuid(WaveFormatPcm); + var wav = BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 32, audioFormat: WaveFormatExtensible, + sampleData: samples, subFormatGuid: subFormat, validBitsPerSample: 24); + var path = await WriteWavAsync(wav); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(ReadFmtBitsPerSample(audio!.Buffer), Is.EqualTo(24), "Padded container must repack to 24-bit"); + Assert.That(ReadFmtAudioFormat(audio.Buffer), Is.EqualTo(WaveFormatPcm)); + // 4 container samples (4 bytes each) → 4 PCM samples (3 bytes each) = 12 data bytes. + Assert.That(audio.Buffer.Length, Is.EqualTo(44 + 12)); + } + + [Test] + public async Task ExtensibleUnsupportedSubFormat_FallsBackToDefaults() + { + var subFormat = SubFormatGuid(0x0005); // WAVE_FORMAT_DOLBY_AC3 — neither PCM nor float + var wav = BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 16, audioFormat: WaveFormatExtensible, + subFormatGuid: subFormat, validBitsPerSample: 16); + var path = await WriteWavAsync(wav); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(audio!.Duration, Is.EqualTo(180.0), "Unsupported SubFormat must fall back to default metadata"); + } + + [Test] + public async Task ExtensibleFmtTooSmall_FallsBackToDefaults() + { + // audioFormat=EXTENSIBLE but fmt chunk declares 16 bytes — too small for the extension. + var wav = BuildMinimalWav(channels: 2, sampleRate: 44100, bitsPerSample: 16, audioFormat: WaveFormatExtensible, + forceFmtChunkSize: 16); + var path = await WriteWavAsync(wav); + + var audio = await new AudioProcessor().ProcessWavFileAsync(path); + + Assert.That(audio, Is.Not.Null); + Assert.That(audio!.Duration, Is.EqualTo(180.0), "Undersized EXTENSIBLE fmt chunk must fall back to default metadata"); + } + + // -- helpers -------------------------------------------------------------------------------- + + /// + /// Synthesises a minimal valid WAV buffer. For EXTENSIBLE (audioFormat=0xFFFE) the fmt chunk is + /// 40 bytes and includes cbSize, wValidBitsPerSample, channel mask, and the SubFormat GUID. For + /// standard PCM (audioFormat=1) the fmt chunk is 16 bytes. When is + /// null a small block of silence sized to the block alignment is used. + /// + private static byte[] BuildMinimalWav( + int channels, + int sampleRate, + int bitsPerSample, + ushort audioFormat, + byte[]? sampleData = null, + byte[]? subFormatGuid = null, + ushort validBitsPerSample = 0, + uint? forceFmtChunkSize = null) + { + var isExtensible = audioFormat == WaveFormatExtensible; + var fmtChunkSize = forceFmtChunkSize ?? (isExtensible ? 40u : 16u); + + var blockAlign = (ushort)(channels * (bitsPerSample / 8)); + var byteRate = (uint)(sampleRate * blockAlign); + var data = sampleData ?? new byte[blockAlign * 2]; + + using var ms = new MemoryStream(); + using var w = new BinaryWriter(ms, Encoding.ASCII, leaveOpen: true); + + w.Write(Encoding.ASCII.GetBytes("RIFF")); + w.Write((uint)(36 + fmtChunkSize - 16 + data.Length)); // riff size adjusted for fmt extension + w.Write(Encoding.ASCII.GetBytes("WAVE")); + + w.Write(Encoding.ASCII.GetBytes("fmt ")); + w.Write(fmtChunkSize); + w.Write(audioFormat); + w.Write((ushort)channels); + w.Write((uint)sampleRate); + w.Write(byteRate); + w.Write(blockAlign); + w.Write((ushort)bitsPerSample); + + // Only emit the EXTENSIBLE extension when the declared fmt size actually allows for it. A + // forced-small size (fmt=16) leaves audioFormat=EXTENSIBLE but no extension, exercising the + // "fmt too small" fallback. + if (fmtChunkSize >= 40) + { + w.Write((ushort)22); // cbSize + w.Write(validBitsPerSample); + w.Write((uint)0); // channel mask + w.Write(subFormatGuid ?? SubFormatGuid(WaveFormatPcm)); + } + + w.Write(Encoding.ASCII.GetBytes("data")); + w.Write((uint)data.Length); + w.Write(data); + + w.Flush(); + return ms.ToArray(); + } + + /// Builds a 16-byte SubFormat GUID whose leading 2 bytes are the format tag. + private static byte[] SubFormatGuid(ushort formatTag) + { + var guid = new byte[16]; + guid[0] = (byte)(formatTag & 0xFF); + guid[1] = (byte)((formatTag >> 8) & 0xFF); + // Remaining 14 bytes are the fixed KSDATAFORMAT suffix; their value is irrelevant to parsing. + return guid; + } + + private static byte[] FloatBytes(params float[] samples) + { + var bytes = new byte[samples.Length * 4]; + for (int i = 0; i < samples.Length; i++) + { + BitConverter.GetBytes(samples[i]).CopyTo(bytes, i * 4); + } + return bytes; + } + + /// Packs each 24-bit sample value into a 32-bit little-endian container. + private static byte[] Padded24In32Bytes(params int[] samples) + { + var bytes = new byte[samples.Length * 4]; + for (int i = 0; i < samples.Length; i++) + { + BitConverter.GetBytes(samples[i]).CopyTo(bytes, i * 4); + } + return bytes; + } + + private async Task WriteWavAsync(byte[] wav) + { + var path = Path.Combine(_testDir, Guid.NewGuid() + ".wav"); + await File.WriteAllBytesAsync(path, wav); + return path; + } + + private static ushort ReadFmtAudioFormat(byte[] standardPcmWav) => BitConverter.ToUInt16(standardPcmWav, 20); + + private static ushort ReadFmtBitsPerSample(byte[] standardPcmWav) => BitConverter.ToUInt16(standardPcmWav, 34); +}