From 88ac5b2c889e81d67af291324687b4b3db046042 Mon Sep 17 00:00:00 2001
From: daniel-c-harvey
Date: Wed, 10 Jun 2026 15:20:34 -0400
Subject: [PATCH] fix(audio): support WAVE_FORMAT_EXTENSIBLE PCM WAVs, normalizing them to standard PCM on upload

---
 DeepDrftContent/Processors/AudioProcessor.cs | 102 +++++++++++++++++--
 DeepDrftPublic/Interop/wavutils.ts           |  19 +++-
 2 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/DeepDrftContent/Processors/AudioProcessor.cs b/DeepDrftContent/Processors/AudioProcessor.cs
index af18f73..90acd40 100644
--- a/DeepDrftContent/Processors/AudioProcessor.cs
+++ b/DeepDrftContent/Processors/AudioProcessor.cs
@@ -28,10 +28,15 @@ public class AudioProcessor
         {
             var buffer = await File.ReadAllBytesAsync(filePath);
             var wavInfo = ExtractWavMetadata(buffer);
-            
+
+            // EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
+            // the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
+            // time so the vault only ever holds standard PCM and the client decode path stays unchanged.
+            var storedBuffer = wavInfo.IsExtensible ? NormalizeToStandardPcm(buffer, wavInfo) : buffer;
+
             var parameters = new AudioBinaryParams(
-                Buffer: buffer,
-                Size: buffer.Length,
+                Buffer: storedBuffer,
+                Size: storedBuffer.Length,
                 Extension: ".wav",
                 Duration: wavInfo.Duration,
                 Bitrate: wavInfo.Bitrate
@@ -156,9 +161,30 @@ public class AudioProcessor
             return new WavValidationResult { IsValid = false, ErrorMessage = "fmt chunk too small" };
         }
 
-        // Validate audio format (PCM only)
+        // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
+        // (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is
+        // then byte-identical to standard PCM and we normalize it downstream.
         var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8);
-        if (audioFormat != 1)
+        var isExtensible = false;
+        if (audioFormat == 0xFFFE)
+        {
+            // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
+            if (fmtChunkSize < 40)
+            {
+                return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE fmt chunk too small" };
+            }
+
+            // SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
+            // first two bytes are the little-endian format tag; 0x0001 == WAVE_FORMAT_PCM.
+            var subFormatPos = fmtChunkPos + 8 + 24;
+            if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00)
+            {
+                return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE SubFormat is not PCM" };
+            }
+
+            isExtensible = true;
+        }
+        else if (audioFormat != 1)
         {
             return new WavValidationResult { IsValid = false, ErrorMessage = "Only PCM format supported" };
         }
@@ -170,11 +196,12 @@ public class AudioProcessor
             return new WavValidationResult { IsValid = false, ErrorMessage = "Missing data chunk" };
         }
 
-        return new WavValidationResult 
-        { 
-            IsValid = true, 
+        return new WavValidationResult
+        {
+            IsValid = true,
             FmtChunkPos = fmtChunkPos,
-            DataChunkPos = dataChunkPos
+            DataChunkPos = dataChunkPos,
+            IsExtensible = isExtensible
         };
     }
 
@@ -190,6 +217,14 @@ public class AudioProcessor
         var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22);
         var dataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);
 
+        // For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
+        // wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24).
+        // Valid bits feed the reported metadata only; the normalized header keeps the container layout.
+        if (validation.IsExtensible)
+        {
+            bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18);
+        }
+
         var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;
         var bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000);
 
@@ -201,7 +236,9 @@ public class AudioProcessor
             Channels = channels,
             BitsPerSample = bitsPerSample,
             BlockAlign = blockAlign,
-            DataSize = (int)dataSize
+            DataSize = (int)dataSize,
+            DataChunkPos = validation.DataChunkPos,
+            IsExtensible = validation.IsExtensible
         };
     }
 
@@ -235,6 +272,48 @@ public class AudioProcessor
         }
     }
 
+    /// <summary>
+    /// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1).
+    /// The sample bytes are copied verbatim, so the new header describes the container layout
+    /// (metadata.BlockAlign) rather than the valid bits; otherwise e.g. 20-in-24-bit audio is misframed.
+    /// </summary>
+    private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
+    {
+        // Clamp to the bytes actually present. A negative DataSize is a uint >= 2 GiB that wrapped: take all.
+        var dataStart = metadata.DataChunkPos + 8;
+        var available = buffer.Length - dataStart;
+        var dataSize = metadata.DataSize < 0 ? available : Math.Min(metadata.DataSize, available);
+
+        const int headerSize = 44;
+        var result = new byte[headerSize + dataSize];
+
+        var blockAlign = (ushort)(metadata.BlockAlign > 0 ? metadata.BlockAlign : metadata.Channels * ((metadata.BitsPerSample + 7) / 8));
+        var byteRate = (uint)(metadata.SampleRate * blockAlign);
+
+        // RIFF header
+        System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
+        BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
+        System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);
+
+        // fmt chunk (standard 16-byte PCM); bits-per-sample is the container width implied by blockAlign
+        System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
+        BitConverter.GetBytes((uint)16).CopyTo(result, 16);
+        BitConverter.GetBytes((ushort)1).CopyTo(result, 20); // audioFormat = PCM
+        BitConverter.GetBytes((ushort)metadata.Channels).CopyTo(result, 22);
+        BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
+        BitConverter.GetBytes(byteRate).CopyTo(result, 28);
+        BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
+        BitConverter.GetBytes((ushort)(metadata.Channels > 0 ? blockAlign / metadata.Channels * 8 : metadata.BitsPerSample)).CopyTo(result, 34);
+
+        // data chunk
+        System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
+        BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);
+
+        Array.Copy(buffer, dataStart, result, headerSize, dataSize);
+
+        return result;
+    }
+
     /// <summary>
     /// Returns default WAV metadata for fallback scenarios
     /// </summary>
@@ -305,6 +384,8 @@ public class AudioProcessor
         public int BitsPerSample { get; set; }
         public int BlockAlign { get; set; }
         public int DataSize { get; set; }
+        public int DataChunkPos { get; set; }
+        public bool IsExtensible { get; set; }
     }
 
     /// <summary>
@@ -316,6 +397,7 @@ public class AudioProcessor
         public string ErrorMessage { get; set; } = string.Empty;
         public int FmtChunkPos { get; set; }
         public int DataChunkPos { get; set; }
+        public bool IsExtensible { get; set; }
     }
 }
 
diff --git a/DeepDrftPublic/Interop/wavutils.ts b/DeepDrftPublic/Interop/wavutils.ts
index e169a29..f27efcc 100644
--- a/DeepDrftPublic/Interop/wavutils.ts
+++ b/DeepDrftPublic/Interop/wavutils.ts
@@ -59,7 +59,24 @@ class WavUtils {
                 // PCM only. The server's WavOffsetService synthesises PCM-shaped headers,
                 // and AudioProcessor rejects non-PCM at upload — accepting Float here would
                 // hand the decoder a header/payload mismatch that surfaces as garbled audio.
-                if (audioFormat !== 1) {
+                // WAVE_FORMAT_EXTENSIBLE (0xFFFE) is accepted only when its SubFormat GUID is
+                // PCM; the sample data is then byte-identical to standard PCM and every PCM
+                // field sits at the same offset. The vault normalizes uploads to plain PCM, so
+                // this is belt-and-suspenders for any EXTENSIBLE header that reaches the client.
+                if (audioFormat === 0xFFFE) {
+                    // EXTENSIBLE needs the full extension: 16 base + 2 cbSize + 22 = 40 bytes.
+                    if (chunkSize < 40) {
+                        console.warn(`EXTENSIBLE fmt chunk too small: ${chunkSize} (need >= 40)`);
+                        return null;
+                    }
+                    // SubFormat GUID at chunkOffset + 8 + 24; first two LE bytes are the format
+                    // tag — 0x0001 == WAVE_FORMAT_PCM.
+                    const subFormatTag = view.getUint16(chunkOffset + 8 + 24, true);
+                    if (subFormatTag !== 1) {
+                        console.warn(`Unsupported EXTENSIBLE SubFormat: ${subFormatTag} (only PCM supported)`);
+                        return null;
+                    }
+                } else if (audioFormat !== 1) {
                     console.warn(`Unsupported audio format: ${audioFormat} (only PCM=1 supported)`);
                     return null;
                 }