Merge branch 'extensible-wav-support' into dev

This commit is contained in:
daniel-c-harvey
2026-06-10 15:27:47 -04:00
2 changed files with 118 additions and 11 deletions
+100 -10
View File
@@ -28,10 +28,15 @@ public class AudioProcessor
{ {
var buffer = await File.ReadAllBytesAsync(filePath); var buffer = await File.ReadAllBytesAsync(filePath);
var wavInfo = ExtractWavMetadata(buffer); var wavInfo = ExtractWavMetadata(buffer);
// EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
// the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
// time so the vault only ever holds standard PCM and the client decode path stays unchanged.
var storedBuffer = wavInfo.IsExtensible ? NormalizeToStandardPcm(buffer, wavInfo) : buffer;
var parameters = new AudioBinaryParams( var parameters = new AudioBinaryParams(
Buffer: buffer, Buffer: storedBuffer,
Size: buffer.Length, Size: storedBuffer.Length,
Extension: ".wav", Extension: ".wav",
Duration: wavInfo.Duration, Duration: wavInfo.Duration,
Bitrate: wavInfo.Bitrate Bitrate: wavInfo.Bitrate
@@ -156,9 +161,35 @@ public class AudioProcessor
return new WavValidationResult { IsValid = false, ErrorMessage = "fmt chunk too small" }; return new WavValidationResult { IsValid = false, ErrorMessage = "fmt chunk too small" };
} }
// Validate audio format (PCM only) // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
// (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is
// then byte-identical to standard PCM and we normalize it downstream.
var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8); var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8);
if (audioFormat != 1) var isExtensible = false;
if (audioFormat == 0xFFFE)
{
// EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
if (fmtChunkSize < 40)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE fmt chunk too small" };
}
if (fmtChunkPos + 8 + 40 > buffer.Length)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE fmt chunk extends past end of file" };
}
// SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
// first two bytes are the little-endian format tag; 0x0001 == WAVE_FORMAT_PCM.
var subFormatPos = fmtChunkPos + 8 + 24;
if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE SubFormat is not PCM" };
}
isExtensible = true;
}
else if (audioFormat != 1)
{ {
return new WavValidationResult { IsValid = false, ErrorMessage = "Only PCM format supported" }; return new WavValidationResult { IsValid = false, ErrorMessage = "Only PCM format supported" };
} }
@@ -170,11 +201,12 @@ public class AudioProcessor
return new WavValidationResult { IsValid = false, ErrorMessage = "Missing data chunk" }; return new WavValidationResult { IsValid = false, ErrorMessage = "Missing data chunk" };
} }
return new WavValidationResult return new WavValidationResult
{ {
IsValid = true, IsValid = true,
FmtChunkPos = fmtChunkPos, FmtChunkPos = fmtChunkPos,
DataChunkPos = dataChunkPos DataChunkPos = dataChunkPos,
IsExtensible = isExtensible
}; };
} }
@@ -190,6 +222,17 @@ public class AudioProcessor
var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22); var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22);
var dataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4); var dataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);
// For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
// wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24)
// but the valid bits are authoritative for the normalized header and metadata.
// Note: padded-container EXTENSIBLE (e.g. 24-bit valid in a 32-bit container) is not yet
// supported — the mismatched BlockAlign will cause ValidateAudioParameters to throw and fall
// back to defaults. This is an accepted gap as of this fix.
if (validation.IsExtensible)
{
bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18);
}
var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0; var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;
var bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000); var bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000);
@@ -201,7 +244,9 @@ public class AudioProcessor
Channels = channels, Channels = channels,
BitsPerSample = bitsPerSample, BitsPerSample = bitsPerSample,
BlockAlign = blockAlign, BlockAlign = blockAlign,
DataSize = (int)dataSize DataSize = (int)dataSize,
DataChunkPos = validation.DataChunkPos,
IsExtensible = validation.IsExtensible
}; };
} }
@@ -235,6 +280,48 @@ public class AudioProcessor
} }
} }
/// <summary>
/// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1).
/// The sample bytes are copied verbatim — EXTENSIBLE-PCM data is byte-identical to standard PCM —
/// only the header is replaced, so the vault stores a format the streaming pipeline already handles.
/// </summary>
/// <param name="buffer">The complete bytes of the original WAV file.</param>
/// <param name="metadata">
/// Metadata previously parsed from <paramref name="buffer"/>; its DataChunkPos, DataSize, Channels,
/// SampleRate and BitsPerSample values drive the rebuilt header.
/// </param>
/// <returns>
/// A new array holding a 44-byte RIFF/fmt/data header followed by the sample bytes. When the source
/// has no sample bytes at the recorded position the result is a header-only (zero-length data) WAV.
/// </returns>
private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
{
    const int headerSize = 44;

    // Sample bytes start right after the 8-byte "data" chunk header (4-byte id + 4-byte size).
    var dataStart = metadata.DataChunkPos + 8;

    // A data chunk header sitting at the very end of a truncated file would make this negative;
    // clamp so we never try to allocate or copy a negative number of bytes.
    var available = Math.Max(0, buffer.Length - dataStart);

    // DataSize is a uint narrowed to int upstream, so a declared size >= 2 GiB (including the
    // 0xFFFFFFFF "unknown length" placeholder some encoders write) arrives here as a negative
    // number. Treat that as an overshoot and use whatever is actually present.
    var declared = metadata.DataSize < 0 ? available : metadata.DataSize;

    // Clamp the declared data size to what is actually present; some encoders overshoot.
    var dataSize = Math.Min(declared, available);

    var result = new byte[headerSize + dataSize];

    var blockAlign = (ushort)(metadata.Channels * (metadata.BitsPerSample / 8));
    var byteRate = (uint)(metadata.SampleRate * blockAlign);

    // RIFF header: chunk size counts everything after the first 8 bytes (36 header bytes + data).
    System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
    BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
    System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);

    // fmt chunk (standard 16-byte PCM)
    System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
    BitConverter.GetBytes((uint)16).CopyTo(result, 16);
    BitConverter.GetBytes((ushort)1).CopyTo(result, 20); // audioFormat = PCM
    BitConverter.GetBytes((ushort)metadata.Channels).CopyTo(result, 22);
    BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
    BitConverter.GetBytes(byteRate).CopyTo(result, 28);
    BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
    BitConverter.GetBytes((ushort)metadata.BitsPerSample).CopyTo(result, 34);

    // data chunk
    System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
    BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);

    // Array.Copy validates the source index even for a zero-length copy, so skip it entirely
    // when there is nothing to copy (dataStart may lie past the end of the buffer).
    if (dataSize > 0)
    {
        Array.Copy(buffer, dataStart, result, headerSize, dataSize);
    }

    return result;
}
/// <summary> /// <summary>
/// Returns default WAV metadata for fallback scenarios /// Returns default WAV metadata for fallback scenarios
/// </summary> /// </summary>
@@ -305,6 +392,8 @@ public class AudioProcessor
public int BitsPerSample { get; set; } public int BitsPerSample { get; set; }
public int BlockAlign { get; set; } public int BlockAlign { get; set; }
public int DataSize { get; set; } public int DataSize { get; set; }
public int DataChunkPos { get; set; }
public bool IsExtensible { get; set; }
} }
/// <summary> /// <summary>
@@ -316,6 +405,7 @@ public class AudioProcessor
public string ErrorMessage { get; set; } = string.Empty; public string ErrorMessage { get; set; } = string.Empty;
public int FmtChunkPos { get; set; } public int FmtChunkPos { get; set; }
public int DataChunkPos { get; set; } public int DataChunkPos { get; set; }
public bool IsExtensible { get; set; }
} }
} }
+18 -1
View File
@@ -59,7 +59,24 @@ class WavUtils {
// PCM only. The server's WavOffsetService synthesises PCM-shaped headers, // PCM only. The server's WavOffsetService synthesises PCM-shaped headers,
// and AudioProcessor rejects non-PCM at upload — accepting Float here would // and AudioProcessor rejects non-PCM at upload — accepting Float here would
// hand the decoder a header/payload mismatch that surfaces as garbled audio. // hand the decoder a header/payload mismatch that surfaces as garbled audio.
if (audioFormat !== 1) { // WAVE_FORMAT_EXTENSIBLE (0xFFFE) is accepted only when its SubFormat GUID is
// PCM; the sample data is then byte-identical to standard PCM and every PCM
// field sits at the same offset. The vault normalizes uploads to plain PCM, so
// this is belt-and-suspenders for any EXTENSIBLE header that reaches the client.
if (audioFormat === 0xFFFE) {
// EXTENSIBLE needs the full extension: 16 base + 2 cbSize + 22 = 40 bytes.
if (chunkSize < 40) {
console.warn(`EXTENSIBLE fmt chunk too small: ${chunkSize} (need >= 40)`);
return null;
}
// SubFormat GUID at chunkOffset + 8 + 24; first two LE bytes are the format
// tag — 0x0001 == WAVE_FORMAT_PCM.
const subFormatTag = view.getUint16(chunkOffset + 8 + 24, true);
if (subFormatTag !== 1) {
console.warn(`Unsupported EXTENSIBLE SubFormat: ${subFormatTag} (only PCM supported)`);
return null;
}
} else if (audioFormat !== 1) {
console.warn(`Unsupported audio format: ${audioFormat} (only PCM=1 supported)`); console.warn(`Unsupported audio format: ${audioFormat} (only PCM=1 supported)`);
return null; return null;
} }