fix(audio): support WAVE_FORMAT_EXTENSIBLE PCM WAVs, normalizing them to standard PCM on upload

This commit is contained in:
daniel-c-harvey
2026-06-10 15:20:34 -04:00
parent 0f5eaa42b5
commit 88ac5b2c88
2 changed files with 110 additions and 11 deletions
+92 -10
View File
@@ -28,10 +28,15 @@ public class AudioProcessor
{
var buffer = await File.ReadAllBytesAsync(filePath);
var wavInfo = ExtractWavMetadata(buffer);
// EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
// the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
// time so the vault only ever holds standard PCM and the client decode path stays unchanged.
var storedBuffer = wavInfo.IsExtensible ? NormalizeToStandardPcm(buffer, wavInfo) : buffer;
var parameters = new AudioBinaryParams(
Buffer: buffer,
Size: buffer.Length,
Buffer: storedBuffer,
Size: storedBuffer.Length,
Extension: ".wav",
Duration: wavInfo.Duration,
Bitrate: wavInfo.Bitrate
@@ -156,9 +161,30 @@ public class AudioProcessor
return new WavValidationResult { IsValid = false, ErrorMessage = "fmt chunk too small" };
}
// Validate audio format (PCM only)
// Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
// (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is
// then byte-identical to standard PCM and we normalize it downstream.
var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8);
if (audioFormat != 1)
var isExtensible = false;
if (audioFormat == 0xFFFE)
{
// EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
if (fmtChunkSize < 40)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE fmt chunk too small" };
}
// SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
// first two bytes are the little-endian format tag; 0x0001 == WAVE_FORMAT_PCM.
var subFormatPos = fmtChunkPos + 8 + 24;
if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Invalid data: EXTENSIBLE SubFormat is not PCM" };
}
isExtensible = true;
}
else if (audioFormat != 1)
{
return new WavValidationResult { IsValid = false, ErrorMessage = "Only PCM format supported" };
}
@@ -170,11 +196,12 @@ public class AudioProcessor
return new WavValidationResult { IsValid = false, ErrorMessage = "Missing data chunk" };
}
return new WavValidationResult
{
IsValid = true,
return new WavValidationResult
{
IsValid = true,
FmtChunkPos = fmtChunkPos,
DataChunkPos = dataChunkPos
DataChunkPos = dataChunkPos,
IsExtensible = isExtensible
};
}
@@ -190,6 +217,14 @@ public class AudioProcessor
var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22);
var dataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);
// For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
// wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24)
// but the valid bits are authoritative for the normalized header and metadata.
if (validation.IsExtensible)
{
bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18);
}
var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;
var bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000);
@@ -201,7 +236,9 @@ public class AudioProcessor
Channels = channels,
BitsPerSample = bitsPerSample,
BlockAlign = blockAlign,
DataSize = (int)dataSize
DataSize = (int)dataSize,
DataChunkPos = validation.DataChunkPos,
IsExtensible = validation.IsExtensible
};
}
@@ -235,6 +272,48 @@ public class AudioProcessor
}
}
/// <summary>
/// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1).
/// The sample bytes are copied verbatim — EXTENSIBLE-PCM data is byte-identical to standard PCM —
/// only the header is replaced, so the vault stores a format the streaming pipeline already handles.
/// </summary>
/// <param name="buffer">The complete original WAV file bytes.</param>
/// <param name="metadata">
/// Metadata extracted from <paramref name="buffer"/>. <c>DataChunkPos</c> must point at the
/// "data" chunk id; <c>BlockAlign</c> is assumed to be the value read from the original fmt chunk.
/// </param>
/// <returns>A new array containing a 44-byte PCM header followed by the copied sample data.</returns>
private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
{
    const int headerSize = 44;

    // Clamp the declared data size to what is actually present; some encoders overshoot.
    // DataSize originates from an unchecked uint -> int cast, so a declared size >= 2 GiB
    // (including the 0xFFFFFFFF "unknown length" placeholder written by streaming encoders)
    // shows up negative here. Treat that as an overshoot rather than a negative length.
    var dataStart = metadata.DataChunkPos + 8;
    var available = Math.Max(0, buffer.Length - dataStart);
    var declared = metadata.DataSize < 0 ? int.MaxValue : metadata.DataSize;
    var dataSize = Math.Min(declared, available);

    // For EXTENSIBLE files metadata.BitsPerSample may hold wValidBitsPerSample, which can be
    // smaller than the container width (e.g. 24 valid bits stored in 32-bit samples). Because
    // the samples are copied verbatim, the new header must describe the container layout, so
    // prefer the file's own block align and derive the per-sample width from it.
    var channels = (int)metadata.Channels;
    var blockAlignValue = metadata.BlockAlign > 0
        ? metadata.BlockAlign
        : channels * ((metadata.BitsPerSample + 7) / 8);
    var containerBits = channels > 0 && blockAlignValue > 0 && blockAlignValue % channels == 0
        ? (blockAlignValue / channels) * 8
        : metadata.BitsPerSample;

    var blockAlign = (ushort)blockAlignValue;
    var byteRate = (uint)((long)metadata.SampleRate * blockAlignValue);

    var result = new byte[headerSize + dataSize];

    // RIFF header
    System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
    BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
    System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);

    // fmt chunk (standard 16-byte PCM)
    System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
    BitConverter.GetBytes((uint)16).CopyTo(result, 16);
    BitConverter.GetBytes((ushort)1).CopyTo(result, 20); // audioFormat = PCM
    BitConverter.GetBytes((ushort)channels).CopyTo(result, 22);
    BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
    BitConverter.GetBytes(byteRate).CopyTo(result, 28);
    BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
    BitConverter.GetBytes((ushort)containerBits).CopyTo(result, 34);

    // data chunk
    System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
    BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);

    // Skip the copy when nothing is available: dataStart may then lie past the end of the
    // buffer, which Array.Copy rejects even for a zero-length copy.
    if (dataSize > 0)
    {
        Array.Copy(buffer, dataStart, result, headerSize, dataSize);
    }

    return result;
}
/// <summary>
/// Returns default WAV metadata for fallback scenarios
/// </summary>
@@ -305,6 +384,8 @@ public class AudioProcessor
public int BitsPerSample { get; set; }
public int BlockAlign { get; set; }
public int DataSize { get; set; }
public int DataChunkPos { get; set; }
public bool IsExtensible { get; set; }
}
/// <summary>
@@ -316,6 +397,7 @@ public class AudioProcessor
public string ErrorMessage { get; set; } = string.Empty;
public int FmtChunkPos { get; set; }
public int DataChunkPos { get; set; }
public bool IsExtensible { get; set; }
}
}