Files
deepdrft/DeepDrftContent/Processors/AudioProcessor.cs
T

817 lines
33 KiB
C#

using DeepDrftContent.FileDatabase.Models;
namespace DeepDrftContent.Processors;
/// <summary>
/// Service for processing audio files and extracting metadata
/// </summary>
public class AudioProcessor
{
// Header parsing never needs the audio body. The file is read in HeaderWindowStep-sized (64 KB)
// steps until the data-chunk header is locatable, and the window is capped at HeaderWindowCap
// (8 MB) so a pathological file with an enormous pre-data header cannot drive an unbounded
// allocation — such a file simply falls through to default metadata and passthrough storage, the
// same outcome as any unparseable WAV.
private const int HeaderWindowStep = 64 * 1024;
private const int HeaderWindowCap = 8 * 1024 * 1024;
/// <summary>
/// Processes a WAV file into a <see cref="ProcessedAudio"/> store plan: extracts metadata from a
/// bounded header window (never the whole file) and returns a streamed writer for the canonical
/// vault bytes. Standard PCM is stored verbatim (passthrough copy); EXTENSIBLE-PCM / IEEE-float /
/// padded-container WAVs are normalized to a plain 44-byte standard-PCM WAV, written progressively
/// so the vault only ever holds a format the streaming pipeline already handles.
/// </summary>
/// <param name="filePath">Path to the WAV file</param>
/// <param name="cancellationToken">Token observed while reading the header window and, later, by the returned writer.</param>
/// <returns>The store plan for the file (passthrough or streamed normalization).</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="ArgumentException">The file extension is not <c>.wav</c>.</exception>
/// <exception cref="InvalidOperationException">
/// Any other failure while building the plan (wraps the original exception). Cancellation is not
/// wrapped and propagates as <see cref="OperationCanceledException"/>.
/// </exception>
public async Task<ProcessedAudio?> ProcessWavFileAsync(string filePath, CancellationToken cancellationToken = default)
{
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException($"WAV file not found: {filePath}");
    }
    if (!Path.GetExtension(filePath).Equals(".wav", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("File must be a WAV file", nameof(filePath));
    }
    try
    {
        var fileLength = new FileInfo(filePath).Length;
        var window = await ReadWavHeaderWindowAsync(filePath, cancellationToken);
        var wavInfo = ExtractWavMetadata(window);
        if (!wavInfo.IsExtensible)
        {
            // Standard PCM (or the default-fallback path, which reports IsExtensible = false):
            // the source bytes are already a format the pipeline handles, so store them verbatim.
            return ProcessedAudio.Passthrough(filePath, ".wav", wavInfo.Duration, wavInfo.Bitrate, fileLength);
        }

        // EXTENSIBLE → streamed normalization. The output data size is derivable from the source
        // data size alone (no body read needed): verbatim keeps it, float drops 1 byte per sample
        // (4→3), padded keeps only the valid bytes per container sample.
        var dataStart = (long)wavInfo.DataChunkPos + 8;
        var available = Math.Max(0L, fileLength - dataStart);

        // The on-disk chunk size is an unsigned 32-bit value that the metadata carries in an int.
        // Sizes above int.MaxValue (including the 0xFFFFFFFF "unknown length" marker) therefore
        // arrive negative; reinterpret the bits as unsigned before clamping to what is present.
        long declaredDataSize = unchecked((uint)wavInfo.DataSize);
        var srcDataSize = Math.Min(declaredDataSize, available);

        NormalizeMode mode;
        int outBitsPerSample;
        long outDataSize;
        int containerBytes = 0;
        int validBytes = 0;
        if (wavInfo.IsFloat)
        {
            // The float converter consumes exactly 4 bytes per sample; any other container width
            // would be decoded as garbage, so reject it up front.
            var floatContainerBits = wavInfo.IsPaddedContainer
                ? wavInfo.ContainerBitsPerSample
                : wavInfo.BitsPerSample;
            if (floatContainerBits != 32)
            {
                throw new InvalidDataException(
                    $"Unsupported IEEE float container width: {floatContainerBits} bits (only 32-bit float is supported)");
            }
            mode = NormalizeMode.Float;
            outBitsPerSample = 24;
            outDataSize = (srcDataSize / 4) * 3;
        }
        else if (wavInfo.IsPaddedContainer)
        {
            mode = NormalizeMode.Padded;
            outBitsPerSample = wavInfo.BitsPerSample;
            containerBytes = wavInfo.ContainerBitsPerSample / 8;
            validBytes = wavInfo.BitsPerSample / 8;
            // A container narrower than the valid depth (or narrower than one byte) cannot be
            // re-packed: it would divide by zero here or read past each sample in the writer.
            if (containerBytes < validBytes)
            {
                throw new InvalidDataException(
                    $"Invalid padded container: {wavInfo.ContainerBitsPerSample}-bit container cannot hold {wavInfo.BitsPerSample}-bit samples");
            }
            outDataSize = (srcDataSize / containerBytes) * validBytes;
        }
        else
        {
            mode = NormalizeMode.Verbatim;
            outBitsPerSample = wavInfo.BitsPerSample;
            outDataSize = srcDataSize;
        }

        // Copy the scalars the writer needs so the lambda does not keep the metadata object alive.
        var channels = wavInfo.Channels;
        var sampleRate = wavInfo.SampleRate;
        return new ProcessedAudio(
            ".wav", wavInfo.Duration, wavInfo.Bitrate, 44 + outDataSize,
            (destination, ct) => WriteNormalizedWavAsync(
                filePath, dataStart, srcDataSize, channels, sampleRate, outBitsPerSample,
                outDataSize, mode, containerBytes, validBytes, destination, ct));
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        throw new InvalidOperationException($"Failed to process WAV file: {ex.Message}", ex);
    }
}
/// <summary>
/// Reads only enough of the file to contain the fmt chunk and the data chunk's 8-byte header, so
/// metadata parsing never loads the (potentially ~GB) audio body. Grows the window in 64 KB steps
/// until the data chunk is locatable or EOF/<see cref="HeaderWindowCap"/> is hit. Stops after the
/// first step that proves the bytes are not a RIFF/WAVE container, so a mislabeled non-WAV file
/// costs one read instead of the full cap.
/// </summary>
/// <param name="filePath">Path of the file to read from its beginning.</param>
/// <param name="ct">Token observed on every read.</param>
/// <returns>The bytes read so far; empty when the file is empty.</returns>
private static async Task<byte[]> ReadWavHeaderWindowAsync(string filePath, CancellationToken ct)
{
    await using var fs = new FileStream(
        filePath, FileMode.Open, FileAccess.Read, FileShare.Read,
        bufferSize: HeaderWindowStep, useAsync: true);
    using var ms = new MemoryStream();
    var buffer = new byte[HeaderWindowStep];
    while (ms.Length < HeaderWindowCap)
    {
        var read = await fs.ReadAsync(buffer, ct);
        if (read == 0)
        {
            break;
        }
        ms.Write(buffer, 0, read);
        var soFar = ms.ToArray();

        // Once the 12-byte preamble is in hand, a missing RIFF/WAVE signature means no amount of
        // further reading will make this parseable — return now rather than grow to the cap.
        if (soFar.Length >= 12 && !StartsWithRiffWave(soFar))
        {
            return soFar;
        }

        // FindChunk returns -1 on a partial window (the data chunk isn't reachable yet), so keep
        // reading until it is found or the cap/EOF is hit. On normal files the data chunk header
        // sits within the first 64 KB, so this loop runs exactly once.
        if (FindChunk(soFar, "data") >= 0)
        {
            return soFar;
        }
    }
    return ms.ToArray();

    // Byte-wise check of "RIFF" at offset 0 and "WAVE" at offset 8; caller guarantees >= 12 bytes.
    static bool StartsWithRiffWave(byte[] bytes) =>
        bytes[0] == (byte)'R' && bytes[1] == (byte)'I' && bytes[2] == (byte)'F' && bytes[3] == (byte)'F'
        && bytes[8] == (byte)'W' && bytes[9] == (byte)'A' && bytes[10] == (byte)'V' && bytes[11] == (byte)'E';
}
/// <summary>
/// Emits a normalized standard-PCM WAV into <paramref name="destination"/>. The 44-byte header
/// goes first, then the source data region is streamed across in bounded, sample-aligned chunks,
/// so memory use is governed by the chunk size rather than by the length of the audio.
/// </summary>
/// <param name="sourcePath">File whose data region is read.</param>
/// <param name="dataStart">Absolute offset of the first sample byte in the source.</param>
/// <param name="srcDataSize">Number of source bytes to consume.</param>
/// <param name="channels">Channel count written to the header.</param>
/// <param name="sampleRate">Sample rate written to the header.</param>
/// <param name="outBitsPerSample">Bit depth written to the header.</param>
/// <param name="outDataSize">Data-chunk size written to the header.</param>
/// <param name="mode">Which per-sample treatment the body receives.</param>
/// <param name="containerBytes">Bytes per source sample; used only for <see cref="NormalizeMode.Padded"/>.</param>
/// <param name="validBytes">Bytes kept per sample; used only for <see cref="NormalizeMode.Padded"/>.</param>
/// <param name="destination">Stream receiving the header and body.</param>
/// <param name="ct">Token observed on every read and write.</param>
private async Task WriteNormalizedWavAsync(
    string sourcePath, long dataStart, long srcDataSize,
    int channels, int sampleRate, int outBitsPerSample, long outDataSize,
    NormalizeMode mode, int containerBytes, int validBytes,
    Stream destination, CancellationToken ct)
{
    // Header first: the body that follows is produced incrementally.
    await destination.WriteAsync(
        BuildStandardPcmHeader(channels, sampleRate, outBitsPerSample, outDataSize), ct);

    await using var source = new FileStream(
        sourcePath, FileMode.Open, FileAccess.Read, FileShare.Read,
        bufferSize: 81920, useAsync: true);
    source.Seek(dataStart, SeekOrigin.Begin);

    if (mode == NormalizeMode.Verbatim)
    {
        await CopyBoundedAsync(source, destination, srcDataSize, ct);
    }
    else if (mode == NormalizeMode.Float)
    {
        // Each 4-byte float sample becomes 3 bytes of 24-bit PCM.
        await TransformBoundedAsync(source, destination, srcDataSize, 4, FloatToPcm, ct);
    }
    else if (mode == NormalizeMode.Padded)
    {
        await TransformBoundedAsync(source, destination, srcDataSize, containerBytes, StripPadding, ct);
    }

    byte[] FloatToPcm(byte[] chunk, int length) =>
        ConvertFloatTo24BitPcm(chunk, 0, length);

    byte[] StripPadding(byte[] chunk, int length) =>
        RepackPaddedContainer(chunk, 0, length, containerBytes * 8, validBytes * 8);
}
/// <summary>
/// Copies up to <paramref name="totalBytes"/> from <paramref name="src"/> to
/// <paramref name="dest"/> through a fixed 80 KB buffer. Stops early, without error, if the
/// source reaches end-of-stream first.
/// </summary>
private static async Task CopyBoundedAsync(Stream src, Stream dest, long totalBytes, CancellationToken ct)
{
    const int ChunkSize = 81920;
    var chunk = new byte[ChunkSize];
    for (var left = totalBytes; left > 0;)
    {
        // Never ask for more than what is still owed.
        var request = left < ChunkSize ? (int)left : ChunkSize;
        var got = await src.ReadAsync(chunk.AsMemory(0, request), ct);
        if (got == 0)
        {
            return;
        }
        await dest.WriteAsync(chunk.AsMemory(0, got), ct);
        left -= got;
    }
}
/// <summary>
/// Pumps up to <paramref name="totalBytes"/> from <paramref name="src"/> through
/// <paramref name="transform"/> and writes every transformed chunk to <paramref name="dest"/>.
/// The scratch buffer holds a whole number of <paramref name="unit"/>-byte samples; bytes that
/// do not yet complete a sample are moved to the front and joined with the next read. A partial
/// sample left over at the end is discarded, mirroring the integer division used by the
/// whole-buffer transforms.
/// </summary>
/// <param name="src">Stream positioned at the first byte to transform.</param>
/// <param name="dest">Stream receiving the transformed bytes.</param>
/// <param name="totalBytes">Upper bound on the number of source bytes consumed.</param>
/// <param name="unit">Size in bytes of one source sample.</param>
/// <param name="transform">Receives the scratch buffer and the count of aligned bytes at its start; returns the bytes to write.</param>
/// <param name="ct">Token observed on every read and write.</param>
private static async Task TransformBoundedAsync(
    Stream src, Stream dest, long totalBytes, int unit,
    Func<byte[], int, byte[]> transform, CancellationToken ct)
{
    // Largest multiple of the sample size that fits in 80 KB, but never less than one sample.
    var capacity = Math.Max(unit, (81920 / unit) * unit);
    var scratch = new byte[capacity];
    var pending = 0; // bytes of an incomplete sample parked at the front of scratch
    var left = totalBytes;

    while (left > 0)
    {
        var request = (int)Math.Min(capacity - pending, left);
        if (request == 0)
        {
            return;
        }

        var got = await src.ReadAsync(scratch.AsMemory(pending, request), ct);
        if (got == 0)
        {
            return;
        }
        left -= got;

        var inBuffer = pending + got;
        pending = inBuffer % unit;
        var aligned = inBuffer - pending;
        if (aligned == 0)
        {
            // Not even one full sample yet; the partial bytes already sit at the front.
            continue;
        }

        await dest.WriteAsync(transform(scratch, aligned), ct);
        if (pending > 0)
        {
            Array.Copy(scratch, aligned, scratch, 0, pending);
        }
    }
}
/// <summary>
/// Selects how WriteNormalizedWavAsync turns the source data region into the output body.
/// </summary>
private enum NormalizeMode
{
/// <summary>Sample bytes already standard PCM (EXTENSIBLE-PCM, depth == container width); copied through unchanged.</summary>
Verbatim,
/// <summary>IEEE float samples converted to 24-bit PCM (4 source bytes become 3 output bytes).</summary>
Float,
/// <summary>Padded container (e.g. 24-in-32) re-packed to the valid depth.</summary>
Padded
}
/// <summary>
/// Extracts the raw PCM data region and format parameters from a WAV buffer, reusing the
/// same chunk-walk and validation as metadata extraction. Returns null if the buffer is not
/// a valid PCM WAV (callers treat a null as "no profile computable" and continue) — unlike
/// <see cref="ExtractWavMetadata"/>, this does NOT fall back to synthetic defaults, because a
/// loudness profile over fabricated silence would be misleading.
/// </summary>
/// <param name="buffer">The complete WAV file bytes.</param>
/// <returns>
/// The PCM region and its format, or null for non-WAV input, IEEE-float or padded-container
/// EXTENSIBLE data, failed parameter validation, or an empty data region.
/// </returns>
public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
{
    // Copy the span to an array so the existing array-based parsers can be reused. The PCM
    // slice returned is a view over this array (no second copy of the data region).
    var bytes = buffer.ToArray();
    var validation = ValidateWavStructure(bytes);
    if (!validation.IsValid)
    {
        return null;
    }

    // Float and padded-container EXTENSIBLE require a sample-level transform to become integer PCM.
    // TryExtractPcm feeds loudness analysis, not storage, and must not hand back float bytes
    // mislabeled as integer PCM — out of scope here, so treat them as "no profile computable".
    if (validation.IsFloat)
    {
        return null;
    }

    WavMetadata metadata;
    try
    {
        metadata = ParseWavMetadata(bytes, validation);
        ValidateAudioParameters(metadata);
        if (metadata.IsPaddedContainer)
        {
            return null;
        }
    }
    catch
    {
        // Deliberate best-effort: any parse/validation failure means "no profile computable".
        return null;
    }

    // Data bytes begin 8 past the "data" chunk id (4 id + 4 size). Clamp the declared size to
    // what is actually present — some encoders write a size that overshoots the file.
    var dataStart = validation.DataChunkPos + 8;
    if (dataStart > bytes.Length)
    {
        return null;
    }
    var available = bytes.Length - dataStart;

    // The declared size is an unsigned 32-bit header field carried in an int, so values above
    // int.MaxValue (e.g. the 0xFFFFFFFF "unknown length" marker) arrive negative. Reinterpret
    // the bits as unsigned so such an overshoot is clamped instead of rejecting the file.
    long declared = unchecked((uint)metadata.DataSize);
    var dataLength = (int)Math.Min(declared, available);
    if (dataLength <= 0)
    {
        return null;
    }

    var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
    return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Reads only the WAV header region from <paramref name="stream"/> (a bounded window, never the
/// audio body) and returns where the PCM data region begins, how long it is, and the format
/// parameters needed to decode it — the streaming counterpart of <see cref="TryExtractPcm"/>. The
/// data length is clamped against <paramref name="totalFileLength"/> (the true backing-file size),
/// so the caller streams exactly the present PCM. Returns null for the same inputs
/// <see cref="TryExtractPcm"/> rejects — non-WAV bytes (mp3/flac), float, and padded-container
/// EXTENSIBLE — so the caller treats null as "no profile computable" and continues gracefully.
///
/// <paramref name="stream"/> must be positioned at the start; on return its position is past the
/// header window (the caller seeks to <c>DataStart</c> before streaming the body). No whole-file
/// buffer is allocated — peak memory is the bounded header window.
/// </summary>
/// <param name="stream">Readable stream positioned at the first byte of the file.</param>
/// <param name="totalFileLength">Size in bytes of the backing file, used to clamp the data length.</param>
/// <param name="cancellationToken">Token observed while reading the header window.</param>
/// <returns>Location and format of the PCM region, or null when no profile is computable.</returns>
public async Task<WavPcmStreamInfo?> TryReadPcmStreamInfoAsync(
    Stream stream, long totalFileLength, CancellationToken cancellationToken = default)
{
    var window = await ReadWavHeaderWindowAsync(stream, cancellationToken);
    if (window is null)
    {
        return null;
    }

    var validation = ValidateWavStructure(window);
    if (!validation.IsValid || validation.IsFloat)
    {
        return null;
    }

    WavMetadata metadata;
    try
    {
        metadata = ParseWavMetadata(window, validation);
        ValidateAudioParameters(metadata);
        if (metadata.IsPaddedContainer)
        {
            return null;
        }
    }
    catch
    {
        // Deliberate best-effort: any parse/validation failure means "no profile computable".
        return null;
    }

    long dataStart = validation.DataChunkPos + 8;
    if (dataStart > totalFileLength)
    {
        return null;
    }
    var available = totalFileLength - dataStart;

    // The declared size is an unsigned 32-bit header field carried in an int, so a data chunk
    // larger than int.MaxValue bytes arrives negative. Reinterpret the bits as unsigned; without
    // this every WAV with more than 2 GiB of PCM would be rejected by the length check below.
    long declared = unchecked((uint)metadata.DataSize);
    var dataLength = Math.Min(declared, available);
    if (dataLength <= 0)
    {
        return null;
    }

    return new WavPcmStreamInfo(
        dataStart, dataLength, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Accumulates bytes from <paramref name="stream"/> in 64 KB reads until the window holds the
/// data chunk's header, the stream ends, or <see cref="HeaderWindowCap"/> is reached. As soon as
/// at least 12 bytes are available and they lack the RIFF/WAVE signature the window is returned
/// as-is, so a non-WAV stream (mp3/flac) costs one read rather than the full cap.
/// </summary>
/// <param name="stream">Stream to read from its current position.</param>
/// <param name="ct">Token observed on every read.</param>
/// <returns>The bytes read, or null when the stream yielded nothing.</returns>
private static async Task<byte[]?> ReadWavHeaderWindowAsync(Stream stream, CancellationToken ct)
{
    using var window = new MemoryStream();
    var chunk = new byte[HeaderWindowStep];
    int got;
    while (window.Length < HeaderWindowCap && (got = await stream.ReadAsync(chunk, ct)) != 0)
    {
        window.Write(chunk, 0, got);
        var snapshot = window.ToArray();

        // With the 12-byte preamble in hand, a missing signature can never become a WAV.
        var provablyNotWav = snapshot.Length >= 12 && !HasRiffWaveSignature(snapshot);

        // FindChunk reports -1 until the data chunk header lies fully inside the window; for
        // ordinary files that happens on the first pass.
        if (provablyNotWav || FindChunk(snapshot, "data") >= 0)
        {
            return snapshot;
        }
    }
    return window.Length == 0 ? null : window.ToArray();
}
/// <summary>
/// Tells whether <paramref name="buffer"/> opens with a RIFF container whose form type is WAVE:
/// ASCII "RIFF" at offset 0 and "WAVE" at offset 8. Buffers shorter than 12 bytes never match.
/// </summary>
private static bool HasRiffWaveSignature(byte[] buffer)
{
    if (buffer.Length < 12)
    {
        return false;
    }
    var ascii = System.Text.Encoding.ASCII;
    var containerId = ascii.GetString(buffer, 0, 4);
    if (containerId != "RIFF")
    {
        return false;
    }
    var formType = ascii.GetString(buffer, 8, 4);
    return formType == "WAVE";
}
/// <summary>
/// Parses metadata out of a WAV header buffer: structural validation, field parsing, then
/// parameter sanity checks. This method never throws — if any stage fails, a warning is written
/// to the console and the default metadata is returned instead.
/// </summary>
/// <param name="buffer">Bytes containing at least the WAV header region.</param>
/// <returns>The parsed metadata, or the defaults when parsing fails.</returns>
private WavMetadata ExtractWavMetadata(byte[] buffer)
{
    WavMetadata parsed;
    try
    {
        var structure = ValidateWavStructure(buffer);
        if (structure.IsValid is false)
        {
            throw new InvalidDataException($"WAV validation failed: {structure.ErrorMessage}");
        }

        parsed = ParseWavMetadata(buffer, structure);
        ValidateAudioParameters(parsed);
    }
    catch (Exception failure)
    {
        Console.WriteLine($"Warning: WAV parsing failed, using defaults: {failure.Message}");
        return GetDefaultWavMetadata();
    }
    return parsed;
}
/// <summary>
/// Validates WAV file structure and returns parsing information: the RIFF/WAVE signature, a
/// fmt chunk whose fixed fields lie inside the buffer, a supported audio format (standard PCM, or
/// WAVE_FORMAT_EXTENSIBLE with a PCM or IEEE-float SubFormat), and a locatable data chunk.
/// </summary>
/// <param name="buffer">Bytes containing at least the WAV header region.</param>
/// <returns>
/// A result with <c>IsValid</c> set; on success it carries the fmt and data chunk offsets and
/// the EXTENSIBLE/float flags, on failure an <c>ErrorMessage</c>.
/// </returns>
private WavValidationResult ValidateWavStructure(byte[] buffer)
{
    if (buffer.Length < 44)
    {
        return Fail("File too short");
    }

    // Validate RIFF signature
    var riffSignature = System.Text.Encoding.ASCII.GetString(buffer, 0, 4);
    if (riffSignature != "RIFF")
    {
        return Fail("Invalid RIFF signature");
    }

    // Validate WAVE signature
    var waveSignature = System.Text.Encoding.ASCII.GetString(buffer, 8, 4);
    if (waveSignature != "WAVE")
    {
        return Fail("Invalid WAVE signature");
    }

    // Find and validate fmt chunk
    var fmtChunkPos = FindChunk(buffer, "fmt ");
    if (fmtChunkPos == -1)
    {
        return Fail("Missing fmt chunk");
    }
    var fmtChunkSize = BitConverter.ToUInt32(buffer, fmtChunkPos + 4);
    if (fmtChunkSize < 16)
    {
        return Fail("fmt chunk too small");
    }

    // FindChunk only guarantees the 8-byte chunk header is inside the buffer. The 16 fixed fmt
    // fields (format tag through bits-per-sample) are read here and by the metadata parser, so
    // a truncated window must be reported as invalid rather than throwing out of the readers.
    if (fmtChunkPos + 8 + 16 > buffer.Length)
    {
        return Fail("Invalid data: fmt chunk extends past end of file");
    }

    // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
    // (0xFFFE) is accepted when its SubFormat GUID indicates PCM (0x0001) or IEEE float
    // (0x0003). PCM sample data is byte-identical to standard PCM; float data is converted to
    // 24-bit PCM downstream. Either way the vault only ever holds standard PCM.
    var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8);
    var isExtensible = false;
    var isFloat = false;
    if (audioFormat == 0xFFFE)
    {
        // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
        if (fmtChunkSize < 40)
        {
            return Fail("Invalid data: EXTENSIBLE fmt chunk too small");
        }
        if (fmtChunkPos + 8 + 40 > buffer.Length)
        {
            return Fail("Invalid data: EXTENSIBLE fmt chunk extends past end of file");
        }

        // SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
        // first two bytes are the little-endian format tag: 0x0001 == WAVE_FORMAT_PCM,
        // 0x0003 == WAVE_FORMAT_IEEE_FLOAT.
        var subFormatPos = fmtChunkPos + 8 + 24;
        var subFormatTag = BitConverter.ToUInt16(buffer, subFormatPos);
        if (subFormatTag == 0x0001)
        {
            isExtensible = true;
        }
        else if (subFormatTag == 0x0003)
        {
            isExtensible = true;
            isFloat = true;
        }
        else
        {
            return Fail("Invalid data: EXTENSIBLE SubFormat is neither PCM nor IEEE float");
        }
    }
    else if (audioFormat != 1)
    {
        return Fail("Only PCM format supported");
    }

    // Find data chunk
    var dataChunkPos = FindChunk(buffer, "data");
    if (dataChunkPos == -1)
    {
        return Fail("Missing data chunk");
    }

    return new WavValidationResult
    {
        IsValid = true,
        FmtChunkPos = fmtChunkPos,
        DataChunkPos = dataChunkPos,
        IsExtensible = isExtensible,
        IsFloat = isFloat
    };

    // Builds the failure result for a given reason.
    static WavValidationResult Fail(string message) =>
        new WavValidationResult { IsValid = false, ErrorMessage = message };
}
/// <summary>
/// Reads the format fields out of an already-validated WAV buffer and derives duration and
/// bitrate from them. Field offsets are relative to the chunk positions recorded in
/// <paramref name="validation"/>.
/// </summary>
/// <param name="buffer">The buffer that <paramref name="validation"/> was produced from.</param>
/// <param name="validation">Chunk offsets and format flags from structural validation.</param>
/// <returns>The populated metadata; the values are not range-checked here.</returns>
private WavMetadata ParseWavMetadata(byte[] buffer, WavValidationResult validation)
{
    var fmt = validation.FmtChunkPos;

    ushort channels = BitConverter.ToUInt16(buffer, fmt + 10);
    uint sampleRate = BitConverter.ToUInt32(buffer, fmt + 12);
    uint byteRate = BitConverter.ToUInt32(buffer, fmt + 16);
    ushort blockAlign = BitConverter.ToUInt16(buffer, fmt + 20);
    ushort bitsPerSample = BitConverter.ToUInt16(buffer, fmt + 22);
    uint dataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);

    // In an EXTENSIBLE header the field at offset 22 is the container width, while the real
    // sample depth is wValidBitsPerSample (fmt + 8 + 18). The two usually agree (e.g. 24/24),
    // but the valid depth is what the metadata reports. When they disagree (e.g. 24 valid bits
    // in a 32-bit container) the container width is remembered separately so the block-align
    // check and the padded-container re-pack can use it.
    int containerBitsPerSample = 0;
    if (validation.IsExtensible)
    {
        ushort validBits = BitConverter.ToUInt16(buffer, fmt + 8 + 18);
        containerBitsPerSample = validBits != bitsPerSample ? bitsPerSample : 0;
        bitsPerSample = validBits;
    }

    // Duration comes from the header byte rate; a zero byte rate yields zero instead of dividing.
    double duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;
    int bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000);

    var metadata = new WavMetadata();
    metadata.Duration = duration;
    metadata.Bitrate = bitrate;
    metadata.SampleRate = (int)sampleRate;
    metadata.Channels = channels;
    metadata.BitsPerSample = bitsPerSample;
    metadata.ContainerBitsPerSample = containerBitsPerSample;
    metadata.BlockAlign = blockAlign;
    // The header field is unsigned 32-bit; the narrowing cast wraps values above int.MaxValue
    // to negative when overflow checking is off (the C# default).
    metadata.DataSize = (int)dataSize;
    metadata.DataChunkPos = validation.DataChunkPos;
    metadata.IsExtensible = validation.IsExtensible;
    metadata.IsFloat = validation.IsFloat;
    return metadata;
}
/// <summary>
/// Sanity-checks parsed audio parameters: 1–8 channels, a sample rate and a bit depth from the
/// supported lists, and a header block align that equals channels × bytes per sample.
/// </summary>
/// <param name="metadata">The parsed metadata to check.</param>
/// <exception cref="InvalidDataException">A parameter is outside the supported set.</exception>
private void ValidateAudioParameters(WavMetadata metadata)
{
    if (metadata.Channels is < 1 or > 8)
    {
        throw new InvalidDataException($"Invalid channel count: {metadata.Channels}");
    }

    var rateSupported = metadata.SampleRate
        is 8000 or 11025 or 16000 or 22050 or 44100
        or 48000 or 88200 or 96000 or 176400 or 192000;
    if (!rateSupported)
    {
        throw new InvalidDataException($"Unsupported sample rate: {metadata.SampleRate}");
    }

    if (metadata.BitsPerSample is not (8 or 16 or 24 or 32))
    {
        throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}");
    }

    // The header block align is computed from the container width, not the valid depth, so for
    // a padded EXTENSIBLE container (e.g. 24-in-32) the container width drives this comparison.
    var widthBits = metadata.IsPaddedContainer
        ? metadata.ContainerBitsPerSample
        : metadata.BitsPerSample;
    var frameBytes = metadata.Channels * (widthBits / 8);
    if (metadata.BlockAlign != frameBytes)
    {
        throw new InvalidDataException($"Invalid block align: expected {frameBytes}, got {metadata.BlockAlign}");
    }
}
/// <summary>
/// Produces the 44-byte header of a standard-PCM WAV (format tag 1) describing a body of
/// <paramref name="dataSize"/> bytes. Only the header is built here; the sample data is written
/// by the caller, so nothing proportional to the audio length is allocated.
/// </summary>
/// <param name="channels">Channel count.</param>
/// <param name="sampleRate">Samples per second.</param>
/// <param name="outBitsPerSample">Bit depth of the samples that will follow.</param>
/// <param name="dataSize">Size in bytes of the data chunk that will follow.</param>
/// <returns>A new 44-byte array: RIFF preamble, 16-byte fmt chunk, data chunk header.</returns>
private static byte[] BuildStandardPcmHeader(int channels, int sampleRate, int outBitsPerSample, long dataSize)
{
    var header = new byte[44];
    var frameBytes = (ushort)(channels * (outBitsPerSample / 8));
    var bytesPerSecond = (uint)(sampleRate * frameBytes);

    // RIFF preamble: the size field counts everything after itself (36 header bytes + body).
    Tag("RIFF", 0);
    U32((uint)(36 + dataSize), 4);
    Tag("WAVE", 8);

    // fmt chunk: the plain 16-byte PCM layout.
    Tag("fmt ", 12);
    U32(16, 16);
    U16(1, 20); // audioFormat = PCM
    U16((ushort)channels, 22);
    U32((uint)sampleRate, 24);
    U32(bytesPerSecond, 28);
    U16(frameBytes, 32);
    U16((ushort)outBitsPerSample, 34);

    // data chunk header; the body is appended by the caller.
    Tag("data", 36);
    U32((uint)dataSize, 40);
    return header;

    void Tag(string fourCc, int at) => System.Text.Encoding.ASCII.GetBytes(fourCc).CopyTo(header, at);
    void U16(ushort value, int at) => BitConverter.GetBytes(value).CopyTo(header, at);
    void U32(uint value, int at) => BitConverter.GetBytes(value).CopyTo(header, at);
}
/// <summary>
/// Converts 32-bit little-endian IEEE float samples (range [-1.0, 1.0]) to 24-bit signed PCM.
/// Each 4-byte source sample becomes 3 little-endian output bytes; output size is 3/4 of input.
/// Trailing bytes that do not form a complete 4-byte sample are ignored. Samples outside the
/// nominal range (including ±infinity) saturate at full scale with their sign preserved, and NaN
/// becomes silence.
/// </summary>
/// <param name="buffer">Buffer holding the float samples.</param>
/// <param name="dataStart">Offset of the first sample in <paramref name="buffer"/>.</param>
/// <param name="dataSize">Number of bytes of sample data starting at <paramref name="dataStart"/>.</param>
/// <returns>A new array of <c>(dataSize / 4) * 3</c> bytes of 24-bit PCM.</returns>
private static byte[] ConvertFloatTo24BitPcm(byte[] buffer, int dataStart, int dataSize)
{
    const double MaxPcm24 = 8388607.0;
    const double MinPcm24 = -8388608.0;

    var sampleCount = dataSize / 4;
    var output = new byte[sampleCount * 3];
    for (int i = 0; i < sampleCount; i++)
    {
        var sample = BitConverter.ToSingle(buffer, dataStart + i * 4);
        var scaled = sample * MaxPcm24;

        // Clamp while still in floating point. Casting an out-of-range or NaN double to int has
        // a platform/runtime-dependent result (int.MinValue on x86/x64 before .NET 9), which
        // would turn a loud positive sample into full-scale negative if the clamp came second.
        var value = double.IsNaN(scaled)
            ? 0
            : (int)Math.Clamp(scaled, MinPcm24, MaxPcm24);

        var o = i * 3;
        output[o] = (byte)(value & 0xFF);
        output[o + 1] = (byte)((value >> 8) & 0xFF);
        output[o + 2] = (byte)((value >> 16) & 0xFF);
    }
    return output;
}
/// <summary>
/// Removes container padding from padded-container sample data (for instance 24-bit samples
/// held in 32-bit containers). For every container-sized sample the first
/// <paramref name="validBits"/>/8 bytes are kept and the remaining bytes are skipped, so the
/// result is (validBits/containerBits) the size of the input. Bytes at the end that do not make
/// up a whole container sample are left out.
/// </summary>
/// <param name="buffer">Buffer holding the padded samples.</param>
/// <param name="dataStart">Offset of the first sample in <paramref name="buffer"/>.</param>
/// <param name="dataSize">Number of bytes of sample data starting at <paramref name="dataStart"/>.</param>
/// <param name="containerBits">Width of each stored sample in bits.</param>
/// <param name="validBits">Width of the meaningful part of each sample in bits.</param>
/// <returns>A new array containing only the kept bytes of each sample.</returns>
private static byte[] RepackPaddedContainer(byte[] buffer, int dataStart, int dataSize, int containerBits, int validBits)
{
    var strideIn = containerBits / 8;
    var strideOut = validBits / 8;
    var frames = dataSize / strideIn;
    var packed = new byte[frames * strideOut];

    var readAt = dataStart;
    var writeAt = 0;
    for (var frame = 0; frame < frames; frame++, readAt += strideIn)
    {
        // Little-endian layout: the meaningful bytes come first, the bytes after them are
        // padding / sign extension and are not copied.
        for (var k = 0; k < strideOut; k++)
        {
            packed[writeAt++] = buffer[readAt + k];
        }
    }
    return packed;
}
/// <summary>
/// Supplies the stand-in metadata used when a WAV cannot be parsed: a 180-second, 1411 kbps,
/// 44.1 kHz, stereo, 16-bit description with an empty data size. Every other property keeps its
/// type default, so the result is not flagged EXTENSIBLE or float.
/// </summary>
private WavMetadata GetDefaultWavMetadata() => new()
{
    SampleRate = 44100,
    Channels = 2,
    BitsPerSample = 16,
    BlockAlign = 4,
    Bitrate = 1411,
    Duration = 180.0,
    DataSize = 0
};
/// <summary>
/// Finds a chunk in the WAV file buffer with proper alignment handling. Walks the RIFF chunk
/// list starting right after the 12-byte RIFF/WAVE preamble, hopping from chunk header to chunk
/// header by each chunk's declared size (rounded up to an even byte count).
/// </summary>
/// <param name="buffer">Bytes beginning at the start of the file.</param>
/// <param name="chunkId">Four-character chunk id to look for, e.g. <c>"fmt "</c> or <c>"data"</c>.</param>
/// <returns>
/// Offset of the chunk id within <paramref name="buffer"/> (its 8-byte header is guaranteed to
/// lie inside the buffer), or -1 when the chunk is not reachable — including when a chunk
/// declares a size that runs past the end of the buffer.
/// </returns>
private static int FindChunk(byte[] buffer, string chunkId)
{
    var chunkBytes = System.Text.Encoding.ASCII.GetBytes(chunkId);

    // The walk is done in 64-bit arithmetic: chunk sizes are untrusted 32-bit values, and a
    // hostile size must simply end the walk rather than overflow the offset into a negative
    // index.
    long offset = 12; // Start after RIFF header
    while (offset <= buffer.Length - 8)
    {
        var pos = (int)offset;
        if (MatchesAt(buffer, pos, chunkBytes))
        {
            return pos;
        }

        // Move to next chunk: 8-byte header plus the body, padded to even alignment.
        var chunkSize = BitConverter.ToUInt32(buffer, pos + 4);
        offset += 8 + (((long)chunkSize + 1) & ~1L);
    }
    return -1;

    // Compares the four id bytes at the given position with the wanted id.
    static bool MatchesAt(byte[] data, int at, byte[] id)
    {
        for (int i = 0; i < 4; i++)
        {
            if (data[at + i] != id[i])
            {
                return false;
            }
        }
        return true;
    }
}
/// <summary>
/// WAV file metadata with complete audio information: the format fields read from the fmt chunk,
/// the location and declared size of the data chunk, and the duration/bitrate derived from them.
/// </summary>
private class WavMetadata
{
/// <summary>Length of the audio in seconds (declared data size divided by the header byte rate).</summary>
public double Duration { get; set; }
/// <summary>Bitrate computed as sample rate × channels × bit depth / 1000.</summary>
public int Bitrate { get; set; }
/// <summary>Samples per second.</summary>
public int SampleRate { get; set; }
/// <summary>Number of channels.</summary>
public int Channels { get; set; }
/// <summary>The valid sample depth — for EXTENSIBLE, wValidBitsPerSample.</summary>
public int BitsPerSample { get; set; }
/// <summary>
/// The container sample width for a padded EXTENSIBLE WAV whose valid depth is narrower
/// (e.g. 32 for a 24-in-32 file). Zero when the container matches the valid depth.
/// </summary>
public int ContainerBitsPerSample { get; set; }
/// <summary>The block align field as read from the fmt chunk.</summary>
public int BlockAlign { get; set; }
/// <summary>
/// The size declared in the data chunk header. The header field is an unsigned 32-bit value
/// stored here as an int, so sizes above int.MaxValue do not fit in the positive range.
/// </summary>
public int DataSize { get; set; }
/// <summary>Offset of the "data" chunk id within the parsed buffer; sample bytes start 8 bytes later.</summary>
public int DataChunkPos { get; set; }
/// <summary>True when the fmt chunk uses WAVE_FORMAT_EXTENSIBLE with an accepted SubFormat.</summary>
public bool IsExtensible { get; set; }
/// <summary>True when the SubFormat is IEEE float (converted to 24-bit PCM on normalization).</summary>
public bool IsFloat { get; set; }
/// <summary>True when valid samples are stored in a wider container that must be re-packed.</summary>
public bool IsPaddedContainer => ContainerBitsPerSample != 0 && ContainerBitsPerSample != BitsPerSample;
}
/// <summary>
/// Result of WAV structure validation: whether the buffer is an acceptable WAV and, when it is,
/// where its fmt and data chunks were found and which format variant it uses.
/// </summary>
private class WavValidationResult
{
/// <summary>True when every structural check passed; the remaining properties are only meaningful then.</summary>
public bool IsValid { get; set; }
/// <summary>Reason validation failed; empty when not set.</summary>
public string ErrorMessage { get; set; } = string.Empty;
/// <summary>Offset of the "fmt " chunk id within the validated buffer.</summary>
public int FmtChunkPos { get; set; }
/// <summary>Offset of the "data" chunk id within the validated buffer.</summary>
public int DataChunkPos { get; set; }
/// <summary>True when the format tag is WAVE_FORMAT_EXTENSIBLE with a PCM or IEEE-float SubFormat.</summary>
public bool IsExtensible { get; set; }
/// <summary>True when the EXTENSIBLE SubFormat is IEEE float rather than PCM.</summary>
public bool IsFloat { get; set; }
}
}
/// <summary>
/// The raw PCM sample region of a WAV plus the format parameters needed to interpret it.
/// <see cref="Pcm"/> is a view over the decoded buffer — the data chunk only, header excluded —
/// and its length is clamped to the bytes actually present in that buffer.
/// </summary>
/// <param name="Pcm">The PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(
ReadOnlyMemory<byte> Pcm,
int Channels,
int SampleRate,
int BitsPerSample);
/// <summary>
/// Where a WAV's PCM data region lives and how to decode it, without the bytes themselves — the
/// streaming counterpart of <see cref="PcmData"/>. The caller seeks to <see cref="DataStart"/> and
/// streams exactly <see cref="DataLength"/> bytes through a loudness accumulator. Offsets and
/// lengths are 64-bit so they can address positions beyond the range of an int.
/// </summary>
/// <param name="DataStart">Absolute byte offset of the first PCM sample (past the data chunk header).</param>
/// <param name="DataLength">PCM region length in bytes, clamped to what the backing file actually holds.</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct WavPcmStreamInfo(
long DataStart,
long DataLength,
int Channels,
int SampleRate,
int BitsPerSample);