79bbbd4956
Processors now emit a ProcessedAudio plan with a streamed writer instead of a whole-file AudioBinary; vault writes stream via RegisterResourceStreamingAsync. Header parsing is bounded. Wave 2 (waveform/Opus) still re-reads the full file by design.
701 lines
28 KiB
C#
701 lines
28 KiB
C#
using DeepDrftContent.FileDatabase.Models;
|
|
|
|
namespace DeepDrftContent.Processors;
|
|
|
|
/// <summary>
|
|
/// Service for processing audio files and extracting metadata
|
|
/// </summary>
|
|
public class AudioProcessor
|
|
{
|
|
// Metadata is parsed from a header window only; the audio body is never needed for it. The window
// grows by HeaderWindowStep bytes per read until the data-chunk header can be located, and stops
// growing at HeaderWindowCap. The cap bounds the allocation for a pathological file whose pre-data
// chunks are enormous: such a file simply ends up on the default-metadata / passthrough path, the
// same outcome as any WAV that cannot be parsed.
private const int HeaderWindowStep = 1 << 16;               // 64 KiB per read
private const int HeaderWindowCap = 128 * HeaderWindowStep; // 8 MiB upper bound
|
|
|
|
/// <summary>
/// Plans how a WAV file is stored and returns that plan as a <see cref="ProcessedAudio"/>.
/// Only a bounded header window is read here (see <see cref="ReadWavHeaderWindowAsync"/>); this
/// method never reads the audio body itself.
/// <list type="bullet">
/// <item>Input that is not WAVE_FORMAT_EXTENSIBLE — including the default-metadata fallback used
/// when the header cannot be parsed — is handed to <c>ProcessedAudio.Passthrough</c> with the
/// source path and file length.</item>
/// <item>EXTENSIBLE input yields a plan whose writer delegate calls
/// <see cref="WriteNormalizedWavAsync"/>: a 44-byte standard-PCM header followed by the data
/// region copied verbatim, converted from 32-bit float to 24-bit PCM, or re-packed from a padded
/// container.</item>
/// </list>
/// </summary>
/// <param name="filePath">Path to the WAV file. Must exist and have a <c>.wav</c> extension (case-insensitive).</param>
/// <param name="cancellationToken">Observed while reading the header window. The writer delegate receives its own token when invoked.</param>
/// <returns>The store plan for the file.</returns>
/// <exception cref="FileNotFoundException"><paramref name="filePath"/> does not exist.</exception>
/// <exception cref="ArgumentException"><paramref name="filePath"/> does not end in <c>.wav</c>.</exception>
/// <exception cref="InvalidOperationException">
/// Any other non-cancellation failure while planning; the original exception is the inner exception.
/// </exception>
public async Task<ProcessedAudio?> ProcessWavFileAsync(string filePath, CancellationToken cancellationToken = default)
{
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException($"WAV file not found: {filePath}");
    }

    if (!Path.GetExtension(filePath).Equals(".wav", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("File must be a WAV file", nameof(filePath));
    }

    try
    {
        var fileLength = new FileInfo(filePath).Length;
        var window = await ReadWavHeaderWindowAsync(filePath, cancellationToken);
        var wavInfo = ExtractWavMetadata(window);

        if (!wavInfo.IsExtensible)
        {
            // Standard PCM (or the default-fallback path, which reports IsExtensible = false):
            // the source bytes are stored verbatim.
            return ProcessedAudio.Passthrough(filePath, ".wav", wavInfo.Duration, wavInfo.Bitrate, fileLength);
        }

        // EXTENSIBLE → streamed normalization. The output data size is derivable from the source
        // data size alone (no body read needed): verbatim keeps it, float drops 1 byte per sample
        // (4→3), padded keeps only the valid bytes per container sample.
        var dataStart = (long)wavInfo.DataChunkPos + 8;

        // Never let a file that shrank after the length was sampled produce a negative size.
        var available = Math.Max(0L, fileLength - dataStart);

        // The on-disk data size is an unsigned 32-bit field, but WavMetadata.DataSize is an int, so
        // anything above int.MaxValue (large files, or the 0xFFFFFFFF "unknown length" placeholder)
        // arrives here as a negative number. Reinterpret the bits as unsigned before clamping to
        // the bytes actually present; otherwise the plan length and header sizes go negative.
        var declaredDataSize = (long)unchecked((uint)wavInfo.DataSize);
        var srcDataSize = Math.Min(declaredDataSize, available);

        NormalizeMode mode;
        int outBitsPerSample;
        long outDataSize;
        int containerBytes = 0;
        int validBytes = 0;
        if (wavInfo.IsFloat)
        {
            mode = NormalizeMode.Float;
            outBitsPerSample = 24;
            outDataSize = (srcDataSize / 4) * 3;
        }
        else if (wavInfo.IsPaddedContainer)
        {
            mode = NormalizeMode.Padded;
            outBitsPerSample = wavInfo.BitsPerSample;
            containerBytes = wavInfo.ContainerBitsPerSample / 8;
            validBytes = wavInfo.BitsPerSample / 8;
            outDataSize = (srcDataSize / containerBytes) * validBytes;
        }
        else
        {
            mode = NormalizeMode.Verbatim;
            outBitsPerSample = wavInfo.BitsPerSample;
            outDataSize = srcDataSize;
        }

        // Copy to locals so the writer delegate captures plain values rather than the metadata object.
        var channels = wavInfo.Channels;
        var sampleRate = wavInfo.SampleRate;

        return new ProcessedAudio(
            ".wav", wavInfo.Duration, wavInfo.Bitrate, 44 + outDataSize,
            (destination, ct) => WriteNormalizedWavAsync(
                filePath, dataStart, srcDataSize, channels, sampleRate, outBitsPerSample,
                outDataSize, mode, containerBytes, validBytes, destination, ct));
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Cancellation propagates untouched; everything else is wrapped with the cause preserved.
        throw new InvalidOperationException($"Failed to process WAV file: {ex.Message}", ex);
    }
}
|
|
|
|
/// <summary>
/// Reads the leading part of <paramref name="filePath"/> until <see cref="FindChunk"/> can locate
/// the "data" chunk header in it, the file ends, or <see cref="HeaderWindowCap"/> bytes have been
/// accumulated. The audio body beyond that point is not read.
/// </summary>
/// <param name="filePath">File to read from; opened read-only with shared read access.</param>
/// <param name="ct">Observed on every read.</param>
/// <returns>The bytes accumulated so far (empty when the file is empty).</returns>
private static async Task<byte[]> ReadWavHeaderWindowAsync(string filePath, CancellationToken ct)
{
    await using var file = new FileStream(
        filePath, FileMode.Open, FileAccess.Read, FileShare.Read,
        bufferSize: HeaderWindowStep, useAsync: true);

    using var accumulated = new MemoryStream();
    var step = new byte[HeaderWindowStep];
    byte[] window = Array.Empty<byte>();

    for (; ; )
    {
        if (accumulated.Length >= HeaderWindowCap)
        {
            break;
        }

        var got = await file.ReadAsync(step, ct);
        if (got == 0)
        {
            break; // end of file
        }

        accumulated.Write(step, 0, got);

        // FindChunk answers -1 while the data chunk header is still beyond the window, so the
        // window keeps growing until the header shows up. The snapshot taken here is also what is
        // returned when the loop ends for any other reason, since nothing is appended after it.
        window = accumulated.ToArray();
        if (FindChunk(window, "data") >= 0)
        {
            break;
        }
    }

    return window;
}
|
|
|
|
/// <summary>
/// Emits a normalized WAV into <paramref name="destination"/>: first the 44-byte header produced
/// by <see cref="BuildStandardPcmHeader"/>, then the source data region pushed through the copy or
/// transform that matches <paramref name="mode"/>. The source is read in bounded chunks by the
/// helpers, never as one buffer.
/// </summary>
/// <param name="sourcePath">File the sample data is read from.</param>
/// <param name="dataStart">Byte offset in the source where the sample data begins.</param>
/// <param name="srcDataSize">Number of source bytes to consume from <paramref name="dataStart"/>.</param>
/// <param name="channels">Channel count written to the header.</param>
/// <param name="sampleRate">Sample rate written to the header.</param>
/// <param name="outBitsPerSample">Bit depth written to the header.</param>
/// <param name="outDataSize">Data-chunk size written to the header.</param>
/// <param name="mode">Which body treatment to apply.</param>
/// <param name="containerBytes">Bytes per container sample; used only for <see cref="NormalizeMode.Padded"/>.</param>
/// <param name="validBytes">Bytes kept per sample; used only for <see cref="NormalizeMode.Padded"/>.</param>
/// <param name="destination">Stream receiving the header and body.</param>
/// <param name="ct">Observed on every read and write.</param>
private async Task WriteNormalizedWavAsync(
    string sourcePath, long dataStart, long srcDataSize,
    int channels, int sampleRate, int outBitsPerSample, long outDataSize,
    NormalizeMode mode, int containerBytes, int validBytes,
    Stream destination, CancellationToken ct)
{
    // The header goes out before the source is even opened.
    await destination.WriteAsync(
        BuildStandardPcmHeader(channels, sampleRate, outBitsPerSample, outDataSize), ct);

    await using var source = new FileStream(
        sourcePath, FileMode.Open, FileAccess.Read, FileShare.Read,
        bufferSize: 81920, useAsync: true);
    source.Seek(dataStart, SeekOrigin.Begin);

    Task body = mode switch
    {
        NormalizeMode.Verbatim =>
            CopyBoundedAsync(source, destination, srcDataSize, ct),

        // Each 4-byte float sample becomes 3 bytes of 24-bit PCM.
        NormalizeMode.Float =>
            TransformBoundedAsync(
                source, destination, srcDataSize, 4,
                (chunk, count) => ConvertFloatTo24BitPcm(chunk, 0, count),
                ct),

        NormalizeMode.Padded =>
            TransformBoundedAsync(
                source, destination, srcDataSize, containerBytes,
                (chunk, count) => RepackPaddedContainer(chunk, 0, count, containerBytes * 8, validBytes * 8),
                ct),

        // An unrecognized mode writes the header only.
        _ => Task.CompletedTask,
    };

    await body;
}
|
|
|
|
/// <summary>
/// Copies up to <paramref name="totalBytes"/> bytes from <paramref name="src"/> to
/// <paramref name="dest"/> through a fixed 81920-byte scratch buffer. Stops early, without
/// error, if the source runs out first.
/// </summary>
/// <param name="src">Stream to read from, starting at its current position.</param>
/// <param name="dest">Stream to write to.</param>
/// <param name="totalBytes">Upper bound on bytes copied; zero or negative copies nothing.</param>
/// <param name="ct">Observed on every read and write.</param>
private static async Task CopyBoundedAsync(Stream src, Stream dest, long totalBytes, CancellationToken ct)
{
    const int ScratchSize = 81920;
    var scratch = new byte[ScratchSize];

    for (var left = totalBytes; left > 0;)
    {
        var request = left < ScratchSize ? (int)left : ScratchSize;
        var got = await src.ReadAsync(scratch.AsMemory(0, request), ct);
        if (got == 0)
        {
            return; // source exhausted before the requested total
        }

        await dest.WriteAsync(scratch.AsMemory(0, got), ct);
        left -= got;
    }
}
|
|
|
|
/// <summary>
/// Pumps up to <paramref name="totalBytes"/> bytes from <paramref name="src"/> through
/// <paramref name="transform"/> and writes each result to <paramref name="dest"/>. The transform
/// is only ever handed a byte count that is a multiple of <paramref name="unit"/>; bytes that do
/// not yet complete a unit are moved to the front of the buffer and completed by the next read.
/// Whatever partial unit is left when the input ends is discarded.
/// </summary>
/// <param name="src">Stream to read from, starting at its current position.</param>
/// <param name="dest">Stream receiving the transformed bytes.</param>
/// <param name="totalBytes">Upper bound on source bytes consumed.</param>
/// <param name="unit">Size in bytes of one indivisible input sample.</param>
/// <param name="transform">
/// Receives the working buffer and the number of leading bytes to convert; returns the bytes to write.
/// </param>
/// <param name="ct">Observed on every read and write.</param>
private static async Task TransformBoundedAsync(
    Stream src, Stream dest, long totalBytes, int unit,
    Func<byte[], int, byte[]> transform, CancellationToken ct)
{
    // Largest multiple of the unit that fits in 81920 bytes, but never less than one unit.
    var capacity = 81920 - (81920 % unit);
    if (capacity < unit)
    {
        capacity = unit;
    }

    var window = new byte[capacity];
    var pending = 0; // bytes of an incomplete unit parked at the start of the window
    var left = totalBytes;

    while (left > 0)
    {
        var room = capacity - pending;
        var request = left < room ? (int)left : room;
        if (request == 0)
        {
            return;
        }

        var got = await src.ReadAsync(window.AsMemory(pending, request), ct);
        if (got == 0)
        {
            return; // source exhausted
        }

        left -= got;

        var have = pending + got;
        var tail = have % unit;
        var aligned = have - tail;

        if (aligned > 0)
        {
            await dest.WriteAsync(transform(window, aligned), ct);
        }

        if (tail > 0)
        {
            Array.Copy(window, aligned, window, 0, tail);
        }

        pending = tail;
    }
}
|
|
|
|
/// <summary>Body treatment applied when an EXTENSIBLE WAV is rewritten as standard PCM.</summary>
private enum NormalizeMode
{
    /// <summary>EXTENSIBLE-PCM whose valid depth equals the container width: sample bytes are copied unchanged.</summary>
    Verbatim = 0,

    /// <summary>IEEE float samples, converted to 24-bit PCM.</summary>
    Float = 1,

    /// <summary>Padded container (e.g. 24-in-32), re-packed down to the valid depth.</summary>
    Padded = 2
}
|
|
|
|
/// <summary>
/// Extracts the raw PCM data region and its format parameters from an in-memory WAV, using the
/// same structure validation, metadata parsing and parameter checks as metadata extraction.
/// Unlike <see cref="ExtractWavMetadata"/> this never substitutes default values: any buffer
/// that cannot be parsed yields <c>null</c>.
/// </summary>
/// <param name="buffer">Complete WAV bytes, header included.</param>
/// <returns>
/// The data region and format, or <c>null</c> when the buffer is not a valid WAV, holds IEEE-float
/// or padded-container samples (both would need a sample-level transform before they are integer
/// PCM), or contains no sample bytes.
/// </returns>
public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
{
    // Copy the span to an array so the array-based parsers can be reused. The PCM slice returned
    // is a view over this array (no second copy of the data region).
    var bytes = buffer.ToArray();

    WavValidationResult validation;
    WavMetadata metadata;
    try
    {
        // Validation sits inside the try as well: it reads header fields by offset and can throw
        // on a truncated or hostile buffer, which must surface as "null", not as an exception.
        validation = ValidateWavStructure(bytes);
        if (!validation.IsValid)
        {
            return null;
        }

        // Float samples must not be handed back labelled as integer PCM.
        if (validation.IsFloat)
        {
            return null;
        }

        metadata = ParseWavMetadata(bytes, validation);
        ValidateAudioParameters(metadata);
        if (metadata.IsPaddedContainer)
        {
            return null;
        }
    }
    catch
    {
        return null;
    }

    // Data bytes begin 8 past the "data" chunk id (4 id + 4 size).
    var dataStart = validation.DataChunkPos + 8;
    if (dataStart > bytes.Length)
    {
        return null;
    }

    // Clamp the declared size to what is actually present — some encoders write a size that
    // overshoots the file. The on-disk field is unsigned 32-bit while WavMetadata.DataSize is an
    // int, so an overshoot above int.MaxValue (notably the 0xFFFFFFFF placeholder) shows up here
    // as a negative number; reinterpret it as unsigned so it is clamped rather than rejected.
    var available = bytes.Length - dataStart;
    var declared = (long)unchecked((uint)metadata.DataSize);
    var dataLength = (int)Math.Min(declared, available);
    if (dataLength <= 0)
    {
        return null;
    }

    var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
    return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
|
|
|
|
/// <summary>
/// Parses and validates WAV metadata from <paramref name="buffer"/>. This method does not throw
/// for bad input: if structure validation, parsing or parameter validation fails, a warning is
/// written to the console and <see cref="GetDefaultWavMetadata"/> is returned instead.
/// </summary>
/// <param name="buffer">Leading bytes of the WAV file (header window or whole file).</param>
/// <returns>The parsed metadata, or the default metadata on any failure.</returns>
private WavMetadata ExtractWavMetadata(byte[] buffer)
{
    string failure;
    try
    {
        var structure = ValidateWavStructure(buffer);
        if (structure.IsValid)
        {
            var parsed = ParseWavMetadata(buffer, structure);
            ValidateAudioParameters(parsed);
            return parsed;
        }

        failure = $"WAV validation failed: {structure.ErrorMessage}";
    }
    catch (Exception ex)
    {
        failure = ex.Message;
    }

    Console.WriteLine($"Warning: WAV parsing failed, using defaults: {failure}");
    return GetDefaultWavMetadata();
}
|
|
|
|
/// <summary>
/// Checks that <paramref name="buffer"/> has the RIFF/WAVE signatures, a usable fmt chunk and a
/// locatable data chunk, and records where those chunks start.
/// </summary>
/// <param name="buffer">Leading bytes of the WAV file (header window or whole file).</param>
/// <returns>
/// A result with <c>IsValid = true</c> plus the fmt/data chunk offsets and the EXTENSIBLE/float
/// flags, or <c>IsValid = false</c> with an <c>ErrorMessage</c> naming the first failed check.
/// </returns>
private WavValidationResult ValidateWavStructure(byte[] buffer)
{
    static WavValidationResult Invalid(string reason) =>
        new WavValidationResult { IsValid = false, ErrorMessage = reason };

    if (buffer.Length < 44)
    {
        return Invalid("File too short");
    }

    // Validate RIFF signature
    if (System.Text.Encoding.ASCII.GetString(buffer, 0, 4) != "RIFF")
    {
        return Invalid("Invalid RIFF signature");
    }

    // Validate WAVE signature
    if (System.Text.Encoding.ASCII.GetString(buffer, 8, 4) != "WAVE")
    {
        return Invalid("Invalid WAVE signature");
    }

    // Find and validate fmt chunk
    var fmtChunkPos = FindChunk(buffer, "fmt ");
    if (fmtChunkPos == -1)
    {
        return Invalid("Missing fmt chunk");
    }

    var fmtChunkSize = BitConverter.ToUInt32(buffer, fmtChunkPos + 4);
    if (fmtChunkSize < 16)
    {
        return Invalid("fmt chunk too small");
    }

    // FindChunk only guarantees that the 8-byte chunk header is inside the buffer. The 16 base
    // format bytes are read by offset below and by the metadata parser, so make sure they are
    // present; otherwise a truncated fmt chunk would raise instead of reporting invalid input.
    var fmtDataPos = fmtChunkPos + 8;
    if ((long)fmtDataPos + 16 > buffer.Length)
    {
        return Invalid("fmt chunk extends past end of file");
    }

    // Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE (0xFFFE) is accepted when
    // the first two bytes of its SubFormat GUID are 0x0001 (PCM) or 0x0003 (IEEE float).
    var audioFormat = BitConverter.ToUInt16(buffer, fmtDataPos);
    var isExtensible = false;
    var isFloat = false;
    if (audioFormat == 0xFFFE)
    {
        // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
        if (fmtChunkSize < 40)
        {
            return Invalid("Invalid data: EXTENSIBLE fmt chunk too small");
        }

        if ((long)fmtDataPos + 40 > buffer.Length)
        {
            return Invalid("Invalid data: EXTENSIBLE fmt chunk extends past end of file");
        }

        // SubFormat GUID begins 24 bytes into the fmt chunk data; its first two bytes are the
        // little-endian format tag.
        var subFormatTag = BitConverter.ToUInt16(buffer, fmtDataPos + 24);
        switch (subFormatTag)
        {
            case 0x0001: // WAVE_FORMAT_PCM
                isExtensible = true;
                break;
            case 0x0003: // WAVE_FORMAT_IEEE_FLOAT
                isExtensible = true;
                isFloat = true;
                break;
            default:
                return Invalid("Invalid data: EXTENSIBLE SubFormat is neither PCM nor IEEE float");
        }
    }
    else if (audioFormat != 1)
    {
        return Invalid("Only PCM format supported");
    }

    // Find data chunk
    var dataChunkPos = FindChunk(buffer, "data");
    if (dataChunkPos == -1)
    {
        return Invalid("Missing data chunk");
    }

    return new WavValidationResult
    {
        IsValid = true,
        FmtChunkPos = fmtChunkPos,
        DataChunkPos = dataChunkPos,
        IsExtensible = isExtensible,
        IsFloat = isFloat
    };
}
|
|
|
|
/// <summary>
/// Reads the format fields of the fmt chunk and the declared size of the data chunk at the
/// offsets recorded in <paramref name="validation"/>, and derives duration and bitrate from them.
/// No range checks happen here; the values are taken as found.
/// </summary>
/// <param name="buffer">Bytes the validation result was computed from.</param>
/// <param name="validation">Chunk offsets and format flags for <paramref name="buffer"/>.</param>
/// <returns>The metadata as read from the header.</returns>
private WavMetadata ParseWavMetadata(byte[] buffer, WavValidationResult validation)
{
    var fmt = validation.FmtChunkPos;
    var data = validation.DataChunkPos;

    ushort channelCount = BitConverter.ToUInt16(buffer, fmt + 10);
    uint samplesPerSecond = BitConverter.ToUInt32(buffer, fmt + 12);
    uint bytesPerSecond = BitConverter.ToUInt32(buffer, fmt + 16);
    ushort frameBytes = BitConverter.ToUInt16(buffer, fmt + 20);
    ushort containerField = BitConverter.ToUInt16(buffer, fmt + 22);
    uint declaredDataBytes = BitConverter.ToUInt32(buffer, data + 4);

    // In an EXTENSIBLE header the offset-22 field is the container width, while the true sample
    // depth is wValidBitsPerSample (18 bytes into the fmt data). The valid depth is what gets
    // reported; the container width is kept only when it differs, so later stages can check
    // BlockAlign against it and re-pack the padded samples.
    ushort depth = containerField;
    int paddedContainerBits = 0;
    if (validation.IsExtensible)
    {
        depth = BitConverter.ToUInt16(buffer, fmt + 8 + 18);
        paddedContainerBits = depth == containerField ? 0 : containerField;
    }

    double seconds = bytesPerSecond == 0 ? 0.0 : (double)declaredDataBytes / bytesPerSecond;
    int kilobitsPerSecond = (int)((samplesPerSecond * channelCount * depth) / 1000);

    // NOTE(review): DataSize is an int while the on-disk field is unsigned 32-bit, so a declared
    // size above int.MaxValue does not fit this property as a positive number.
    return new WavMetadata
    {
        Duration = seconds,
        Bitrate = kilobitsPerSecond,
        SampleRate = (int)samplesPerSecond,
        Channels = channelCount,
        BitsPerSample = depth,
        ContainerBitsPerSample = paddedContainerBits,
        BlockAlign = frameBytes,
        DataSize = (int)declaredDataBytes,
        DataChunkPos = data,
        IsExtensible = validation.IsExtensible,
        IsFloat = validation.IsFloat
    };
}
|
|
|
|
/// <summary>
/// Rejects metadata whose parameters the rest of the processor cannot handle: channel count
/// outside 1–8, a sample rate or bit depth outside the supported sets, float samples that are not
/// 32-bit, a padded container whose width is unsupported or narrower than the valid depth, or a
/// BlockAlign that disagrees with the channel count and sample width.
/// </summary>
/// <param name="metadata">Metadata as read from the header.</param>
/// <exception cref="InvalidDataException">A parameter fails one of the checks above.</exception>
private void ValidateAudioParameters(WavMetadata metadata)
{
    static bool IsSupportedDepth(int bits) => bits is 8 or 16 or 24 or 32;

    if (metadata.Channels < 1 || metadata.Channels > 8)
    {
        throw new InvalidDataException($"Invalid channel count: {metadata.Channels}");
    }

    if (metadata.SampleRate is not (8000 or 11025 or 16000 or 22050 or 44100 or 48000 or 88200 or 96000 or 176400 or 192000))
    {
        throw new InvalidDataException($"Unsupported sample rate: {metadata.SampleRate}");
    }

    if (!IsSupportedDepth(metadata.BitsPerSample))
    {
        throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}");
    }

    // The float-to-PCM conversion consumes exactly four bytes per sample, so any other float
    // width would be converted into noise if it were let through.
    if (metadata.IsFloat && metadata.BitsPerSample != 32)
    {
        throw new InvalidDataException($"Unsupported float bit depth: {metadata.BitsPerSample}");
    }

    // The header BlockAlign reflects the container width, not the valid bit depth. For a padded
    // EXTENSIBLE container (e.g. 24-in-32) the container width is authoritative for this check.
    var blockAlignBits = metadata.BitsPerSample;
    if (metadata.IsPaddedContainer)
    {
        blockAlignBits = metadata.ContainerBitsPerSample;

        // Re-packing divides by the container byte width and copies the valid bytes out of each
        // container, so the container must be a whole supported width no narrower than the
        // samples it holds. Anything else would divide by zero or read outside a sample.
        if (!IsSupportedDepth(blockAlignBits) || blockAlignBits < metadata.BitsPerSample)
        {
            throw new InvalidDataException(
                $"Unsupported container width: {blockAlignBits} bits for {metadata.BitsPerSample}-bit samples");
        }
    }

    var expectedBlockAlign = metadata.Channels * (blockAlignBits / 8);
    if (metadata.BlockAlign != expectedBlockAlign)
    {
        throw new InvalidDataException($"Invalid block align: expected {expectedBlockAlign}, got {metadata.BlockAlign}");
    }
}
|
|
|
|
/// <summary>
/// Produces the 44-byte header of a standard-PCM WAV (format tag 1): RIFF/WAVE preamble, a
/// 16-byte fmt chunk and the data chunk header. Only the header is built; the sample bytes are
/// written by the caller.
/// </summary>
/// <param name="channels">Channel count.</param>
/// <param name="sampleRate">Samples per second.</param>
/// <param name="outBitsPerSample">Bit depth reported in the header.</param>
/// <param name="dataSize">Size in bytes reported for the data chunk.</param>
/// <returns>A new 44-byte array.</returns>
private static byte[] BuildStandardPcmHeader(int channels, int sampleRate, int outBitsPerSample, long dataSize)
{
    var frameBytes = (ushort)(channels * (outBitsPerSample / 8));
    var bytesPerSecond = (uint)(sampleRate * frameBytes);

    var header = new byte[44];
    var cursor = 0;

    // Fields are appended strictly in file order, so the cursor reproduces the fixed offsets.
    void Put(byte[] field)
    {
        field.CopyTo(header, cursor);
        cursor += field.Length;
    }

    void Tag(string fourCc) => Put(System.Text.Encoding.ASCII.GetBytes(fourCc));

    // RIFF preamble (offsets 0-11)
    Tag("RIFF");
    Put(BitConverter.GetBytes((uint)(36 + dataSize)));
    Tag("WAVE");

    // fmt chunk, standard 16-byte PCM layout (offsets 12-35)
    Tag("fmt ");
    Put(BitConverter.GetBytes((uint)16));
    Put(BitConverter.GetBytes((ushort)1)); // audioFormat = PCM
    Put(BitConverter.GetBytes((ushort)channels));
    Put(BitConverter.GetBytes((uint)sampleRate));
    Put(BitConverter.GetBytes(bytesPerSecond));
    Put(BitConverter.GetBytes(frameBytes));
    Put(BitConverter.GetBytes((ushort)outBitsPerSample));

    // data chunk header (offsets 36-43)
    Tag("data");
    Put(BitConverter.GetBytes((uint)dataSize));

    return header;
}
|
|
|
|
/// <summary>
/// Converts 32-bit IEEE float samples (nominal range [-1.0, 1.0]) to 24-bit signed PCM.
/// Each 4-byte source sample becomes 3 little-endian output bytes, so the output is 3/4 the size
/// of the input. Samples are scaled by 8388607 and truncated toward zero; anything outside the
/// 24-bit range — including infinities — saturates at the nearest limit, and NaN becomes silence.
/// Trailing bytes that do not form a complete 4-byte sample are ignored.
/// </summary>
/// <param name="buffer">Array holding the float samples.</param>
/// <param name="dataStart">Offset of the first sample in <paramref name="buffer"/>.</param>
/// <param name="dataSize">Number of bytes of sample data starting at <paramref name="dataStart"/>.</param>
/// <returns>A new array of packed 24-bit samples.</returns>
private static byte[] ConvertFloatTo24BitPcm(byte[] buffer, int dataStart, int dataSize)
{
    const double FullScale = 8388607.0;
    const int MinSample = -8388608;
    const int MaxSample = 8388607;

    var sampleCount = dataSize / 4;
    var output = new byte[sampleCount * 3];

    for (int i = 0; i < sampleCount; i++)
    {
        var scaled = BitConverter.ToSingle(buffer, dataStart + i * 4) * FullScale;

        // Saturate while the value is still a double. Casting first and clamping afterwards is
        // not safe: converting NaN, an infinity or anything beyond the int range to int has an
        // unspecified result in unchecked code, so a loud positive sample could come out as
        // full-scale negative.
        int value;
        if (double.IsNaN(scaled))
        {
            value = 0;
        }
        else if (scaled >= MaxSample)
        {
            value = MaxSample;
        }
        else if (scaled <= MinSample)
        {
            value = MinSample;
        }
        else
        {
            value = (int)scaled;
        }

        var o = i * 3;
        output[o] = (byte)(value & 0xFF);
        output[o + 1] = (byte)((value >> 8) & 0xFF);
        output[o + 2] = (byte)((value >> 16) & 0xFF);
    }

    return output;
}
|
|
|
|
/// <summary>
/// Strips container padding from padded-container EXTENSIBLE samples (e.g. 24-bit valid samples
/// stored in 32-bit containers). WAVEFORMATEXTENSIBLE places the valid bits in the most
/// significant bits of the container with the unused low-order bits zeroed, so for little-endian
/// data the sample is the <em>last</em> <paramref name="validBits"/>/8 bytes of each container and
/// the leading byte(s) are the padding that gets dropped. Output size is
/// (validBits / containerBits) of the input. Trailing bytes that do not form a complete
/// container sample are ignored.
/// </summary>
/// <param name="buffer">Array holding the container samples.</param>
/// <param name="dataStart">Offset of the first container in <paramref name="buffer"/>.</param>
/// <param name="dataSize">Number of bytes of sample data starting at <paramref name="dataStart"/>.</param>
/// <param name="containerBits">Width of each stored container in bits (multiple of 8).</param>
/// <param name="validBits">Width of the sample kept from each container in bits (multiple of 8).</param>
/// <returns>A new array of tightly packed samples.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The container is narrower than one byte, or the valid width exceeds the container width.
/// </exception>
private static byte[] RepackPaddedContainer(byte[] buffer, int dataStart, int dataSize, int containerBits, int validBits)
{
    var containerBytes = containerBits / 8;
    var validBytes = validBits / 8;

    if (containerBytes <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(containerBits), containerBits, "Container must be at least 8 bits wide");
    }

    if (validBytes < 0 || validBytes > containerBytes)
    {
        throw new ArgumentOutOfRangeException(nameof(validBits), validBits, "Valid width must fit inside the container");
    }

    // Bytes of zero padding that precede the sample inside each little-endian container.
    var padBytes = containerBytes - validBytes;

    var sampleCount = dataSize / containerBytes;
    var output = new byte[sampleCount * validBytes];

    for (int i = 0; i < sampleCount; i++)
    {
        var src = dataStart + i * containerBytes + padBytes;
        Buffer.BlockCopy(buffer, src, output, i * validBytes, validBytes);
    }

    return output;
}
|
|
|
|
/// <summary>
/// Supplies the placeholder metadata used when a WAV header cannot be parsed: 16-bit stereo at
/// 44.1 kHz, 1411 kbps, a nominal 180-second duration and no data size. The EXTENSIBLE, float and
/// container fields are left at their defaults.
/// </summary>
private WavMetadata GetDefaultWavMetadata() => new WavMetadata
{
    SampleRate = 44100,
    Channels = 2,
    BitsPerSample = 16,
    BlockAlign = 4,
    Bitrate = 1411,
    Duration = 180.0,
    DataSize = 0
};
|
|
|
|
/// <summary>
/// Walks the RIFF chunk list that starts after the 12-byte RIFF/WAVE preamble and returns the
/// offset of the first chunk whose id matches <paramref name="chunkId"/>. Each step skips the
/// 8-byte chunk header plus the declared size, rounded up to an even byte count.
/// </summary>
/// <param name="buffer">Leading bytes of the file; may stop short of the chunk being looked for.</param>
/// <param name="chunkId">
/// Four-character chunk id such as <c>"fmt "</c> or <c>"data"</c>. Only the first four bytes are
/// compared; an id shorter than four characters never matches.
/// </param>
/// <returns>
/// Offset of the chunk id, such that the chunk's full 8-byte header lies inside
/// <paramref name="buffer"/>, or -1 when the walk leaves the buffer without a match.
/// </returns>
private static int FindChunk(byte[] buffer, string chunkId)
{
    var chunkBytes = System.Text.Encoding.ASCII.GetBytes(chunkId);
    ReadOnlySpan<byte> wanted = chunkBytes.AsSpan(0, Math.Min(4, chunkBytes.Length));

    // The cursor is a long on purpose: chunk sizes are untrusted unsigned 32-bit values, and
    // folding them into an int could turn the step negative or zero, which would either index
    // before the buffer or never advance. In 64-bit arithmetic every step moves forward by at
    // least 8, and an oversized chunk simply carries the cursor past the end of the buffer.
    long offset = 12; // Start after RIFF header

    while (offset <= buffer.Length - 8)
    {
        var at = (int)offset; // safe: bounded by buffer.Length above

        if (buffer.AsSpan(at, 4).SequenceEqual(wanted))
        {
            return at;
        }

        // Move to next chunk; odd-sized chunks are followed by one pad byte.
        uint chunkSize = BitConverter.ToUInt32(buffer, at + 4);
        offset += 8L + chunkSize + (chunkSize & 1);
    }

    return -1;
}
|
|
|
|
/// <summary>
/// Format and size information read from a WAV header, plus the values derived from it.
/// </summary>
private class WavMetadata
{
    /// <summary>Duration in seconds.</summary>
    public double Duration { get; set; }

    /// <summary>Bitrate in kilobits per second.</summary>
    public int Bitrate { get; set; }

    /// <summary>Samples per second.</summary>
    public int SampleRate { get; set; }

    /// <summary>Number of channels.</summary>
    public int Channels { get; set; }

    /// <summary>Valid sample depth in bits; for EXTENSIBLE files this is wValidBitsPerSample.</summary>
    public int BitsPerSample { get; set; }

    /// <summary>
    /// Container width in bits when an EXTENSIBLE file stores its samples in a container wider
    /// than the valid depth (e.g. 32 for 24-in-32). Zero when container and valid depth agree.
    /// </summary>
    public int ContainerBitsPerSample { get; set; }

    /// <summary>BlockAlign value as found in the header.</summary>
    public int BlockAlign { get; set; }

    /// <summary>Declared size of the data chunk in bytes.</summary>
    public int DataSize { get; set; }

    /// <summary>Offset of the "data" chunk id within the parsed buffer.</summary>
    public int DataChunkPos { get; set; }

    /// <summary>True when the fmt chunk is WAVE_FORMAT_EXTENSIBLE.</summary>
    public bool IsExtensible { get; set; }

    /// <summary>True when the SubFormat is IEEE float (converted to 24-bit PCM on normalization).</summary>
    public bool IsFloat { get; set; }

    /// <summary>True when a container width is recorded and it differs from the valid depth, so samples must be re-packed.</summary>
    public bool IsPaddedContainer
    {
        get
        {
            var container = ContainerBitsPerSample;
            return container != 0 && container != BitsPerSample;
        }
    }
}
|
|
|
|
/// <summary>
/// Outcome of the structural WAV check: either a failure reason, or the chunk offsets and format
/// flags that metadata parsing needs.
/// </summary>
private class WavValidationResult
{
    /// <summary>True when every structural check passed.</summary>
    public bool IsValid { get; set; }

    /// <summary>Reason for the failure; empty when none was recorded.</summary>
    public string ErrorMessage { get; set; } = "";

    /// <summary>Offset of the "fmt " chunk id in the validated buffer.</summary>
    public int FmtChunkPos { get; set; }

    /// <summary>Offset of the "data" chunk id in the validated buffer.</summary>
    public int DataChunkPos { get; set; }

    /// <summary>True when the fmt chunk is WAVE_FORMAT_EXTENSIBLE with an accepted SubFormat.</summary>
    public bool IsExtensible { get; set; }

    /// <summary>True when the EXTENSIBLE SubFormat is IEEE float rather than PCM.</summary>
    public bool IsFloat { get; set; }
}
|
|
}
|
|
|
|
/// <summary>
/// The sample bytes of a WAV data chunk together with the format parameters needed to interpret
/// them. <see cref="Pcm"/> covers the data chunk only; the header is not part of it.
/// </summary>
/// <param name="Pcm">PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(ReadOnlyMemory<byte> Pcm, int Channels, int SampleRate, int BitsPerSample);