using DeepDrftContent.FileDatabase.Models;
namespace DeepDrftContent.Processors;
/// <summary>
/// Service for processing audio files and extracting metadata.
/// </summary>
public class AudioProcessor
{
/// <summary>
/// Reads a WAV file from disk and wraps its bytes and extracted metadata in an
/// <see cref="AudioBinary"/>.
/// </summary>
/// <remarks>
/// EXTENSIBLE-PCM input is rewritten as a plain 44-byte-header PCM WAV before it is stored.
/// When the header cannot be parsed, metadata extraction falls back to default values and
/// the original bytes are stored unchanged.
/// </remarks>
/// <param name="filePath">Path to the WAV file.</param>
/// <returns>An <see cref="AudioBinary"/> holding the stored bytes, duration and bitrate.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="ArgumentException">The file extension is not <c>.wav</c>.</exception>
/// <exception cref="InvalidOperationException">Reading or processing the file failed.</exception>
public async Task<AudioBinary> ProcessWavFileAsync(string filePath)
{
    if (!File.Exists(filePath))
    {
        // Pass the path as FileName too so callers can read it without parsing the message.
        throw new FileNotFoundException($"WAV file not found: {filePath}", filePath);
    }

    if (!Path.GetExtension(filePath).Equals(".wav", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("File must be a WAV file", nameof(filePath));
    }

    try
    {
        var buffer = await File.ReadAllBytesAsync(filePath);
        var wavInfo = ExtractWavMetadata(buffer);

        // EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
        // the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
        // time so the vault only ever holds standard PCM and the client decode path stays unchanged.
        var storedBuffer = wavInfo.IsExtensible
            ? NormalizeToStandardPcm(buffer, wavInfo)
            : buffer;

        var parameters = new AudioBinaryParams(
            Buffer: storedBuffer,
            Size: storedBuffer.Length,
            Extension: ".wav",
            Duration: wavInfo.Duration,
            Bitrate: wavInfo.Bitrate
        );
        return new AudioBinary(parameters);
    }
    catch (Exception ex)
    {
        // Surface every read/processing failure as one exception type, keeping the cause.
        throw new InvalidOperationException($"Failed to process WAV file: {ex.Message}", ex);
    }
}
/// <summary>
/// Extracts the raw PCM data region and format parameters from a WAV buffer, reusing the
/// same chunk-walk and validation as metadata extraction. Returns null if the buffer is not
/// a valid PCM WAV (callers treat a null as "no profile computable" and continue) — unlike
/// <see cref="ExtractWavMetadata"/>, this does NOT fall back to synthetic defaults, because a
/// loudness profile over fabricated silence would be misleading.
/// </summary>
/// <param name="buffer">Complete WAV file bytes (header and sample data).</param>
/// <returns>
/// The PCM region with its channel count, sample rate and bit depth, or null when the buffer
/// cannot be validated or contains no sample bytes.
/// </returns>
public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
{
    // Copy the span to an array so the existing array-based parsers can be reused. The PCM
    // slice returned is a view over this array (no second copy of the data region).
    var bytes = buffer.ToArray();

    WavValidationResult validation;
    WavMetadata metadata;
    try
    {
        // Structure validation sits inside the try as well: it reads header fields by offset
        // and may throw on a truncated file, which a Try-method must report as null.
        validation = ValidateWavStructure(bytes);
        if (!validation.IsValid)
        {
            return null;
        }

        metadata = ParseWavMetadata(bytes, validation);
        ValidateAudioParameters(metadata);
    }
    catch (Exception)
    {
        // Best effort by design: any parsing failure means "no PCM available".
        return null;
    }

    // Data bytes begin 8 past the "data" chunk id (4 id + 4 size). Clamp the declared size to
    // what is actually present — some encoders write a size that overshoots the file.
    var dataStart = validation.DataChunkPos + 8;
    if (dataStart > bytes.Length)
    {
        return null;
    }

    var available = bytes.Length - dataStart;

    // A negative DataSize is a declared 32-bit size of 2 GiB or more that wrapped when it was
    // narrowed to int (e.g. the 0xFFFFFFFF streaming placeholder). That is an overshoot too,
    // so take everything that is present.
    var dataLength = metadata.DataSize < 0
        ? available
        : Math.Min(metadata.DataSize, available);
    if (dataLength <= 0)
    {
        return null;
    }

    var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
    return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
}
/// <summary>
/// Best-effort metadata extraction: validates the WAV structure, parses the header fields and
/// sanity-checks them. Any failure is logged to the console and replaced by default metadata,
/// so this method never throws on malformed input.
/// </summary>
/// <param name="buffer">Complete WAV file bytes.</param>
/// <returns>The parsed metadata, or the default metadata when parsing fails.</returns>
private WavMetadata ExtractWavMetadata(byte[] buffer)
{
    WavMetadata result;

    try
    {
        var structure = ValidateWavStructure(buffer);
        if (structure.IsValid == false)
        {
            throw new InvalidDataException($"WAV validation failed: {structure.ErrorMessage}");
        }

        result = ParseWavMetadata(buffer, structure);
        ValidateAudioParameters(result);
    }
    catch (Exception failure)
    {
        // Deliberate fallback: the file is still stored, just with placeholder metadata.
        Console.WriteLine($"Warning: WAV parsing failed, using defaults: {failure.Message}");
        result = GetDefaultWavMetadata();
    }

    return result;
}
/// <summary>
/// Checks that a buffer is a RIFF/WAVE file carrying PCM (or EXTENSIBLE-PCM) audio and locates
/// the "fmt " and "data" chunks for later parsing.
/// </summary>
/// <param name="buffer">Complete WAV file bytes.</param>
/// <returns>
/// A result with <c>IsValid</c> set and both chunk positions populated on success; otherwise
/// <c>IsValid</c> is false and <c>ErrorMessage</c> names the first problem found.
/// </returns>
private WavValidationResult ValidateWavStructure(byte[] buffer)
{
    const int minimumFileSize = 44;
    const int chunkHeaderSize = 8;     // 4-byte id + 4-byte size
    const int baseFmtSize = 16;        // fields every PCM fmt chunk carries
    const int extensibleFmtSize = 40;  // 16 base + 2 cbSize + 22 extension
    const int subFormatOffset = 24;    // SubFormat GUID offset inside the fmt chunk data
    const ushort formatPcm = 1;
    const ushort formatExtensible = 0xFFFE;

    static WavValidationResult Invalid(string reason) =>
        new WavValidationResult { IsValid = false, ErrorMessage = reason };

    if (buffer.Length < minimumFileSize)
    {
        return Invalid("File too short");
    }

    // Validate RIFF signature
    if (System.Text.Encoding.ASCII.GetString(buffer, 0, 4) != "RIFF")
    {
        return Invalid("Invalid RIFF signature");
    }

    // Validate WAVE signature
    if (System.Text.Encoding.ASCII.GetString(buffer, 8, 4) != "WAVE")
    {
        return Invalid("Invalid WAVE signature");
    }

    // Find and validate fmt chunk
    var fmtChunkPos = FindChunk(buffer, "fmt ");
    if (fmtChunkPos == -1)
    {
        return Invalid("Missing fmt chunk");
    }

    var fmtChunkSize = BitConverter.ToUInt32(buffer, fmtChunkPos + 4);
    if (fmtChunkSize < baseFmtSize)
    {
        return Invalid("fmt chunk too small");
    }

    // The chunk search only guarantees that the 8-byte chunk header is inside the buffer. The
    // 16 base fmt bytes are read by offset here and again during parsing, so make sure a
    // truncated file is rejected instead of letting those reads throw.
    var fmtBytesPresent = buffer.Length - (fmtChunkPos + chunkHeaderSize);
    if (fmtBytesPresent < baseFmtSize)
    {
        return Invalid("Invalid data: fmt chunk extends past end of file");
    }

    // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
    // (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is
    // then byte-identical to standard PCM and we normalize it downstream.
    var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + chunkHeaderSize);
    var isExtensible = false;
    if (audioFormat == formatExtensible)
    {
        // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
        if (fmtChunkSize < extensibleFmtSize)
        {
            return Invalid("Invalid data: EXTENSIBLE fmt chunk too small");
        }

        if (fmtBytesPresent < extensibleFmtSize)
        {
            return Invalid("Invalid data: EXTENSIBLE fmt chunk extends past end of file");
        }

        // The first two bytes of the SubFormat GUID are the little-endian format tag;
        // 0x0001 == WAVE_FORMAT_PCM.
        var subFormatPos = fmtChunkPos + chunkHeaderSize + subFormatOffset;
        if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00)
        {
            return Invalid("Invalid data: EXTENSIBLE SubFormat is not PCM");
        }

        isExtensible = true;
    }
    else if (audioFormat != formatPcm)
    {
        return Invalid("Only PCM format supported");
    }

    // Find data chunk
    var dataChunkPos = FindChunk(buffer, "data");
    if (dataChunkPos == -1)
    {
        return Invalid("Missing data chunk");
    }

    return new WavValidationResult
    {
        IsValid = true,
        FmtChunkPos = fmtChunkPos,
        DataChunkPos = dataChunkPos,
        IsExtensible = isExtensible
    };
}
/// <summary>
/// Reads the audio format fields from the "fmt " chunk and the payload size from the "data"
/// chunk of an already-validated WAV buffer.
/// </summary>
/// <remarks>
/// The data size is clamped to the bytes actually present after the data chunk header, so a
/// declared size that overshoots the file (including the 0xFFFFFFFF placeholder) can neither
/// wrap negative when stored as an int nor inflate the computed duration.
/// </remarks>
/// <param name="buffer">Complete WAV file bytes.</param>
/// <param name="validation">Successful validation result giving the chunk positions.</param>
/// <returns>The parsed metadata; the values are not range-checked here.</returns>
private WavMetadata ParseWavMetadata(byte[] buffer, WavValidationResult validation)
{
    // Chunk payloads start 8 bytes after the chunk id (4 id + 4 size).
    var fmtData = validation.FmtChunkPos + 8;
    var channels = BitConverter.ToUInt16(buffer, fmtData + 2);
    var sampleRate = BitConverter.ToUInt32(buffer, fmtData + 4);
    var byteRate = BitConverter.ToUInt32(buffer, fmtData + 8);
    var blockAlign = BitConverter.ToUInt16(buffer, fmtData + 12);
    var bitsPerSample = BitConverter.ToUInt16(buffer, fmtData + 14);
    var declaredDataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);

    // For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
    // wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24)
    // but the valid bits are authoritative for the normalized header and metadata.
    // Note: padded-container EXTENSIBLE (e.g. 24-bit valid in a 32-bit container) is not yet
    // supported — the mismatched BlockAlign will cause ValidateAudioParameters to throw and fall
    // back to defaults. This is an accepted gap as of this fix.
    if (validation.IsExtensible)
    {
        bitsPerSample = BitConverter.ToUInt16(buffer, fmtData + 18);
    }

    // Never trust the declared size beyond what the buffer holds. The result is bounded by the
    // array length, so narrowing it to int cannot wrap.
    var bytesAfterDataHeader = Math.Max(0, buffer.Length - (validation.DataChunkPos + 8));
    var dataSize = (int)Math.Min(declaredDataSize, (uint)bytesAfterDataHeader);

    var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;

    // 64-bit product: the fields are still unvalidated here, and 32-bit unsigned arithmetic
    // would wrap silently on garbage headers.
    var bitsPerSecond = (ulong)sampleRate * channels * bitsPerSample;
    var bitrate = (int)Math.Min(bitsPerSecond / 1000, (ulong)int.MaxValue);

    return new WavMetadata
    {
        Duration = duration,
        Bitrate = bitrate,
        SampleRate = (int)sampleRate,
        Channels = channels,
        BitsPerSample = bitsPerSample,
        BlockAlign = blockAlign,
        DataSize = dataSize,
        DataChunkPos = validation.DataChunkPos,
        IsExtensible = validation.IsExtensible
    };
}
/// <summary>
/// Rejects metadata whose channel count, sample rate, bit depth or block alignment is outside
/// the supported set.
/// </summary>
/// <param name="metadata">Parsed WAV metadata to check.</param>
/// <exception cref="InvalidDataException">A parameter is unsupported or inconsistent.</exception>
private void ValidateAudioParameters(WavMetadata metadata)
{
    // One to eight channels are accepted.
    if (metadata.Channels is < 1 or > 8)
    {
        throw new InvalidDataException($"Invalid channel count: {metadata.Channels}");
    }

    var rateSupported = metadata.SampleRate
        is 8000 or 11025 or 16000 or 22050 or 44100
        or 48000 or 88200 or 96000 or 176400 or 192000;
    if (!rateSupported)
    {
        throw new InvalidDataException($"Unsupported sample rate: {metadata.SampleRate}");
    }

    var depthSupported = metadata.BitsPerSample is 8 or 16 or 24 or 32;
    if (!depthSupported)
    {
        throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}");
    }

    // One frame holds one sample per channel, each sample occupying whole bytes.
    var bytesPerFrame = metadata.Channels * (metadata.BitsPerSample / 8);
    if (bytesPerFrame != metadata.BlockAlign)
    {
        throw new InvalidDataException($"Invalid block align: expected {bytesPerFrame}, got {metadata.BlockAlign}");
    }
}
/// <summary>
/// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1).
/// The sample bytes are copied verbatim — EXTENSIBLE-PCM data is byte-identical to standard PCM —
/// only the header is replaced, so the vault stores a format the streaming pipeline already handles.
/// </summary>
/// <param name="buffer">Original WAV file bytes.</param>
/// <param name="metadata">Metadata parsed from <paramref name="buffer"/>.</param>
/// <returns>A new buffer holding the 44-byte header followed by the sample data.</returns>
private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
{
    const int headerSize = 44;

    // Clamp the declared data size to what is actually present; some encoders overshoot.
    var dataStart = metadata.DataChunkPos + 8;
    var available = Math.Max(0, buffer.Length - dataStart);

    // A negative DataSize is a declared 32-bit size of 2 GiB or more that wrapped when it was
    // narrowed to int. Treat it as an overshoot instead of letting a negative length reach
    // the allocation and copy below.
    var dataSize = metadata.DataSize < 0
        ? available
        : Math.Min(metadata.DataSize, available);

    var result = new byte[headerSize + dataSize];
    var blockAlign = (ushort)(metadata.Channels * (metadata.BitsPerSample / 8));
    var byteRate = (uint)(metadata.SampleRate * blockAlign);

    // RIFF header
    System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
    BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
    System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);

    // fmt chunk (standard 16-byte PCM)
    System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
    BitConverter.GetBytes((uint)16).CopyTo(result, 16);
    BitConverter.GetBytes((ushort)1).CopyTo(result, 20); // audioFormat = PCM
    BitConverter.GetBytes((ushort)metadata.Channels).CopyTo(result, 22);
    BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
    BitConverter.GetBytes(byteRate).CopyTo(result, 28);
    BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
    BitConverter.GetBytes((ushort)metadata.BitsPerSample).CopyTo(result, 34);

    // data chunk
    System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
    BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);
    if (dataSize > 0)
    {
        Array.Copy(buffer, dataStart, result, headerSize, dataSize);
    }

    return result;
}
/// <summary>
/// Builds the placeholder metadata used when a WAV header cannot be parsed: 44.1 kHz, stereo,
/// 16-bit, with a 180-second duration and no data bytes.
/// </summary>
/// <returns>A fresh metadata instance holding the fallback values.</returns>
private WavMetadata GetDefaultWavMetadata() => new()
{
    SampleRate = 44100,
    Channels = 2,
    BitsPerSample = 16,
    BlockAlign = 4,
    Bitrate = 1411,
    Duration = 180.0,
    DataSize = 0
};
/// <summary>
/// Walks the RIFF chunk list and returns the offset of the first chunk whose id matches.
/// </summary>
/// <remarks>
/// Chunk sizes come from the file and are untrusted. The walk uses 64-bit offsets so a huge
/// declared size can neither wrap negative nor leave the offset unchanged, which previously
/// allowed an out-of-range read or an endless loop on crafted input.
/// </remarks>
/// <param name="buffer">Complete WAV file bytes.</param>
/// <param name="chunkId">Four-character ASCII chunk id, e.g. "fmt " or "data"; ids of any other length never match.</param>
/// <returns>Offset of the chunk id within <paramref name="buffer"/>, or -1 if no such chunk exists.</returns>
private static int FindChunk(byte[] buffer, string chunkId)
{
    const int riffHeaderSize = 12; // "RIFF" + size + "WAVE"
    const int chunkHeaderSize = 8; // 4-byte id + 4-byte size

    var wanted = System.Text.Encoding.ASCII.GetBytes(chunkId);

    long offset = riffHeaderSize;
    while (offset <= buffer.Length - chunkHeaderSize)
    {
        // Safe narrowing: the loop condition keeps the offset inside the array.
        var position = (int)offset;

        if (buffer.AsSpan(position, 4).SequenceEqual(wanted))
        {
            return position;
        }

        // Skip the header, the payload, and the pad byte that follows an odd-sized payload.
        // Every step advances by at least 8 bytes, so the walk always terminates.
        var chunkSize = BitConverter.ToUInt32(buffer, position + 4);
        offset += chunkHeaderSize + (long)chunkSize + (chunkSize & 1);
    }

    return -1;
}
/// <summary>
/// Audio parameters read from a WAV header, plus the values derived from them.
/// </summary>
private sealed class WavMetadata
{
    /// <summary>Playback length in seconds (data size divided by byte rate).</summary>
    public double Duration { get; set; }

    /// <summary>Sample rate × channels × bits per sample, divided by 1000.</summary>
    public int Bitrate { get; set; }

    /// <summary>Samples per second.</summary>
    public int SampleRate { get; set; }

    /// <summary>Number of channels.</summary>
    public int Channels { get; set; }

    /// <summary>Bit depth per sample; for EXTENSIBLE files this is the valid-bits field.</summary>
    public int BitsPerSample { get; set; }

    /// <summary>Block alignment as stored in the fmt chunk.</summary>
    public int BlockAlign { get; set; }

    /// <summary>Size in bytes of the data chunk payload, as produced by the parser.</summary>
    public int DataSize { get; set; }

    /// <summary>Offset of the "data" chunk id; the sample bytes start 8 bytes later.</summary>
    public int DataChunkPos { get; set; }

    /// <summary>True when the file uses WAVE_FORMAT_EXTENSIBLE with a PCM SubFormat.</summary>
    public bool IsExtensible { get; set; }
}
/// <summary>
/// Outcome of the structural WAV check, including the chunk offsets later parsing needs.
/// </summary>
private sealed class WavValidationResult
{
    /// <summary>True when the buffer passed every structural check.</summary>
    public bool IsValid { get; set; }

    /// <summary>Reason for rejection; empty unless a check failed.</summary>
    public string ErrorMessage { get; set; } = string.Empty;

    /// <summary>Offset of the "fmt " chunk id within the buffer.</summary>
    public int FmtChunkPos { get; set; }

    /// <summary>Offset of the "data" chunk id within the buffer.</summary>
    public int DataChunkPos { get; set; }

    /// <summary>True when the fmt chunk is WAVE_FORMAT_EXTENSIBLE with a PCM SubFormat.</summary>
    public bool IsExtensible { get; set; }
}
}
/// <summary>
/// The raw PCM sample region of a WAV plus the format parameters needed to interpret it.
/// <paramref name="Pcm"/> is a view over the decoded buffer — the data chunk only, header excluded.
/// </summary>
/// <param name="Pcm">The PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(
    ReadOnlyMemory<byte> Pcm,
    int Channels,
    int SampleRate,
    int BitsPerSample);