using DeepDrftContent.FileDatabase.Models;

namespace DeepDrftContent.Processors;

/// <summary>
/// Service for processing audio files and extracting metadata
/// </summary>
public class AudioProcessor
{
    /// <summary>Size of a RIFF chunk header: 4-byte id + 4-byte little-endian size.</summary>
    private const int ChunkHeaderSize = 8;

    /// <summary>Size of the base PCM fmt chunk payload (everything up to and including bitsPerSample).</summary>
    private const int BaseFmtChunkSize = 16;

    /// <summary>Size of a full EXTENSIBLE fmt payload: 16 base + 2 cbSize + 22 extension.</summary>
    private const int ExtensibleFmtChunkSize = 40;

    /// <summary>Size of the canonical RIFF/fmt/data header of a plain PCM WAV.</summary>
    private const int CanonicalHeaderSize = 44;

    private const ushort WaveFormatPcm = 1;
    private const ushort WaveFormatExtensible = 0xFFFE;

    private static readonly int[] ValidSampleRates =
        { 8000, 11025, 16000, 22050, 44100, 48000, 88200, 96000, 176400, 192000 };

    private static readonly int[] ValidBitDepths = { 8, 16, 24, 32 };

    /// <summary>
    /// Processes a WAV file and creates an AudioBinary object
    /// </summary>
    /// <param name="filePath">Path to the WAV file</param>
    /// <returns>AudioBinary object with metadata</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="ArgumentException">The file extension is not <c>.wav</c>.</exception>
    /// <exception cref="InvalidOperationException">Reading or processing the file failed.</exception>
    public async Task<AudioBinary> ProcessWavFileAsync(string filePath)
    {
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"WAV file not found: {filePath}");
        }

        if (!Path.GetExtension(filePath).Equals(".wav", StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("File must be a WAV file", nameof(filePath));
        }

        try
        {
            var buffer = await File.ReadAllBytesAsync(filePath);
            var wavInfo = ExtractWavMetadata(buffer);

            // EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
            // the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
            // time so the vault only ever holds standard PCM and the client decode path stays unchanged.
            var storedBuffer = wavInfo.IsExtensible
                ? NormalizeToStandardPcm(buffer, wavInfo)
                : buffer;

            var parameters = new AudioBinaryParams(
                Buffer: storedBuffer,
                Size: storedBuffer.Length,
                Extension: ".wav",
                Duration: wavInfo.Duration,
                Bitrate: wavInfo.Bitrate
            );

            return new AudioBinary(parameters);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException($"Failed to process WAV file: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Extracts the raw PCM data region and format parameters from a WAV buffer, reusing the
    /// same chunk-walk and validation as metadata extraction. Returns null if the buffer is not
    /// a valid PCM WAV (callers treat a null as "no profile computable" and continue) — unlike
    /// <see cref="ExtractWavMetadata"/>, this does NOT fall back to synthetic defaults, because a
    /// loudness profile over fabricated silence would be misleading.
    /// </summary>
    /// <param name="buffer">The complete WAV file contents.</param>
    /// <returns>The PCM region and its format, or null when the buffer cannot be parsed.</returns>
    public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
    {
        // Copy the span to an array so the existing array-based parsers can be reused. The PCM
        // slice returned is a view over this array (no second copy of the data region).
        var bytes = buffer.ToArray();

        WavValidationResult validation;
        WavMetadata metadata;
        try
        {
            // Validation runs inside the try as well: this method promises null, never an
            // exception, for malformed input.
            validation = ValidateWavStructure(bytes);
            if (!validation.IsValid)
            {
                return null;
            }

            metadata = ParseWavMetadata(bytes, validation);
            ValidateAudioParameters(metadata);
        }
        catch (Exception)
        {
            return null;
        }

        // Data bytes begin 8 past the "data" chunk id (4 id + 4 size). Clamp the declared size to
        // what is actually present — some encoders write a size that overshoots the file.
        var dataStart = validation.DataChunkPos + ChunkHeaderSize;
        if (dataStart > bytes.Length)
        {
            return null;
        }

        var available = bytes.Length - dataStart;
        var dataLength = Math.Min(metadata.DataSize, available);
        if (dataLength <= 0)
        {
            return null;
        }

        var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
        return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
    }

    /// <summary>
    /// Extracts metadata from WAV file buffer with comprehensive validation.
    /// On any parsing or validation failure a warning is written to the console and
    /// the fallback values from <see cref="GetDefaultWavMetadata"/> are returned.
    /// </summary>
    private WavMetadata ExtractWavMetadata(byte[] buffer)
    {
        try
        {
            var validationResult = ValidateWavStructure(buffer);
            if (!validationResult.IsValid)
            {
                throw new InvalidDataException($"WAV validation failed: {validationResult.ErrorMessage}");
            }

            var metadata = ParseWavMetadata(buffer, validationResult);
            ValidateAudioParameters(metadata);
            return metadata;
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Warning: WAV parsing failed, using defaults: {ex.Message}");
            return GetDefaultWavMetadata();
        }
    }

    /// <summary>
    /// Validates WAV file structure and returns parsing information
    /// </summary>
    private static WavValidationResult ValidateWavStructure(byte[] buffer)
    {
        if (buffer.Length < CanonicalHeaderSize)
        {
            return Invalid("File too short");
        }

        // Validate RIFF signature
        var riffSignature = System.Text.Encoding.ASCII.GetString(buffer, 0, 4);
        if (riffSignature != "RIFF")
        {
            return Invalid("Invalid RIFF signature");
        }

        // Validate WAVE signature
        var waveSignature = System.Text.Encoding.ASCII.GetString(buffer, 8, 4);
        if (waveSignature != "WAVE")
        {
            return Invalid("Invalid WAVE signature");
        }

        // Find and validate fmt chunk
        var fmtChunkPos = FindChunk(buffer, "fmt ");
        if (fmtChunkPos == -1)
        {
            return Invalid("Missing fmt chunk");
        }

        var fmtChunkSize = BitConverter.ToUInt32(buffer, fmtChunkPos + 4);
        if (fmtChunkSize < BaseFmtChunkSize)
        {
            return Invalid("fmt chunk too small");
        }

        // FindChunk only guarantees the 8-byte chunk header is in range. Make sure the 16 base
        // fmt bytes really exist before reading them here and in ParseWavMetadata.
        // (Written as a subtraction so it cannot overflow for buffers near the 2 GiB limit.)
        if (buffer.Length - fmtChunkPos < ChunkHeaderSize + BaseFmtChunkSize)
        {
            return Invalid("fmt chunk extends past end of file");
        }

        // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
        // (0xFFFE) is accepted only when its SubFormat GUID indicates PCM — the raw sample data is
        // then byte-identical to standard PCM and we normalize it downstream.
        var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + ChunkHeaderSize);
        var isExtensible = false;

        if (audioFormat == WaveFormatExtensible)
        {
            // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
            if (fmtChunkSize < ExtensibleFmtChunkSize)
            {
                return Invalid("Invalid data: EXTENSIBLE fmt chunk too small");
            }

            if (buffer.Length - fmtChunkPos < ChunkHeaderSize + ExtensibleFmtChunkSize)
            {
                return Invalid("Invalid data: EXTENSIBLE fmt chunk extends past end of file");
            }

            // SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
            // first two bytes are the little-endian format tag; 0x0001 == WAVE_FORMAT_PCM.
            var subFormatPos = fmtChunkPos + ChunkHeaderSize + 24;
            if (buffer[subFormatPos] != 0x01 || buffer[subFormatPos + 1] != 0x00)
            {
                return Invalid("Invalid data: EXTENSIBLE SubFormat is not PCM");
            }

            isExtensible = true;
        }
        else if (audioFormat != WaveFormatPcm)
        {
            return Invalid("Only PCM format supported");
        }

        // Find data chunk
        var dataChunkPos = FindChunk(buffer, "data");
        if (dataChunkPos == -1)
        {
            return Invalid("Missing data chunk");
        }

        return new WavValidationResult
        {
            IsValid = true,
            FmtChunkPos = fmtChunkPos,
            DataChunkPos = dataChunkPos,
            IsExtensible = isExtensible
        };

        static WavValidationResult Invalid(string message) =>
            new WavValidationResult { IsValid = false, ErrorMessage = message };
    }

    /// <summary>
    /// Parses WAV metadata from validated buffer.
    /// The reported data size is the declared size clamped to the bytes actually present after
    /// the data chunk header, and the duration is derived from that clamped size.
    /// </summary>
    private static WavMetadata ParseWavMetadata(byte[] buffer, WavValidationResult validation)
    {
        var channels = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 10);
        var sampleRate = BitConverter.ToUInt32(buffer, validation.FmtChunkPos + 12);
        var byteRate = BitConverter.ToUInt32(buffer, validation.FmtChunkPos + 16);
        var blockAlign = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 20);
        var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22);
        var declaredDataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);

        // Some encoders write a data size that overshoots the file (streaming writers commonly
        // leave 0xFFFFFFFF). Clamp in 64-bit before narrowing: a raw (int) cast of a value
        // >= 2^31 would go negative and defeat every downstream Math.Min clamp.
        long availableDataBytes = Math.Max(0, buffer.Length - (validation.DataChunkPos + ChunkHeaderSize));
        var dataSize = (int)Math.Min((long)declaredDataSize, availableDataBytes);

        // For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
        // wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24)
        // but the valid bits are authoritative for the normalized header and metadata.
        // Note: padded-container EXTENSIBLE (e.g. 24-bit valid in a 32-bit container) is not yet
        // supported — the mismatched BlockAlign will cause ValidateAudioParameters to throw and fall
        // back to defaults. This is an accepted gap as of this fix.
        if (validation.IsExtensible)
        {
            bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + ChunkHeaderSize + 18);
        }

        var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;

        // 64-bit product so absurd (not yet validated) header values cannot wrap in uint space.
        var bitrate = (int)((long)sampleRate * channels * bitsPerSample / 1000);

        return new WavMetadata
        {
            Duration = duration,
            Bitrate = bitrate,
            SampleRate = (int)sampleRate,
            Channels = channels,
            BitsPerSample = bitsPerSample,
            BlockAlign = blockAlign,
            DataSize = dataSize,
            DataChunkPos = validation.DataChunkPos,
            IsExtensible = validation.IsExtensible
        };
    }

    /// <summary>
    /// Validates audio parameters for reasonableness
    /// </summary>
    /// <exception cref="InvalidDataException">
    /// The channel count, sample rate, bit depth or block align is outside the supported set.
    /// </exception>
    private static void ValidateAudioParameters(WavMetadata metadata)
    {
        if (metadata.Channels < 1 || metadata.Channels > 8)
        {
            throw new InvalidDataException($"Invalid channel count: {metadata.Channels}");
        }

        if (Array.IndexOf(ValidSampleRates, metadata.SampleRate) < 0)
        {
            throw new InvalidDataException($"Unsupported sample rate: {metadata.SampleRate}");
        }

        if (Array.IndexOf(ValidBitDepths, metadata.BitsPerSample) < 0)
        {
            throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}");
        }

        var expectedBlockAlign = metadata.Channels * (metadata.BitsPerSample / 8);
        if (metadata.BlockAlign != expectedBlockAlign)
        {
            throw new InvalidDataException($"Invalid block align: expected {expectedBlockAlign}, got {metadata.BlockAlign}");
        }
    }

    /// <summary>
    /// Rebuilds an EXTENSIBLE-PCM WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1).
    /// The sample bytes are copied verbatim — EXTENSIBLE-PCM data is byte-identical to standard PCM —
    /// only the header is replaced, so the vault stores a format the streaming pipeline already handles.
    /// </summary>
    private static byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
    {
        // Clamp the declared data size to what is actually present; some encoders overshoot.
        // The lower bound of 0 keeps the allocation and copy below well-defined for any metadata.
        var dataStart = metadata.DataChunkPos + ChunkHeaderSize;
        var available = buffer.Length - dataStart;
        var dataSize = Math.Max(0, Math.Min(metadata.DataSize, available));

        var result = new byte[CanonicalHeaderSize + dataSize];

        var blockAlign = (ushort)(metadata.Channels * (metadata.BitsPerSample / 8));
        var byteRate = (uint)(metadata.SampleRate * blockAlign);

        // RIFF header
        System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
        BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
        System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);

        // fmt chunk (standard 16-byte PCM)
        System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
        BitConverter.GetBytes((uint)BaseFmtChunkSize).CopyTo(result, 16);
        BitConverter.GetBytes(WaveFormatPcm).CopyTo(result, 20); // audioFormat = PCM
        BitConverter.GetBytes((ushort)metadata.Channels).CopyTo(result, 22);
        BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
        BitConverter.GetBytes(byteRate).CopyTo(result, 28);
        BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
        BitConverter.GetBytes((ushort)metadata.BitsPerSample).CopyTo(result, 34);

        // data chunk
        System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
        BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);
        Array.Copy(buffer, dataStart, result, CanonicalHeaderSize, dataSize);

        return result;
    }

    /// <summary>
    /// Returns default WAV metadata for fallback scenarios
    /// </summary>
    private static WavMetadata GetDefaultWavMetadata()
    {
        return new WavMetadata
        {
            Duration = 180.0,
            Bitrate = 1411,
            SampleRate = 44100,
            Channels = 2,
            BitsPerSample = 16,
            BlockAlign = 4,
            DataSize = 0
        };
    }

    /// <summary>
    /// Finds a chunk in the WAV file buffer with proper alignment handling
    /// </summary>
    /// <param name="buffer">The complete WAV file contents.</param>
    /// <param name="chunkId">Four-character ASCII chunk id, e.g. <c>"fmt "</c> or <c>"data"</c>.</param>
    /// <returns>
    /// Offset of the chunk id within <paramref name="buffer"/>, or -1 when no such chunk is found.
    /// A returned offset always has the full 8-byte chunk header in range.
    /// </returns>
    private static int FindChunk(byte[] buffer, string chunkId)
    {
        var chunkBytes = System.Text.Encoding.ASCII.GetBytes(chunkId);

        // The cursor is 64-bit on purpose: chunk sizes are untrusted 32-bit values, and adding
        // one to an int cursor can wrap it negative and index before the start of the buffer.
        long offset = 12; // Start after RIFF header

        while (offset <= buffer.Length - ChunkHeaderSize)
        {
            var position = (int)offset;

            // Check for chunk signature match
            if (buffer.AsSpan(position, 4).SequenceEqual(chunkBytes))
            {
                return position;
            }

            // Move to next chunk; chunk payloads are padded to an even number of bytes.
            var chunkSize = BitConverter.ToUInt32(buffer, position + 4);
            offset += ChunkHeaderSize + ((chunkSize + 1L) & ~1L);
        }

        return -1;
    }

    /// <summary>
    /// WAV file metadata with complete audio information
    /// </summary>
    private sealed class WavMetadata
    {
        public double Duration { get; set; }
        public int Bitrate { get; set; }
        public int SampleRate { get; set; }
        public int Channels { get; set; }
        public int BitsPerSample { get; set; }
        public int BlockAlign { get; set; }
        public int DataSize { get; set; }
        public int DataChunkPos { get; set; }
        public bool IsExtensible { get; set; }
    }

    /// <summary>
    /// Result of WAV structure validation
    /// </summary>
    private sealed class WavValidationResult
    {
        public bool IsValid { get; set; }
        public string ErrorMessage { get; set; } = string.Empty;
        public int FmtChunkPos { get; set; }
        public int DataChunkPos { get; set; }
        public bool IsExtensible { get; set; }
    }
}

/// <summary>
/// The raw PCM sample region of a WAV plus the format parameters needed to interpret it.
/// <paramref name="Pcm"/> is a view over the decoded buffer — the data chunk only, header excluded.
/// </summary>
/// <param name="Pcm">The PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(
    ReadOnlyMemory<byte> Pcm,
    int Channels,
    int SampleRate,
    int BitsPerSample);