using DeepDrftContent.FileDatabase.Models;

namespace DeepDrftContent.Processors;

/// <summary>
/// Service for processing audio files and extracting metadata
/// </summary>
public class AudioProcessor
{
    /// <summary>Offset of the first chunk: 4 "RIFF" + 4 riff size + 4 "WAVE".</summary>
    private const int RiffHeaderSize = 12;

    /// <summary>Size of a chunk header: 4-byte id + 4-byte little-endian size.</summary>
    private const int ChunkHeaderSize = 8;

    /// <summary>Size of the base (non-extended) fmt chunk body.</summary>
    private const int BaseFmtSize = 16;

    /// <summary>Size of a WAVE_FORMAT_EXTENSIBLE fmt body: 16 base + 2 cbSize + 22 extension.</summary>
    private const int ExtensibleFmtSize = 40;

    private static readonly int[] ValidSampleRates =
        { 8000, 11025, 16000, 22050, 44100, 48000, 88200, 96000, 176400, 192000 };

    private static readonly int[] ValidBitDepths = { 8, 16, 24, 32 };

    /// <summary>
    /// Processes a WAV file and creates an AudioBinary object
    /// </summary>
    /// <param name="filePath">Path to the WAV file</param>
    /// <returns>AudioBinary object with metadata</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="ArgumentException">The file extension is not ".wav".</exception>
    /// <exception cref="InvalidOperationException">Reading or processing the file failed.</exception>
    public async Task<AudioBinary> ProcessWavFileAsync(string filePath)
    {
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"WAV file not found: {filePath}");
        }

        if (!Path.GetExtension(filePath).Equals(".wav", StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("File must be a WAV file", nameof(filePath));
        }

        try
        {
            var buffer = await File.ReadAllBytesAsync(filePath);
            var wavInfo = ExtractWavMetadata(buffer);

            // EXTENSIBLE-PCM is byte-compatible with standard PCM but carries a 40+ byte fmt chunk
            // the streaming pipeline never expects. Normalize to a plain 44-byte PCM WAV at storage
            // time so the vault only ever holds standard PCM and the client decode path stays unchanged.
            var storedBuffer = wavInfo.IsExtensible
                ? NormalizeToStandardPcm(buffer, wavInfo)
                : buffer;

            var parameters = new AudioBinaryParams(
                Buffer: storedBuffer,
                Size: storedBuffer.Length,
                Extension: ".wav",
                Duration: wavInfo.Duration,
                Bitrate: wavInfo.Bitrate
            );

            return new AudioBinary(parameters);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException($"Failed to process WAV file: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Extracts the raw PCM data region and format parameters from a WAV buffer, reusing the
    /// same chunk-walk and validation as metadata extraction. Returns null if the buffer is not
    /// a valid PCM WAV (callers treat a null as "no profile computable" and continue) — unlike
    /// <see cref="ExtractWavMetadata"/>, this does NOT fall back to synthetic defaults, because a
    /// loudness profile over fabricated silence would be misleading.
    /// </summary>
    /// <param name="buffer">The complete WAV file contents.</param>
    /// <returns>
    /// The PCM data region with its format parameters, or null when the buffer is not a plain
    /// integer-PCM WAV (invalid structure, float samples, padded container, or empty data).
    /// </returns>
    public PcmData? TryExtractPcm(ReadOnlySpan<byte> buffer)
    {
        // Copy the span to an array so the existing array-based parsers can be reused. The PCM
        // slice returned is a view over this array (no second copy of the data region).
        var bytes = buffer.ToArray();

        var validation = ValidateWavStructure(bytes);
        if (!validation.IsValid)
        {
            return null;
        }

        // Float and padded-container EXTENSIBLE require a sample-level transform to become integer PCM.
        // TryExtractPcm feeds loudness analysis, not storage, and must not hand back float bytes
        // mislabeled as integer PCM — out of scope here, so treat them as "no profile computable".
        if (validation.IsFloat)
        {
            return null;
        }

        WavMetadata metadata;
        try
        {
            metadata = ParseWavMetadata(bytes, validation);
            ValidateAudioParameters(metadata);
            if (metadata.IsPaddedContainer)
            {
                return null;
            }
        }
        catch
        {
            return null;
        }

        // Data bytes begin 8 past the "data" chunk id (4 id + 4 size). Clamp the declared size to
        // what is actually present — some encoders write a size that overshoots the file.
        var dataStart = validation.DataChunkPos + ChunkHeaderSize;
        if (dataStart > bytes.Length)
        {
            return null;
        }

        var available = bytes.Length - dataStart;
        var dataLength = Math.Min(metadata.DataSize, available);
        if (dataLength <= 0)
        {
            return null;
        }

        var pcm = new ReadOnlyMemory<byte>(bytes, dataStart, dataLength);
        return new PcmData(pcm, metadata.Channels, metadata.SampleRate, metadata.BitsPerSample);
    }

    /// <summary>
    /// Extracts metadata from WAV file buffer with comprehensive validation.
    /// Any validation or parsing failure is logged to the console and replaced by
    /// <see cref="GetDefaultWavMetadata"/>; this method does not throw for malformed input.
    /// </summary>
    private WavMetadata ExtractWavMetadata(byte[] buffer)
    {
        try
        {
            var validationResult = ValidateWavStructure(buffer);
            if (!validationResult.IsValid)
            {
                throw new InvalidDataException($"WAV validation failed: {validationResult.ErrorMessage}");
            }

            var metadata = ParseWavMetadata(buffer, validationResult);
            ValidateAudioParameters(metadata);
            return metadata;
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Warning: WAV parsing failed, using defaults: {ex.Message}");
            return GetDefaultWavMetadata();
        }
    }

    /// <summary>Builds a failed validation result carrying <paramref name="message"/>.</summary>
    private static WavValidationResult Fail(string message) =>
        new WavValidationResult { IsValid = false, ErrorMessage = message };

    /// <summary>
    /// Validates WAV file structure and returns parsing information.
    /// Malformed input yields a result with <c>IsValid == false</c> rather than an exception.
    /// </summary>
    private WavValidationResult ValidateWavStructure(byte[] buffer)
    {
        if (buffer.Length < 44)
        {
            return Fail("File too short");
        }

        // Validate RIFF signature
        var riffSignature = System.Text.Encoding.ASCII.GetString(buffer, 0, 4);
        if (riffSignature != "RIFF")
        {
            return Fail("Invalid RIFF signature");
        }

        // Validate WAVE signature
        var waveSignature = System.Text.Encoding.ASCII.GetString(buffer, 8, 4);
        if (waveSignature != "WAVE")
        {
            return Fail("Invalid WAVE signature");
        }

        // Find and validate fmt chunk
        var fmtChunkPos = FindChunk(buffer, "fmt ");
        if (fmtChunkPos == -1)
        {
            return Fail("Missing fmt chunk");
        }

        var fmtChunkSize = BitConverter.ToUInt32(buffer, fmtChunkPos + 4);
        if (fmtChunkSize < BaseFmtSize)
        {
            return Fail("fmt chunk too small");
        }

        // FindChunk only guarantees the 8-byte chunk header is in range. The base fmt fields
        // (format tag through bits-per-sample) occupy the next 16 bytes and are read below and
        // in ParseWavMetadata, so make sure they exist before touching them.
        if (fmtChunkPos + ChunkHeaderSize + BaseFmtSize > buffer.Length)
        {
            return Fail("fmt chunk extends past end of file");
        }

        // Validate audio format. Standard PCM (1) is accepted directly. WAVE_FORMAT_EXTENSIBLE
        // (0xFFFE) is accepted when its SubFormat GUID indicates PCM (0x0001) or IEEE float
        // (0x0003). PCM sample data is byte-identical to standard PCM; float data is converted to
        // 24-bit PCM downstream. Either way the vault only ever holds standard PCM.
        var audioFormat = BitConverter.ToUInt16(buffer, fmtChunkPos + 8);
        var isExtensible = false;
        var isFloat = false;

        if (audioFormat == 0xFFFE)
        {
            // EXTENSIBLE requires the full extension: 16 base + 2 cbSize + 22 extension = 40 bytes.
            if (fmtChunkSize < ExtensibleFmtSize)
            {
                return Fail("Invalid data: EXTENSIBLE fmt chunk too small");
            }

            if (fmtChunkPos + ChunkHeaderSize + ExtensibleFmtSize > buffer.Length)
            {
                return Fail("Invalid data: EXTENSIBLE fmt chunk extends past end of file");
            }

            // SubFormat GUID begins 24 bytes into the fmt chunk data (fmtChunkPos + 8 + 24). Its
            // first two bytes are the little-endian format tag: 0x0001 == WAVE_FORMAT_PCM,
            // 0x0003 == WAVE_FORMAT_IEEE_FLOAT.
            var subFormatPos = fmtChunkPos + 8 + 24;
            var subFormatTag = BitConverter.ToUInt16(buffer, subFormatPos);
            if (subFormatTag == 0x0001)
            {
                isExtensible = true;
            }
            else if (subFormatTag == 0x0003)
            {
                isExtensible = true;
                isFloat = true;
            }
            else
            {
                return Fail("Invalid data: EXTENSIBLE SubFormat is neither PCM nor IEEE float");
            }
        }
        else if (audioFormat != 1)
        {
            return Fail("Only PCM format supported");
        }

        // Find data chunk
        var dataChunkPos = FindChunk(buffer, "data");
        if (dataChunkPos == -1)
        {
            return Fail("Missing data chunk");
        }

        return new WavValidationResult
        {
            IsValid = true,
            FmtChunkPos = fmtChunkPos,
            DataChunkPos = dataChunkPos,
            IsExtensible = isExtensible,
            IsFloat = isFloat
        };
    }

    /// <summary>
    /// Parses WAV metadata from validated buffer.
    /// <paramref name="validation"/> must come from a successful
    /// <see cref="ValidateWavStructure"/> call on the same buffer; the chunk positions it
    /// carries are trusted here.
    /// </summary>
    private WavMetadata ParseWavMetadata(byte[] buffer, WavValidationResult validation)
    {
        var channels = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 10);
        var sampleRate = BitConverter.ToUInt32(buffer, validation.FmtChunkPos + 12);
        var byteRate = BitConverter.ToUInt32(buffer, validation.FmtChunkPos + 16);
        var blockAlign = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 20);
        var bitsPerSample = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 22);
        long declaredDataSize = BitConverter.ToUInt32(buffer, validation.DataChunkPos + 4);

        // The declared size is an unsigned 32-bit value that some encoders overshoot (streamed
        // files commonly write 0xFFFFFFFF). Casting it straight to int would go negative, so
        // clamp it to the bytes actually present after the data chunk header.
        long availableData = Math.Max(0, buffer.Length - (validation.DataChunkPos + ChunkHeaderSize));
        var dataSize = (int)Math.Min(declaredDataSize, availableData);

        // For EXTENSIBLE the offset-22 field is the container width; the true sample depth lives in
        // wValidBitsPerSample (fmtChunkPos + 8 + 18). They usually match (Bandcamp 24-bit = 24/24)
        // but the valid bits are authoritative for the normalized header and metadata. When they
        // differ (e.g. 24-bit valid in a 32-bit container) we keep the container width separately so
        // ValidateAudioParameters can reconcile against the header BlockAlign and NormalizeToStandardPcm
        // can re-pack the padded frames.
        var containerBitsPerSample = 0;
        if (validation.IsExtensible)
        {
            var validBits = BitConverter.ToUInt16(buffer, validation.FmtChunkPos + 8 + 18);
            if (validBits != bitsPerSample)
            {
                containerBitsPerSample = bitsPerSample;
            }
            bitsPerSample = validBits;
        }

        // Duration is derived from the clamped size so an overshooting header does not inflate it.
        var duration = byteRate > 0 ? (double)dataSize / byteRate : 0.0;
        var bitrate = (int)((sampleRate * channels * bitsPerSample) / 1000);

        return new WavMetadata
        {
            Duration = duration,
            Bitrate = bitrate,
            SampleRate = (int)sampleRate,
            Channels = channels,
            BitsPerSample = bitsPerSample,
            ContainerBitsPerSample = containerBitsPerSample,
            BlockAlign = blockAlign,
            DataSize = dataSize,
            DataChunkPos = validation.DataChunkPos,
            IsExtensible = validation.IsExtensible,
            IsFloat = validation.IsFloat
        };
    }

    /// <summary>
    /// Validates audio parameters for reasonableness
    /// </summary>
    /// <exception cref="InvalidDataException">A parameter is outside the supported set.</exception>
    private void ValidateAudioParameters(WavMetadata metadata)
    {
        if (metadata.Channels < 1 || metadata.Channels > 8)
        {
            throw new InvalidDataException($"Invalid channel count: {metadata.Channels}");
        }

        if (!ValidSampleRates.Contains(metadata.SampleRate))
        {
            throw new InvalidDataException($"Unsupported sample rate: {metadata.SampleRate}");
        }

        if (!ValidBitDepths.Contains(metadata.BitsPerSample))
        {
            throw new InvalidDataException($"Unsupported bit depth: {metadata.BitsPerSample}");
        }

        // The float conversion path reads every sample as a 4-byte IEEE single; any other float
        // layout (16/64-bit, or float inside a wider container) would be decoded as garbage.
        if (metadata.IsFloat && (metadata.BitsPerSample != 32 || metadata.IsPaddedContainer))
        {
            throw new InvalidDataException(
                $"Unsupported float format: {metadata.BitsPerSample} valid bits in a {(metadata.IsPaddedContainer ? metadata.ContainerBitsPerSample : metadata.BitsPerSample)}-bit container");
        }

        // A padded container must be a whole-byte supported width that is wider than the valid
        // depth; otherwise re-packing would read past each sample (or divide by zero).
        if (metadata.IsPaddedContainer &&
            (!ValidBitDepths.Contains(metadata.ContainerBitsPerSample) ||
             metadata.ContainerBitsPerSample < metadata.BitsPerSample))
        {
            throw new InvalidDataException(
                $"Invalid container width: {metadata.ContainerBitsPerSample} bits for {metadata.BitsPerSample} valid bits");
        }

        // The header BlockAlign reflects the container width, not the valid bit depth. For a padded
        // EXTENSIBLE container (e.g. 24-in-32) the container width is authoritative for this check;
        // NormalizeToStandardPcm re-packs the frames down to the valid depth afterwards.
        var blockAlignBits = metadata.IsPaddedContainer
            ? metadata.ContainerBitsPerSample
            : metadata.BitsPerSample;
        var expectedBlockAlign = metadata.Channels * (blockAlignBits / 8);
        if (metadata.BlockAlign != expectedBlockAlign)
        {
            throw new InvalidDataException($"Invalid block align: expected {expectedBlockAlign}, got {metadata.BlockAlign}");
        }
    }

    /// <summary>
    /// Rebuilds an EXTENSIBLE WAV as a canonical 44-byte-header standard PCM WAV (audioFormat = 1)
    /// so the vault only ever holds a format the streaming pipeline already handles. Three source
    /// shapes are normalized:
    /// <list type="bullet">
    /// <item>EXTENSIBLE-PCM (depth == container): sample bytes are byte-identical to standard PCM and
    /// copied verbatim; only the header is replaced.</item>
    /// <item>IEEE float: 32-bit float samples are converted to 24-bit signed integer PCM.</item>
    /// <item>Padded container (e.g. 24-in-32): the padding/sign-extension bytes are stripped, keeping
    /// the lowest valid bytes per sample.</item>
    /// </list>
    /// The output header always reports the valid bit depth (<see cref="WavMetadata.BitsPerSample"/>),
    /// except for float sources, which are written as 24-bit.
    /// </summary>
    private byte[] NormalizeToStandardPcm(byte[] buffer, WavMetadata metadata)
    {
        // Clamp the declared data size to what is actually present; some encoders overshoot.
        var dataStart = metadata.DataChunkPos + ChunkHeaderSize;
        var available = buffer.Length - dataStart;
        var srcDataSize = Math.Max(0, Math.Min(metadata.DataSize, available));

        byte[] dataBytes;
        int outBitsPerSample;

        if (metadata.IsFloat)
        {
            dataBytes = ConvertFloatTo24BitPcm(buffer, dataStart, srcDataSize);
            outBitsPerSample = 24;
        }
        else if (metadata.IsPaddedContainer)
        {
            dataBytes = RepackPaddedContainer(
                buffer, dataStart, srcDataSize, metadata.ContainerBitsPerSample, metadata.BitsPerSample);
            outBitsPerSample = metadata.BitsPerSample;
        }
        else
        {
            dataBytes = new byte[srcDataSize];
            Array.Copy(buffer, dataStart, dataBytes, 0, srcDataSize);
            outBitsPerSample = metadata.BitsPerSample;
        }

        var dataSize = dataBytes.Length;
        const int headerSize = 44;
        var result = new byte[headerSize + dataSize];

        var blockAlign = (ushort)(metadata.Channels * (outBitsPerSample / 8));
        var byteRate = (uint)(metadata.SampleRate * blockAlign);

        // RIFF header
        System.Text.Encoding.ASCII.GetBytes("RIFF").CopyTo(result, 0);
        BitConverter.GetBytes((uint)(36 + dataSize)).CopyTo(result, 4);
        System.Text.Encoding.ASCII.GetBytes("WAVE").CopyTo(result, 8);

        // fmt chunk (standard 16-byte PCM)
        System.Text.Encoding.ASCII.GetBytes("fmt ").CopyTo(result, 12);
        BitConverter.GetBytes((uint)16).CopyTo(result, 16);
        BitConverter.GetBytes((ushort)1).CopyTo(result, 20); // audioFormat = PCM
        BitConverter.GetBytes((ushort)metadata.Channels).CopyTo(result, 22);
        BitConverter.GetBytes((uint)metadata.SampleRate).CopyTo(result, 24);
        BitConverter.GetBytes(byteRate).CopyTo(result, 28);
        BitConverter.GetBytes(blockAlign).CopyTo(result, 32);
        BitConverter.GetBytes((ushort)outBitsPerSample).CopyTo(result, 34);

        // data chunk
        System.Text.Encoding.ASCII.GetBytes("data").CopyTo(result, 36);
        BitConverter.GetBytes((uint)dataSize).CopyTo(result, 40);
        Array.Copy(dataBytes, 0, result, headerSize, dataSize);

        return result;
    }

    /// <summary>
    /// Converts 32-bit little-endian IEEE float samples (range [-1.0, 1.0]) to 24-bit signed PCM.
    /// Each 4-byte source sample becomes 3 little-endian output bytes; output size is 3/4 of input.
    /// Trailing bytes that do not form a complete 4-byte sample are ignored.
    /// Out-of-range samples (including ±Infinity) saturate at full scale; NaN becomes silence.
    /// </summary>
    private static byte[] ConvertFloatTo24BitPcm(byte[] buffer, int dataStart, int dataSize)
    {
        const double maxValue = 8388607.0;
        const double minValue = -8388608.0;

        var sampleCount = dataSize / 4;
        var output = new byte[sampleCount * 3];

        for (int i = 0; i < sampleCount; i++)
        {
            var sample = BitConverter.ToSingle(buffer, dataStart + i * 4);

            // Clamp BEFORE the integer cast: converting NaN, infinity or an out-of-range double
            // to int yields an unspecified value, which would make a clamp applied afterwards
            // meaningless (a positive overshoot can come out as full-scale negative).
            var scaled = float.IsNaN(sample)
                ? 0.0
                : Math.Clamp(sample * maxValue, minValue, maxValue);
            var value = (int)scaled;

            var o = i * 3;
            output[o] = (byte)(value & 0xFF);
            output[o + 1] = (byte)((value >> 8) & 0xFF);
            output[o + 2] = (byte)((value >> 16) & 0xFF);
        }

        return output;
    }

    /// <summary>
    /// Strips container padding from a padded-container EXTENSIBLE WAV (e.g. 24-bit valid samples
    /// stored in 32-bit containers), keeping only the lowest <paramref name="validBits"/>/8 bytes of
    /// each little-endian sample. Output size is (validBits/containerBits) of input.
    /// Trailing bytes that do not form a complete container sample are ignored.
    /// Callers must ensure <paramref name="containerBits"/> is at least 8 and not smaller than
    /// <paramref name="validBits"/>.
    /// </summary>
    private static byte[] RepackPaddedContainer(byte[] buffer, int dataStart, int dataSize, int containerBits, int validBits)
    {
        var containerBytes = containerBits / 8;
        var validBytes = validBits / 8;
        var sampleCount = dataSize / containerBytes;
        var output = new byte[sampleCount * validBytes];

        for (int i = 0; i < sampleCount; i++)
        {
            var src = dataStart + i * containerBytes;
            var dst = i * validBytes;

            // Little-endian: the valid sample occupies the low bytes; the upper bytes are padding /
            // sign extension and are discarded.
            for (int b = 0; b < validBytes; b++)
            {
                output[dst + b] = buffer[src + b];
            }
        }

        return output;
    }

    /// <summary>
    /// Returns default WAV metadata for fallback scenarios
    /// </summary>
    private WavMetadata GetDefaultWavMetadata()
    {
        return new WavMetadata
        {
            Duration = 180.0,
            Bitrate = 1411,
            SampleRate = 44100,
            Channels = 2,
            BitsPerSample = 16,
            BlockAlign = 4,
            DataSize = 0
        };
    }

    /// <summary>
    /// Finds a chunk in the WAV file buffer with proper alignment handling.
    /// </summary>
    /// <param name="buffer">The complete WAV file contents.</param>
    /// <param name="chunkId">The four-character ASCII chunk id to search for.</param>
    /// <returns>
    /// The offset of the chunk id, or -1 when not found. A returned offset always has its full
    /// 8-byte chunk header inside <paramref name="buffer"/>.
    /// </returns>
    private int FindChunk(byte[] buffer, string chunkId)
    {
        var chunkBytes = System.Text.Encoding.ASCII.GetBytes(chunkId);

        // Walk in 64-bit arithmetic: the declared chunk size is an untrusted unsigned 32-bit
        // value, and adding it to a 32-bit offset could wrap to a negative offset or to a zero
        // step (an endless loop) for crafted headers.
        long offset = RiffHeaderSize; // Start after RIFF header

        while (offset <= buffer.Length - ChunkHeaderSize)
        {
            var pos = (int)offset;

            // Check for chunk signature match
            if (buffer.AsSpan(pos, 4).SequenceEqual(chunkBytes))
            {
                return pos;
            }

            // Move to next chunk; odd-sized chunks are followed by one pad byte (even alignment).
            var chunkSize = BitConverter.ToUInt32(buffer, pos + 4);
            offset += ChunkHeaderSize + (long)chunkSize + (chunkSize & 1);
        }

        return -1;
    }

    /// <summary>
    /// WAV file metadata with complete audio information
    /// </summary>
    private class WavMetadata
    {
        public double Duration { get; set; }
        public int Bitrate { get; set; }
        public int SampleRate { get; set; }
        public int Channels { get; set; }

        /// <summary>The valid sample depth — for EXTENSIBLE, wValidBitsPerSample.</summary>
        public int BitsPerSample { get; set; }

        /// <summary>
        /// The container sample width for a padded EXTENSIBLE WAV whose valid depth is narrower
        /// (e.g. 32 for a 24-in-32 file). Zero when the container matches the valid depth.
        /// </summary>
        public int ContainerBitsPerSample { get; set; }

        public int BlockAlign { get; set; }

        /// <summary>Size of the data chunk in bytes, clamped to the bytes present in the buffer.</summary>
        public int DataSize { get; set; }

        public int DataChunkPos { get; set; }
        public bool IsExtensible { get; set; }

        /// <summary>True when the SubFormat is IEEE float (converted to 24-bit PCM on normalization).</summary>
        public bool IsFloat { get; set; }

        /// <summary>True when valid samples are stored in a wider container that must be re-packed.</summary>
        public bool IsPaddedContainer =>
            ContainerBitsPerSample != 0 && ContainerBitsPerSample != BitsPerSample;
    }

    /// <summary>
    /// Result of WAV structure validation
    /// </summary>
    private class WavValidationResult
    {
        public bool IsValid { get; set; }
        public string ErrorMessage { get; set; } = string.Empty;
        public int FmtChunkPos { get; set; }
        public int DataChunkPos { get; set; }
        public bool IsExtensible { get; set; }

        /// <summary>True when the EXTENSIBLE SubFormat is IEEE float rather than PCM.</summary>
        public bool IsFloat { get; set; }
    }
}

/// <summary>
/// The raw PCM sample region of a WAV plus the format parameters needed to interpret it.
/// <see cref="Pcm"/> is a view over the decoded buffer — the data chunk only, header excluded.
/// </summary>
/// <param name="Pcm">The PCM sample bytes (interleaved by channel, little-endian).</param>
/// <param name="Channels">Number of interleaved channels.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="BitsPerSample">Bit depth per sample (8, 16, 24, or 32).</param>
public readonly record struct PcmData(
    ReadOnlyMemory<byte> Pcm,
    int Channels,
    int SampleRate,
    int BitsPerSample);