Files
deepdrft/DeepDrftContent/Processors/Mp3AudioProcessor.cs
T
daniel-c-harvey 3bb8104967 feat(audio): add MP3 and FLAC upload support via format-routed processors
AudioProcessorRouter dispatches by extension; vault stores original bytes with correct MIME type.
2026-06-11 05:49:17 -04:00

312 lines
10 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using DeepDrftContent.FileDatabase.Models;
namespace DeepDrftContent.Processors;
/// <summary>
/// Extracts metadata from an MP3 file and wraps its <b>unmodified</b> bytes in an
/// <see cref="AudioBinary"/> tagged <c>.mp3</c>. No transcoding - the vault stores the original
/// stream; only duration/bitrate metadata are computed from the first MPEG frame header (plus a
/// Xing/VBRI tag when present for accurate VBR duration).
/// </summary>
public class Mp3AudioProcessor
{
    // MPEG1 Layer III bitrate table (kbps), indexed by the 4-bit bitrate index. 0 = free, 15 = bad.
    private static readonly int[] Mpeg1Layer3Bitrates =
        [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];

    // MPEG2 / MPEG2.5 Layer III bitrate table (kbps). The lower-sample-rate versions use their own
    // table, so the MPEG1 table must not be applied to them. 0 = free, 15 = bad.
    private static readonly int[] Mpeg2Layer3Bitrates =
        [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];

    // Sample rates (Hz) indexed by the 2-bit sample-rate index; index 3 is reserved.
    private static readonly int[] Mpeg1SampleRates = [44100, 48000, 32000];
    private static readonly int[] Mpeg2SampleRates = [22050, 24000, 16000];
    private static readonly int[] Mpeg25SampleRates = [11025, 12000, 8000];

    // Synthetic metadata reported when the stream cannot be parsed.
    private const double FallbackDuration = 180.0;
    private const int FallbackBitrate = 320;

    /// <summary>
    /// Reads the MP3 at <paramref name="filePath"/> fully into memory and returns it, untouched,
    /// as an <see cref="AudioBinary"/> carrying the parsed (or fallback) duration and bitrate.
    /// </summary>
    /// <param name="filePath">Path to an existing file whose extension is <c>.mp3</c> (case-insensitive).</param>
    /// <returns>The wrapped audio; this implementation never returns <c>null</c>.</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="ArgumentException">The file extension is not <c>.mp3</c>.</exception>
    public async Task<AudioBinary?> ProcessMp3FileAsync(string filePath)
    {
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"MP3 file not found: {filePath}", filePath);
        }

        if (!Path.GetExtension(filePath).Equals(".mp3", StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("File must be an MP3 file", nameof(filePath));
        }

        var buffer = await File.ReadAllBytesAsync(filePath);
        var meta = ExtractMp3Metadata(buffer);

        var parameters = new AudioBinaryParams(
            Buffer: buffer,
            Size: buffer.Length,
            Extension: ".mp3",
            Duration: meta.Duration,
            Bitrate: meta.Bitrate);

        return new AudioBinary(parameters);
    }

    /// <summary>
    /// Parses the first valid MPEG frame (after any ID3v2 tag) and any Xing/VBRI tag inside it.
    /// On any parse failure, writes a warning to the console and returns synthetic defaults -
    /// never throws.
    /// </summary>
    private static Mp3Metadata ExtractMp3Metadata(byte[] buffer)
    {
        try
        {
            var frameStart = FindFirstFrame(buffer);
            if (frameStart < 0)
            {
                throw new InvalidDataException("No valid MPEG frame sync found");
            }

            var header = DecodeFrameHeader(buffer, frameStart);
            var duration = ComputeDuration(buffer, frameStart, header);
            return new Mp3Metadata { Duration = duration, Bitrate = header.BitrateKbps };
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: metadata problems must not block storing the original bytes.
            Console.WriteLine($"Warning: MP3 parsing failed, using defaults: {ex.Message}");
            return new Mp3Metadata { Duration = FallbackDuration, Bitrate = FallbackBitrate };
        }
    }

    /// <summary>
    /// Returns the offset of the first valid MPEG frame, skipping a leading ID3v2 tag if present,
    /// or -1 when none is found. Scans for a 0xFF / 0xE0-syncword pair and fully validates the
    /// 4-byte header before accepting.
    /// </summary>
    private static int FindFirstFrame(byte[] buffer)
    {
        var start = SkipId3v2(buffer);

        // A header occupies bytes i..i+3, so the last usable offset is Length - 4 (inclusive).
        for (int i = start; i <= buffer.Length - 4; i++)
        {
            if (buffer[i] != 0xFF || (buffer[i + 1] & 0xE0) != 0xE0)
            {
                continue;
            }

            if (IsValidFrameHeader(buffer, i))
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>
    /// Returns the byte offset just past an ID3v2 tag, or 0 if none. The tag size is a syncsafe
    /// big-endian uint28 at bytes 6-9 (each byte's MSB is 0). A footer (flag bit 4 of byte 5) adds 10.
    /// Returns 0 as well when the size bytes are not syncsafe or the declared tag runs past the buffer.
    /// </summary>
    private static int SkipId3v2(byte[] buffer)
    {
        if (buffer.Length < 10 || buffer[0] != 'I' || buffer[1] != 'D' || buffer[2] != '3')
        {
            return 0;
        }

        // A size byte with its MSB set is not a valid syncsafe value; treat the header as absent
        // instead of OR-ing overlapping bits into a meaningless skip offset.
        if (((buffer[6] | buffer[7] | buffer[8] | buffer[9]) & 0x80) != 0)
        {
            return 0;
        }

        var size = (buffer[6] << 21) | (buffer[7] << 14) | (buffer[8] << 7) | buffer[9];
        var skip = 10 + size; // 10-byte tag header + payload

        if ((buffer[5] & 0x10) != 0)
        {
            skip += 10; // footer present
        }

        return skip <= buffer.Length ? skip : 0;
    }

    /// <summary>
    /// Fully validates a candidate 4-byte frame header: layer must be III, and version, bitrate
    /// index, and sample-rate index must all be non-reserved (rejects free bitrate, bad index 0xF,
    /// and reserved sample rate 3).
    /// </summary>
    private static bool IsValidFrameHeader(byte[] buffer, int pos)
    {
        var b1 = buffer[pos + 1];
        var b2 = buffer[pos + 2];

        var versionBits = (b1 >> 3) & 0x03;
        if (versionBits == 1) // 1 = reserved
        {
            return false;
        }

        var layerBits = (b1 >> 1) & 0x03;
        if (layerBits != 1) // 1 = Layer III; this processor handles Layer III only
        {
            return false;
        }

        var bitrateIndex = (b2 >> 4) & 0x0F;
        if (bitrateIndex == 0 || bitrateIndex == 0x0F) // 0 = free, 0xF = bad
        {
            return false;
        }

        var sampleRateIndex = (b2 >> 2) & 0x03;
        if (sampleRateIndex == 3) // reserved
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Decodes the 4-byte Layer III frame header at <paramref name="pos"/>. The caller is expected
    /// to have validated it with <see cref="IsValidFrameHeader"/>; reserved indices are not
    /// re-checked here and would surface as an index-out-of-range exception.
    /// </summary>
    private static FrameHeader DecodeFrameHeader(byte[] buffer, int pos)
    {
        var b1 = buffer[pos + 1];
        var b2 = buffer[pos + 2];
        var b3 = buffer[pos + 3];

        var versionBits = (b1 >> 3) & 0x03;
        var version = versionBits switch
        {
            3 => MpegVersion.Mpeg1,
            2 => MpegVersion.Mpeg2,
            _ => MpegVersion.Mpeg25, // 0 = MPEG2.5
        };
        var isMpeg1 = version == MpegVersion.Mpeg1;

        // MPEG2 and MPEG2.5 share a bitrate table that differs from MPEG1's.
        var bitrateIndex = (b2 >> 4) & 0x0F;
        var bitrateTable = isMpeg1 ? Mpeg1Layer3Bitrates : Mpeg2Layer3Bitrates;
        var bitrateKbps = bitrateTable[bitrateIndex];

        var sampleRateIndex = (b2 >> 2) & 0x03;
        var sampleRate = version switch
        {
            MpegVersion.Mpeg1 => Mpeg1SampleRates[sampleRateIndex],
            MpegVersion.Mpeg2 => Mpeg2SampleRates[sampleRateIndex],
            _ => Mpeg25SampleRates[sampleRateIndex],
        };

        var paddingBit = (b2 >> 1) & 0x01;
        var channelMode = (b3 >> 6) & 0x03;
        var channels = channelMode == 3 ? 1 : 2; // mode 3 = single channel

        var samplesPerFrame = isMpeg1 ? 1152 : 576;

        // Layer III frame length in bytes: (samplesPerFrame / 8) * bitrate / sampleRate + padding,
        // i.e. coefficient 144 for MPEG1 and 72 for MPEG2/2.5. Integer division floors the result.
        var frameSize = (samplesPerFrame / 8) * bitrateKbps * 1000 / sampleRate + paddingBit;

        return new FrameHeader
        {
            Version = version,
            BitrateKbps = bitrateKbps,
            SampleRate = sampleRate,
            Channels = channels,
            SamplesPerFrame = samplesPerFrame,
            FrameSize = frameSize,
        };
    }

    /// <summary>
    /// Computes duration from a Xing/Info or VBRI tag (accurate for VBR) when present; otherwise
    /// falls back to the CBR estimate audioBytes / (bitrate_kbps * 125), where audioBytes counts
    /// from the first frame so a leading ID3v2 tag does not inflate the result. Guards divide-by-zero.
    /// </summary>
    private static double ComputeDuration(byte[] buffer, int frameStart, FrameHeader header)
    {
        var xingFrames = ReadXingFrameCount(buffer, frameStart, header);
        if (xingFrames > 0 && header.SampleRate > 0)
        {
            return (double)xingFrames * header.SamplesPerFrame / header.SampleRate;
        }

        var vbriFrames = ReadVbriFrameCount(buffer, frameStart);
        if (vbriFrames > 0 && header.SampleRate > 0)
        {
            return (double)vbriFrames * header.SamplesPerFrame / header.SampleRate;
        }

        // CBR fallback: bitrate_kbps * 1000 / 8 bytes per second = bitrate_kbps * 125.
        var bytesPerSecond = header.BitrateKbps * 125;
        var audioBytes = buffer.Length - frameStart; // exclude everything before the first frame
        return bytesPerSecond > 0 ? (double)audioBytes / bytesPerSecond : FallbackDuration;
    }

    /// <summary>
    /// Reads the Xing/Info VBR total-frame count from the side-information region of the first frame,
    /// or 0 if no Xing tag or no frame-count flag. Side-info offset depends on version and channels.
    /// </summary>
    private static int ReadXingFrameCount(byte[] buffer, int frameStart, FrameHeader header)
    {
        var sideInfoSize = header.Version == MpegVersion.Mpeg1
            ? (header.Channels == 1 ? 17 : 32)
            : (header.Channels == 1 ? 9 : 17);

        var tagPos = frameStart + 4 + sideInfoSize;
        if (tagPos + 12 > buffer.Length) // need 4-byte id + 4-byte flags + 4-byte frame count
        {
            return 0;
        }

        if (!MatchesAscii(buffer, tagPos, "Xing") && !MatchesAscii(buffer, tagPos, "Info"))
        {
            return 0;
        }

        var flags = ReadUInt32BigEndian(buffer, tagPos + 4);
        if ((flags & 0x01) == 0) // bit 0 = frame-count present
        {
            return 0;
        }

        // Counts above int.MaxValue wrap negative and are then ignored by the caller's "> 0" check.
        return (int)ReadUInt32BigEndian(buffer, tagPos + 8);
    }

    /// <summary>
    /// Reads the Fraunhofer VBRI total-frame count, or 0 if no VBRI tag. The VBRI tag sits at a
    /// fixed offset 32 past the frame header (frameStart + 4 + 32); the frame count is a
    /// big-endian uint32 at tag offset 14.
    /// </summary>
    private static int ReadVbriFrameCount(byte[] buffer, int frameStart)
    {
        var tagPos = frameStart + 4 + 32;
        if (tagPos + 18 > buffer.Length) // frame count occupies tag bytes 14..17
        {
            return 0;
        }

        if (!MatchesAscii(buffer, tagPos, "VBRI"))
        {
            return 0;
        }

        return (int)ReadUInt32BigEndian(buffer, tagPos + 14);
    }

    /// <summary>
    /// Returns true when the bytes starting at <paramref name="pos"/> equal the characters of
    /// <paramref name="tag"/>. The caller must ensure the range is inside the buffer.
    /// </summary>
    private static bool MatchesAscii(byte[] buffer, int pos, string tag)
    {
        for (int i = 0; i < tag.Length; i++)
        {
            if (buffer[pos + i] != (byte)tag[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Reads a big-endian unsigned 32-bit integer from the 4 bytes at <paramref name="pos"/>.</summary>
    private static uint ReadUInt32BigEndian(byte[] buffer, int pos) =>
        ((uint)buffer[pos] << 24) | ((uint)buffer[pos + 1] << 16) | ((uint)buffer[pos + 2] << 8) | buffer[pos + 3];

    /// <summary>MPEG audio version decoded from the frame header's 2-bit version field.</summary>
    private enum MpegVersion
    {
        Mpeg1,
        Mpeg2,
        Mpeg25,
    }

    /// <summary>Decoded fields of a single Layer III frame header.</summary>
    private sealed class FrameHeader
    {
        public MpegVersion Version { get; init; }
        public int BitrateKbps { get; init; }
        public int SampleRate { get; init; }
        public int Channels { get; init; }
        public int SamplesPerFrame { get; init; }
        public int FrameSize { get; init; }
    }

    /// <summary>Duration (seconds) and bitrate (kbps) reported for the file.</summary>
    private sealed class Mp3Metadata
    {
        public double Duration { get; init; }
        public int Bitrate { get; init; }
    }
}