Files
deepdrft/DeepDrftContent/Processors/Mp3AudioProcessor.cs
T
daniel-c-harvey 79bbbd4956 fix(api): stream audio store path to eliminate whole-file buffering (OOM)
Processors now emit a ProcessedAudio plan with a streamed writer instead of a whole-file AudioBinary; vault writes stream via RegisterResourceStreamingAsync. Header parsing is bounded. Wave 2 (waveform/Opus) still re-reads the full file by design.
2026-06-25 15:27:28 -04:00

316 lines
11 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using DeepDrftContent.FileDatabase.Models;
namespace DeepDrftContent.Processors;
/// <summary>
/// Extracts metadata from an MP3 file and wraps its <b>unmodified</b> bytes in an
/// <see cref="AudioBinary"/> tagged <c>.mp3</c>. No transcoding — the vault stores the original
/// stream; only duration/bitrate metadata are computed from the first MPEG frame header (plus a
/// Xing/VBRI tag when present for accurate VBR duration).
/// </summary>
public class Mp3AudioProcessor
{
// MPEG1 Layer III bitrate table (kbps), indexed by the 4-bit bitrate index. 0 = free, 15 = bad.
private static readonly int[] Mpeg1Layer3Bitrates =
[0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];
// MPEG2/2.5 Layer III bitrate table (kbps), indexed by 4-bit bitrate index. 0 = free, 15 = bad.
private static readonly int[] Mpeg2Layer3Bitrates =
[0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];
private static readonly int[] Mpeg1SampleRates = [44100, 48000, 32000];
private static readonly int[] Mpeg2SampleRates = [22050, 24000, 16000];
private static readonly int[] Mpeg25SampleRates = [11025, 12000, 8000];
private const double FallbackDuration = 180.0;
private const int FallbackBitrate = 320;
// Metadata lives in the leading ID3v2 tag plus the first MPEG frame. Cap the header read so a
// large MP3 is not pulled into memory whole just to read it; a tag larger than this (very large
// embedded art) simply falls back to the CBR/default estimate, never an OOM. The body is stored
// by streaming the original file, not from this window.
private const long HeaderCap = 8 * 1024 * 1024;
public async Task<ProcessedAudio?> ProcessMp3FileAsync(string filePath, CancellationToken cancellationToken = default)
{
if (!File.Exists(filePath))
{
throw new FileNotFoundException($"MP3 file not found: {filePath}");
}
if (!Path.GetExtension(filePath).Equals(".mp3", StringComparison.OrdinalIgnoreCase))
{
throw new ArgumentException("File must be an MP3 file", nameof(filePath));
}
var fileLength = new FileInfo(filePath).Length;
var window = await AudioStoreStream.ReadPrefixAsync(filePath, HeaderCap, cancellationToken);
var meta = ExtractMp3Metadata(window, fileLength);
// MP3 is stored unmodified — passthrough the original bytes via a streamed disk-to-disk copy.
return ProcessedAudio.Passthrough(filePath, ".mp3", meta.Duration, meta.Bitrate, fileLength);
}
/// <summary>
/// Parses the first valid MPEG frame (after any ID3v2 tag) and any Xing/VBRI tag inside it.
/// On any parse failure, logs a warning and returns synthetic defaults — never throws.
/// <paramref name="fileLength"/> is the true file size (the header window may be shorter), used
/// for the CBR duration estimate.
/// </summary>
private static Mp3Metadata ExtractMp3Metadata(byte[] buffer, long fileLength)
{
try
{
var frameStart = FindFirstFrame(buffer);
if (frameStart < 0)
{
throw new InvalidDataException("No valid MPEG frame sync found");
}
var header = DecodeFrameHeader(buffer, frameStart);
var duration = ComputeDuration(buffer, frameStart, header, fileLength);
return new Mp3Metadata { Duration = duration, Bitrate = header.BitrateKbps };
}
catch (Exception ex)
{
Console.WriteLine($"Warning: MP3 parsing failed, using defaults: {ex.Message}");
return new Mp3Metadata { Duration = FallbackDuration, Bitrate = FallbackBitrate };
}
}
/// <summary>
/// Returns the offset of the first valid MPEG frame, skipping a leading ID3v2 tag if present.
/// Scans for a 0xFF / 0xE0-syncword pair and fully validates the 4-byte header before accepting.
/// </summary>
private static int FindFirstFrame(byte[] buffer)
{
var start = SkipId3v2(buffer);
for (int i = start; i < buffer.Length - 4; i++)
{
if (buffer[i] != 0xFF || (buffer[i + 1] & 0xE0) != 0xE0)
{
continue;
}
if (IsValidFrameHeader(buffer, i))
{
return i;
}
}
return -1;
}
/// <summary>
/// Returns the byte offset just past an ID3v2 tag, or 0 if none. The tag size is a syncsafe
/// big-endian uint28 at bytes 69 (each byte's MSB is 0). A footer (flag bit 4 of byte 5) adds 10.
/// </summary>
private static int SkipId3v2(byte[] buffer)
{
if (buffer.Length < 10 || buffer[0] != 'I' || buffer[1] != 'D' || buffer[2] != '3')
{
return 0;
}
var size = (buffer[6] << 21) | (buffer[7] << 14) | (buffer[8] << 7) | buffer[9];
var skip = 10 + size;
if ((buffer[5] & 0x10) != 0)
{
skip += 10; // footer present
}
return skip <= buffer.Length ? skip : 0;
}
/// <summary>
/// Fully validates a candidate 4-byte frame header: layer must be III, and version, bitrate
/// index, and sample-rate index must all be non-reserved (rejects free bitrate, bad index 0xF,
/// and reserved sample rate 3).
/// </summary>
private static bool IsValidFrameHeader(byte[] buffer, int pos)
{
var b1 = buffer[pos + 1];
var b2 = buffer[pos + 2];
var versionBits = (b1 >> 3) & 0x03;
if (versionBits == 1) // 1 = reserved
{
return false;
}
var layerBits = (b1 >> 1) & 0x03;
if (layerBits != 1) // 1 = Layer III; this processor handles Layer III only
{
return false;
}
var bitrateIndex = (b2 >> 4) & 0x0F;
if (bitrateIndex == 0 || bitrateIndex == 0x0F) // 0 = free, 0xF = bad
{
return false;
}
var sampleRateIndex = (b2 >> 2) & 0x03;
if (sampleRateIndex == 3) // reserved
{
return false;
}
return true;
}
private static FrameHeader DecodeFrameHeader(byte[] buffer, int pos)
{
var b1 = buffer[pos + 1];
var b2 = buffer[pos + 2];
var b3 = buffer[pos + 3];
var versionBits = (b1 >> 3) & 0x03;
var version = versionBits switch
{
3 => MpegVersion.Mpeg1,
2 => MpegVersion.Mpeg2,
_ => MpegVersion.Mpeg25, // 0 = MPEG2.5
};
var bitrateIndex = (b2 >> 4) & 0x0F;
var bitrateTable = version == MpegVersion.Mpeg1 ? Mpeg1Layer3Bitrates : Mpeg2Layer3Bitrates;
var bitrateKbps = bitrateTable[bitrateIndex];
var sampleRateIndex = (b2 >> 2) & 0x03;
var sampleRate = version switch
{
MpegVersion.Mpeg1 => Mpeg1SampleRates[sampleRateIndex],
MpegVersion.Mpeg2 => Mpeg2SampleRates[sampleRateIndex],
_ => Mpeg25SampleRates[sampleRateIndex],
};
var channelMode = (b3 >> 6) & 0x03;
var channels = channelMode == 3 ? 1 : 2;
var samplesPerFrame = version == MpegVersion.Mpeg1 ? 1152 : 576;
return new FrameHeader
{
Version = version,
BitrateKbps = bitrateKbps,
SampleRate = sampleRate,
Channels = channels,
SamplesPerFrame = samplesPerFrame,
};
}
/// <summary>
/// Computes duration from a Xing/Info or VBRI tag (accurate for VBR) when present; otherwise
/// falls back to the CBR estimate fileSize / (bitrate_kbps * 125). Guards divide-by-zero.
/// </summary>
private static double ComputeDuration(byte[] buffer, int frameStart, FrameHeader header, long fileLength)
{
var xingFrames = ReadXingFrameCount(buffer, frameStart, header);
if (xingFrames > 0 && header.SampleRate > 0)
{
return (double)xingFrames * header.SamplesPerFrame / header.SampleRate;
}
var vbriFrames = ReadVbriFrameCount(buffer, frameStart);
if (vbriFrames > 0 && header.SampleRate > 0)
{
return (double)vbriFrames * header.SamplesPerFrame / header.SampleRate;
}
// CBR fallback: bitrate_kbps * 1000 / 8 bytes per second = bitrate_kbps * 125. Uses the true
// file length (not the bounded header window), excluding the ID3v2 tag bytes before frameStart.
var bytesPerSecond = header.BitrateKbps * 125;
return bytesPerSecond > 0 ? (double)(fileLength - frameStart) / bytesPerSecond : FallbackDuration;
}
/// <summary>
/// Reads the Xing/Info VBR total-frame count from the side-information region of the first frame,
/// or 0 if no Xing tag or no frame-count flag. Side-info offset depends on version and channels.
/// </summary>
private static int ReadXingFrameCount(byte[] buffer, int frameStart, FrameHeader header)
{
var sideInfoSize = header.Version == MpegVersion.Mpeg1
? (header.Channels == 1 ? 17 : 32)
: (header.Channels == 1 ? 9 : 17);
var tagPos = frameStart + 4 + sideInfoSize;
if (tagPos + 12 > buffer.Length)
{
return 0;
}
if (!MatchesAscii(buffer, tagPos, "Xing") && !MatchesAscii(buffer, tagPos, "Info"))
{
return 0;
}
var flags = ReadUInt32BigEndian(buffer, tagPos + 4);
if ((flags & 0x01) == 0) // bit 0 = frame-count present
{
return 0;
}
return (int)ReadUInt32BigEndian(buffer, tagPos + 8);
}
/// <summary>
/// Reads the Fraunhofer VBRI total-frame count. The VBRI tag sits at a fixed offset 32 past the
/// frame header (frameStart + 4 + 32); the frame count is a big-endian uint32 at tag offset 14.
/// </summary>
private static int ReadVbriFrameCount(byte[] buffer, int frameStart)
{
var tagPos = frameStart + 4 + 32;
if (tagPos + 18 > buffer.Length)
{
return 0;
}
if (!MatchesAscii(buffer, tagPos, "VBRI"))
{
return 0;
}
return (int)ReadUInt32BigEndian(buffer, tagPos + 14);
}
private static bool MatchesAscii(byte[] buffer, int pos, string tag)
{
for (int i = 0; i < tag.Length; i++)
{
if (buffer[pos + i] != (byte)tag[i])
{
return false;
}
}
return true;
}
private static uint ReadUInt32BigEndian(byte[] buffer, int pos) =>
((uint)buffer[pos] << 24) | ((uint)buffer[pos + 1] << 16) | ((uint)buffer[pos + 2] << 8) | buffer[pos + 3];
private enum MpegVersion
{
Mpeg1,
Mpeg2,
Mpeg25,
}
private sealed class FrameHeader
{
public MpegVersion Version { get; init; }
public int BitrateKbps { get; init; }
public int SampleRate { get; init; }
public int Channels { get; init; }
public int SamplesPerFrame { get; init; }
}
private sealed class Mp3Metadata
{
public double Duration { get; init; }
public int Bitrate { get; init; }
}
}