79bbbd4956
Processors now emit a ProcessedAudio plan with a streamed writer instead of a whole-file AudioBinary; vault writes stream via RegisterResourceStreamingAsync. Header parsing is bounded. Wave 2 (waveform/Opus) still re-reads the full file by design.
316 lines
11 KiB
C#
316 lines
11 KiB
C#
using DeepDrftContent.FileDatabase.Models;
|
||
|
||
namespace DeepDrftContent.Processors;
|
||
|
||
/// <summary>
/// Extracts metadata from an MP3 file and returns a <see cref="ProcessedAudio"/> passthrough plan
/// tagged <c>.mp3</c> that points at the <b>unmodified</b> source file. No transcoding — the vault
/// stores the original stream; only duration/bitrate metadata are computed from the first MPEG
/// frame header (plus a Xing/VBRI tag when present for accurate VBR duration).
/// </summary>
|
||
public class Mp3AudioProcessor
|
||
{
|
||
// Layer III bitrates (kbps) for MPEG1, looked up by the header's 4-bit bitrate index.
// Index 0 is the "free" bitrate; index 15 is invalid and deliberately has no entry.
private static readonly int[] Mpeg1Layer3Bitrates =
[
    0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320
];

// Layer III bitrates (kbps) shared by MPEG2 and MPEG2.5, indexed the same way
// (0 = free, 15 = invalid / no entry).
private static readonly int[] Mpeg2Layer3Bitrates =
[
    0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160
];

// Sample rates (Hz) per MPEG version, looked up by the header's 2-bit sample-rate index.
// Index 3 is reserved and has no entry.
private static readonly int[] Mpeg1SampleRates = [44100, 48000, 32000];
private static readonly int[] Mpeg2SampleRates = [22050, 24000, 16000];
private static readonly int[] Mpeg25SampleRates = [11025, 12000, 8000];

// Synthetic metadata reported when the header cannot be parsed: duration in seconds, bitrate in kbps.
private const double FallbackDuration = 180.0;
private const int FallbackBitrate = 320;

// Upper bound (8 MiB) on the leading bytes read for metadata parsing. Everything needed lives in
// the leading ID3v2 tag plus the first MPEG frame, so a large MP3 is never pulled into memory
// whole just to read its header. A tag larger than this window (e.g. very large embedded art) is
// meant to degrade to the CBR/default estimate rather than an OOM. The body itself is stored by
// streaming the original file, not from this window.
private const long HeaderCap = 8L * 1024 * 1024;
|
||
|
||
/// <summary>
/// Validates that <paramref name="filePath"/> names an existing <c>.mp3</c> file, parses duration
/// and bitrate from a bounded prefix of it, and returns a passthrough plan for the original file.
/// </summary>
/// <param name="filePath">Path of the MP3 file to process.</param>
/// <param name="cancellationToken">Token forwarded to the prefix read.</param>
/// <returns>
/// The result of <c>ProcessedAudio.Passthrough</c> for the source path, tagged <c>.mp3</c>, carrying
/// the parsed (or fallback) duration and bitrate plus the file length.
/// </returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="ArgumentException">The file extension is not <c>.mp3</c> (case-insensitive).</exception>
public async Task<ProcessedAudio?> ProcessMp3FileAsync(string filePath, CancellationToken cancellationToken = default)
{
    // Existence is checked before the extension, so a missing non-MP3 path reports "not found".
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException($"MP3 file not found: {filePath}");
    }

    var extension = Path.GetExtension(filePath);
    if (!string.Equals(extension, ".mp3", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("File must be an MP3 file", nameof(filePath));
    }

    // Only a prefix capped at HeaderCap is requested for parsing; the true size is taken from the
    // file system so the CBR estimate is not skewed by the cap.
    var totalBytes = new FileInfo(filePath).Length;
    var headerWindow = await AudioStoreStream.ReadPrefixAsync(filePath, HeaderCap, cancellationToken);
    var parsed = ExtractMp3Metadata(headerWindow, totalBytes);

    // MP3 is stored unmodified — passthrough the original bytes via a streamed disk-to-disk copy.
    return ProcessedAudio.Passthrough(filePath, ".mp3", parsed.Duration, parsed.Bitrate, totalBytes);
}
|
||
|
||
/// <summary>
/// Derives duration and bitrate from the header window: locates the first valid MPEG frame (after
/// any ID3v2 tag), decodes its header, and computes the duration from it.
/// Never throws — any failure is reported as a console warning and answered with
/// <see cref="FallbackDuration"/> / <see cref="FallbackBitrate"/>.
/// </summary>
/// <param name="buffer">Leading bytes of the file; may be shorter than the file itself.</param>
/// <param name="fileLength">True file size in bytes, used for the CBR duration estimate.</param>
/// <returns>Parsed metadata, or the synthetic defaults when parsing fails.</returns>
private static Mp3Metadata ExtractMp3Metadata(byte[] buffer, long fileLength)
{
    try
    {
        int firstFrameOffset = FindFirstFrame(buffer);
        if (firstFrameOffset < 0)
        {
            // Routed through the catch below so every failure path logs and falls back the same way.
            throw new InvalidDataException("No valid MPEG frame sync found");
        }

        FrameHeader frame = DecodeFrameHeader(buffer, firstFrameOffset);

        return new Mp3Metadata
        {
            Duration = ComputeDuration(buffer, firstFrameOffset, frame, fileLength),
            Bitrate = frame.BitrateKbps,
        };
    }
    catch (Exception parseFailure)
    {
        Console.WriteLine($"Warning: MP3 parsing failed, using defaults: {parseFailure.Message}");
        return new Mp3Metadata { Duration = FallbackDuration, Bitrate = FallbackBitrate };
    }
}
|
||
|
||
/// <summary>
/// Returns the offset of the first valid MPEG frame header, skipping a leading ID3v2 tag if
/// present. Scans for a 0xFF / 0xE0-syncword pair and fully validates the 4-byte header before
/// accepting a candidate.
/// </summary>
/// <param name="buffer">Bytes to scan (the leading window of the file).</param>
/// <returns>Zero-based offset of the first accepted frame header, or -1 when none is found.</returns>
private static int FindFirstFrame(byte[] buffer)
{
    // A frame header occupies 4 bytes, so the last offset that can hold a complete header is
    // Length - 4 inclusive. (The previous exclusive bound skipped a header sitting at the very end.)
    int lastCandidate = buffer.Length - 4;

    for (int i = SkipId3v2(buffer); i <= lastCandidate; i++)
    {
        // Cheap sync test first: 11 set bits spanning byte 0 and the top 3 bits of byte 1.
        bool hasSync = buffer[i] == 0xFF && (buffer[i + 1] & 0xE0) == 0xE0;

        if (hasSync && IsValidFrameHeader(buffer, i))
        {
            return i;
        }
    }

    return -1;
}
|
||
|
||
/// <summary>
/// Returns the byte offset just past a leading ID3v2 tag, or 0 if the buffer does not start with
/// one. The tag size is a syncsafe big-endian uint28 at bytes 6–9 (each byte's MSB is 0). A footer
/// (flag bit 4 of byte 5) adds 10.
/// </summary>
/// <param name="buffer">Bytes starting at the beginning of the file.</param>
/// <returns>
/// 0 when there is no (well-formed) ID3v2 header; otherwise the offset of the first byte after
/// the tag, clamped to <c>buffer.Length</c> when the tag extends beyond the buffer.
/// </returns>
private static int SkipId3v2(byte[] buffer)
{
    const int HeaderSize = 10;
    const int FooterSize = 10;

    if (buffer.Length < HeaderSize || buffer[0] != 'I' || buffer[1] != 'D' || buffer[2] != '3')
    {
        return 0;
    }

    // The ID3v2 detection pattern requires every size byte to be below 0x80. If one is not, this
    // is not a real tag header and the decoded size would be garbage — treat it as "no tag".
    if (((buffer[6] | buffer[7] | buffer[8] | buffer[9]) & 0x80) != 0)
    {
        return 0;
    }

    var size = (buffer[6] << 21) | (buffer[7] << 14) | (buffer[8] << 7) | buffer[9];
    var skip = HeaderSize + size;
    if ((buffer[5] & 0x10) != 0)
    {
        skip += FooterSize; // footer present
    }

    // A tag that runs past the buffer means the whole buffer is tag payload (e.g. embedded art).
    // Returning 0 here would make the caller scan that payload for a sync word and risk matching
    // arbitrary binary data; clamping to the end leaves nothing to scan, so no frame is reported.
    return Math.Min(skip, buffer.Length);
}
|
||
|
||
/// <summary>
/// Decides whether the bytes at <paramref name="pos"/> form an acceptable Layer III frame header.
/// Only bytes <c>pos + 1</c> and <c>pos + 2</c> are inspected; the sync bits are the caller's job.
/// A header is rejected when the version is reserved, the layer is not III, the bitrate index is
/// free (0) or bad (0xF), or the sample-rate index is reserved (3).
/// </summary>
/// <param name="buffer">Buffer containing the candidate header.</param>
/// <param name="pos">Offset of the candidate header's first byte.</param>
/// <returns><c>true</c> when every inspected field holds a usable value.</returns>
private static bool IsValidFrameHeader(byte[] buffer, int pos)
{
    int second = buffer[pos + 1];
    int third = buffer[pos + 2];

    int version = (second >> 3) & 0x03;        // 1 = reserved
    int layer = (second >> 1) & 0x03;          // 1 = Layer III; this processor handles Layer III only
    int bitrateIndex = (third >> 4) & 0x0F;    // 0 = free, 0xF = bad
    int sampleRateIndex = (third >> 2) & 0x03; // 3 = reserved

    return version != 1
        && layer == 1
        && bitrateIndex is not (0 or 0x0F)
        && sampleRateIndex != 3;
}
|
||
|
||
/// <summary>
/// Decodes the 4-byte frame header at <paramref name="pos"/> into version, bitrate, sample rate,
/// channel count and samples per frame. Expects a header that already passed
/// <see cref="IsValidFrameHeader"/>; a bitrate index of 15 or sample-rate index of 3 falls
/// outside the lookup tables.
/// </summary>
/// <param name="buffer">Buffer containing the header.</param>
/// <param name="pos">Offset of the header's first byte.</param>
/// <returns>The decoded header fields.</returns>
private static FrameHeader DecodeFrameHeader(byte[] buffer, int pos)
{
    int second = buffer[pos + 1];
    int third = buffer[pos + 2];
    int fourth = buffer[pos + 3];

    // Version field: 3 = MPEG1, 2 = MPEG2, anything else is treated as MPEG2.5 (0).
    MpegVersion version;
    int versionField = (second >> 3) & 0x03;
    if (versionField == 3)
    {
        version = MpegVersion.Mpeg1;
    }
    else if (versionField == 2)
    {
        version = MpegVersion.Mpeg2;
    }
    else
    {
        version = MpegVersion.Mpeg25;
    }

    bool isMpeg1 = version == MpegVersion.Mpeg1;

    // MPEG2 and MPEG2.5 share one bitrate table but have separate sample-rate tables.
    int[] bitrates = isMpeg1 ? Mpeg1Layer3Bitrates : Mpeg2Layer3Bitrates;
    int[] sampleRates;
    if (isMpeg1)
    {
        sampleRates = Mpeg1SampleRates;
    }
    else if (version == MpegVersion.Mpeg2)
    {
        sampleRates = Mpeg2SampleRates;
    }
    else
    {
        sampleRates = Mpeg25SampleRates;
    }

    return new FrameHeader
    {
        Version = version,
        BitrateKbps = bitrates[(third >> 4) & 0x0F],
        SampleRate = sampleRates[(third >> 2) & 0x03],
        // Channel mode 3 is mono; every other mode is counted as two channels.
        Channels = ((fourth >> 6) & 0x03) == 3 ? 1 : 2,
        SamplesPerFrame = isMpeg1 ? 1152 : 576,
    };
}
|
||
|
||
/// <summary>
/// Works out the duration in seconds. A Xing/Info frame count is preferred, then a VBRI frame
/// count (both accurate for VBR); with neither, the CBR estimate
/// <c>(fileLength - frameStart) / (bitrate_kbps * 125)</c> is used. Division by zero is guarded:
/// a non-positive sample rate disables the frame-count paths and a non-positive byte rate yields
/// <see cref="FallbackDuration"/>.
/// </summary>
/// <param name="buffer">Header window containing the first frame.</param>
/// <param name="frameStart">Offset of the first frame header within <paramref name="buffer"/>.</param>
/// <param name="header">Decoded header of that first frame.</param>
/// <param name="fileLength">True file size in bytes (not the window size).</param>
/// <returns>Duration in seconds.</returns>
private static double ComputeDuration(byte[] buffer, int frameStart, FrameHeader header, long fileLength)
{
    // Converts a total frame count into seconds; null when the count or sample rate is unusable.
    double? SecondsFromFrames(int frameCount)
    {
        if (frameCount > 0 && header.SampleRate > 0)
        {
            return (double)frameCount * header.SamplesPerFrame / header.SampleRate;
        }

        return null;
    }

    // ?? evaluates lazily, so the VBRI tag is only consulted when the Xing path gave nothing.
    double? fromVbrTag = SecondsFromFrames(ReadXingFrameCount(buffer, frameStart, header))
        ?? SecondsFromFrames(ReadVbriFrameCount(buffer, frameStart));
    if (fromVbrTag.HasValue)
    {
        return fromVbrTag.Value;
    }

    // CBR fallback: bitrate_kbps * 1000 / 8 bytes per second = bitrate_kbps * 125. Uses the true
    // file length (not the bounded header window), excluding the bytes before frameStart.
    int bytesPerSecond = header.BitrateKbps * 125;
    if (bytesPerSecond <= 0)
    {
        return FallbackDuration;
    }

    return (double)(fileLength - frameStart) / bytesPerSecond;
}
|
||
|
||
/// <summary>
/// Reads the total-frame count from a Xing/Info tag located right after the first frame's
/// side-information region. The side-info size depends on MPEG version and channel count.
/// </summary>
/// <param name="buffer">Header window containing the first frame.</param>
/// <param name="frameStart">Offset of the first frame header.</param>
/// <param name="header">Decoded header of that frame (version and channels are used).</param>
/// <returns>
/// The frame count, or 0 when the tag does not fit in the buffer, is absent, or does not carry
/// the frame-count field.
/// </returns>
private static int ReadXingFrameCount(byte[] buffer, int frameStart, FrameHeader header)
{
    bool mono = header.Channels == 1;

    int sideInfoSize;
    if (header.Version == MpegVersion.Mpeg1)
    {
        sideInfoSize = mono ? 17 : 32;
    }
    else
    {
        sideInfoSize = mono ? 9 : 17;
    }

    // Tag layout: 4-byte id, 4-byte flags, 4-byte frame count — 12 bytes must be available.
    int tagPos = frameStart + 4 + sideInfoSize;
    bool fitsInBuffer = tagPos + 12 <= buffer.Length;
    if (!fitsInBuffer)
    {
        return 0;
    }

    bool hasTag = MatchesAscii(buffer, tagPos, "Xing") || MatchesAscii(buffer, tagPos, "Info");
    if (!hasTag)
    {
        return 0;
    }

    // Flag bit 0 signals that the frame-count field is present.
    bool hasFrameCount = (ReadUInt32BigEndian(buffer, tagPos + 4) & 0x01) != 0;

    return hasFrameCount ? (int)ReadUInt32BigEndian(buffer, tagPos + 8) : 0;
}
|
||
|
||
/// <summary>
/// Reads the Fraunhofer VBRI total-frame count. The VBRI tag is looked for at a fixed 32 bytes
/// past the 4-byte frame header; the frame count is a big-endian uint32 at tag offset 14.
/// </summary>
/// <param name="buffer">Header window containing the first frame.</param>
/// <param name="frameStart">Offset of the first frame header.</param>
/// <returns>The frame count, or 0 when the tag does not fit in the buffer or is absent.</returns>
private static int ReadVbriFrameCount(byte[] buffer, int frameStart)
{
    const int TagOffsetPastHeader = 32;
    const int FrameCountOffset = 14;

    int tagPos = frameStart + 4 + TagOffsetPastHeader;

    // The bounds check comes first so the tag comparison never reads past the buffer.
    bool tagPresent = tagPos + FrameCountOffset + 4 <= buffer.Length
        && MatchesAscii(buffer, tagPos, "VBRI");

    return tagPresent ? (int)ReadUInt32BigEndian(buffer, tagPos + FrameCountOffset) : 0;
}
|
||
|
||
/// <summary>
/// Checks whether the bytes starting at <paramref name="pos"/> equal the characters of
/// <paramref name="tag"/>, compared one byte per character. No bounds check is done here; the
/// caller must ensure <c>pos + tag.Length</c> stays inside <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Bytes to compare against.</param>
/// <param name="pos">Offset in <paramref name="buffer"/> where the comparison starts.</param>
/// <param name="tag">Expected marker text, e.g. a four-character tag id.</param>
/// <returns><c>true</c> when every character matches (trivially so for an empty tag).</returns>
private static bool MatchesAscii(byte[] buffer, int pos, string tag)
{
    int offset = 0;

    foreach (char expected in tag)
    {
        if (buffer[pos + offset] != (byte)expected)
        {
            return false;
        }

        offset++;
    }

    return true;
}
|
||
|
||
/// <summary>
/// Assembles the four bytes at <paramref name="pos"/> into an unsigned 32-bit value, most
/// significant byte first.
/// </summary>
/// <param name="buffer">Source bytes.</param>
/// <param name="pos">Offset of the most significant byte.</param>
/// <returns>The big-endian value of <c>buffer[pos..pos + 4]</c>.</returns>
private static uint ReadUInt32BigEndian(byte[] buffer, int pos)
{
    uint value = 0;

    for (int i = 0; i < 4; i++)
    {
        // Shift the bytes gathered so far up and append the next, less significant, byte.
        value = (value << 8) | buffer[pos + i];
    }

    return value;
}
|
||
|
||
/// <summary>
/// MPEG audio version of a frame, as decoded from the 2-bit version field of its header
/// (field value 3 → MPEG1, 2 → MPEG2, otherwise MPEG2.5).
/// </summary>
private enum MpegVersion
{
    /// <summary>MPEG version 1.</summary>
    Mpeg1 = 0,

    /// <summary>MPEG version 2.</summary>
    Mpeg2 = 1,

    /// <summary>MPEG version 2.5.</summary>
    Mpeg25 = 2,
}
|
||
|
||
/// <summary>
/// Immutable-after-initialization view of the fields decoded from one MPEG frame header.
/// </summary>
private sealed class FrameHeader
{
    /// <summary>MPEG version the frame was encoded with.</summary>
    public MpegVersion Version { get; init; }

    /// <summary>Bitrate in kbps, taken from the Layer III bitrate table for the version.</summary>
    public int BitrateKbps { get; init; }

    /// <summary>Sample rate in Hz, taken from the sample-rate table for the version.</summary>
    public int SampleRate { get; init; }

    /// <summary>Channel count: 1 for mono, 2 for every other channel mode.</summary>
    public int Channels { get; init; }

    /// <summary>Samples per frame: 1152 for MPEG1, 576 for MPEG2 and MPEG2.5.</summary>
    public int SamplesPerFrame { get; init; }
}
|
||
|
||
/// <summary>
/// Result of header parsing: the values handed on to the passthrough plan.
/// </summary>
private sealed class Mp3Metadata
{
    /// <summary>Track duration in seconds (parsed, estimated, or the fallback value).</summary>
    public double Duration { get; init; }

    /// <summary>Bitrate in kbps from the first frame header, or the fallback value.</summary>
    public int Bitrate { get; init; }
}
|
||
}
|