Merge branch 'p1.2-w2-t1-mp3-decoder' into dev
This commit is contained in:
@@ -0,0 +1,326 @@
|
||||
/**
 * Mp3FormatDecoder - MP3 (MPEG-1/2/2.5 Layer III) implementation of IFormatDecoder.
 *
 * All MP3-specific stream logic lives here: ID3v2 skipping, MPEG frame-sync scanning,
 * frame-header decode, Xing/Info/VBRI VBR-header detection, segment sizing, and seek
 * byte-offset math (CBR estimate or VBR TOC interpolation). StreamDecoder delegates to
 * this via IFormatDecoder and holds no MP3 knowledge of its own.
 *
 * MP3 frames are self-contained, so wrapSegment is a zero-copy passthrough — the browser's
 * decodeAudioData accepts raw frame bytes directly and tolerates a partial leading frame.
 */
|
||||
|
||||
import { FormatInfo, IFormatDecoder, Mp3VbrSeekData } from './IFormatDecoder.js';
|
||||
|
||||
/** Fields decoded from one 4-byte MPEG audio frame header. */
interface Mp3FrameHeader {
  /** Raw 2-bit version field: 3 = MPEG-1, 2 = MPEG-2, 0 = MPEG-2.5. */
  version: number;
  /** Sample rate in Hz. */
  sampleRate: number;
  /** 1 for mono, 2 for every stereo variant. */
  channels: number;
  /** Raw 2-bit channel-mode field (3 = mono). */
  channelMode: number;
  /** Bitrate of this frame in kbps. */
  bitrateKbps: number;
  /** PCM samples per frame: 1152 for MPEG-1, 576 for MPEG-2/2.5. */
  samplesPerFrame: number;
  /** Size of this frame in bytes, including its padding byte if the padding bit is set. */
  frameSize: number;
}

/** Summary of a Xing/Info or VBRI tag found inside the first frame. */
interface Mp3VbrHeader {
  /** True for a "Xing" or "VBRI" tag (real VBR); false for an "Info" tag (CBR with metadata). */
  isXing: boolean;
  /** Frame count from the tag, or 0 when absent/unreadable. */
  totalFrames: number;
  /** Byte count from the tag, or 0 when absent/unreadable. */
  totalBytes: number;
  /** Copy of the 100-entry Xing seek table, or null when absent/unusable. */
  toc: Uint8Array | null;
}

/**
 * MP3 (MPEG-1/2/2.5 Layer III) implementation of IFormatDecoder.
 *
 * Handles ID3v2 skipping, frame-sync scanning, frame-header decoding, Xing/Info/VBRI
 * detection, segment sizing and seek byte-offset calculation.
 */
export class Mp3FormatDecoder implements IFormatDecoder {
  // MPEG Layer III bitrate tables (kbps), indexed by the 4-bit bitrate index.
  // Index 0 (free) and 15 (reserved) are invalid and rejected during frame validation.
  private static readonly BITRATES_MPEG1 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];
  private static readonly BITRATES_MPEG2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];

  // Sample-rate tables (Hz), indexed by the 2-bit sample-rate index (3 = reserved).
  private static readonly SAMPLE_RATES_MPEG1 = [44100, 48000, 32000];
  private static readonly SAMPLE_RATES_MPEG2 = [22050, 24000, 16000];
  private static readonly SAMPLE_RATES_MPEG25 = [11025, 12000, 8000];

  /**
   * Try to derive stream parameters from the bytes received so far.
   *
   * @param chunks    Accumulated stream chunks, in order, starting at the first byte of the stream.
   * @param totalSize Combined byte length of `chunks`.
   * @returns The decoded format description, or null when no valid Layer III frame header
   *          can be found in the available bytes yet.
   *
   * Note: a Xing/Info tag that is only partially buffered yields a result with whatever
   * fields were readable (missing ones stay 0 / null) rather than null.
   */
  tryParseHeader(chunks: Uint8Array[], totalSize: number): FormatInfo | null {
    const buffer = Mp3FormatDecoder.concat(chunks, totalSize);

    // The ID3v2 probe needs 10 bytes; with fewer we cannot tell whether a tag is present.
    if (buffer.length < 10) return null;

    const searchStart = Mp3FormatDecoder.id3v2Skip(buffer);

    // Scan for the first valid MPEG Layer III frame from the skip offset.
    const frameStart = Mp3FormatDecoder.findFrameSync(buffer, searchStart);
    if (frameStart < 0) return null;

    const h = Mp3FormatDecoder.decodeFrameHeader(buffer, frameStart);
    if (!h) return null;

    const vbr = Mp3FormatDecoder.parseVbrHeader(buffer, frameStart, h);

    // "Xing"/"VBRI" tag → true VBR (size-based segments, no fixed blockAlign).
    // "Info" tag or no tag at all → CBR (frame-aligned blockAlign).
    const isVbr = vbr?.isXing === true;
    const blockAlign = isVbr ? 0 : h.frameSize;

    let totalDuration: number | null = null;
    let seekData: Mp3VbrSeekData | null = null;
    if (vbr && vbr.totalFrames > 0) {
      totalDuration = vbr.totalFrames * h.samplesPerFrame / h.sampleRate;
    }
    if (vbr?.toc) {
      seekData = { kind: 'mp3-vbr', toc: vbr.toc, totalBytes: vbr.totalBytes };
    }

    // For CBR the first frame's bitrate is the stream's byte rate. For true VBR the first
    // frame's bitrate says nothing about the rest of the stream, so prefer the average
    // derived from the tag's byte and frame counts whenever both are known.
    let byteRate = h.bitrateKbps * 125; // kbps * 1000 / 8
    if (isVbr && vbr && vbr.totalBytes > 0 && totalDuration !== null && totalDuration > 0) {
      byteRate = Math.round(vbr.totalBytes / totalDuration);
    }

    return {
      sampleRate: h.sampleRate,
      channels: h.channels,
      bitsPerSample: 16, // conventional for MP3 (decoder handles the real format internally)
      byteRate, // bytes/sec; used for byte-rate seek estimates
      blockAlign,
      totalDuration,
      audioDataOffset: frameStart, // byte offset (from the start of `chunks`) where audio frames begin
      seekData
    };
  }

  /**
   * Decide how many of the buffered bytes should go into the next decode segment.
   *
   * @param info           Format description produced by tryParseHeader.
   * @param availableBytes Bytes currently buffered and not yet consumed.
   * @param requestedSize  Preferred segment size in bytes.
   * @param streamComplete True once no more bytes will arrive.
   * @returns Segment size in bytes, or 0 when the caller should wait (or nothing usable is left).
   */
  getAlignedSegmentSize(
    info: FormatInfo,
    availableBytes: number,
    requestedSize: number,
    streamComplete: boolean
  ): number {
    const minSize = 4096; // at least 4 KB before starting decode

    if (availableBytes === 0) return 0;

    if (info.blockAlign > 0) {
      // CBR: cut on multiples of the frame size so each segment holds whole frames.
      // A tail shorter than one frame is dropped rather than over-read.
      if (availableBytes < info.blockAlign) return 0;

      const minFrames = Math.ceil(minSize / info.blockAlign);
      const availableFrames = Math.floor(availableBytes / info.blockAlign);
      if (!streamComplete && availableFrames < minFrames) return 0;

      // requestedFrames is floored from min(requested, available), and the lower bound
      // (minFrames, or 1 at end of stream) was verified above to fit, so the result
      // never exceeds availableBytes.
      const requestedFrames = Math.floor(Math.min(requestedSize, availableBytes) / info.blockAlign);
      const lowerBound = streamComplete ? 1 : minFrames;
      return Math.max(lowerBound, requestedFrames) * info.blockAlign;
    }

    // VBR: frame sizes vary, so segments are cut purely by size.
    if (!streamComplete && availableBytes < minSize) return 0;
    return Math.min(requestedSize, availableBytes);
  }

  /**
   * Return the bytes to hand to the audio decoder for one segment.
   * MP3 needs no container wrapping, so the input is returned as-is (no copy).
   */
  wrapSegment(_info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
    return rawBytes;
  }

  /**
   * Map a playback position to a byte offset in the stream.
   *
   * Uses the Xing TOC when duration, a full 100-entry TOC and a byte count are all known;
   * otherwise falls back to the byte-rate estimate. Positions outside [0, totalDuration]
   * are clamped on the TOC path, and a NaN position is treated as 0.
   *
   * @param info            Format description produced by tryParseHeader.
   * @param positionSeconds Target position in seconds.
   * @returns Byte offset, never less than info.audioDataOffset.
   */
  calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
    if (info.totalDuration == null || info.totalDuration <= 0) {
      // No duration info — byte-rate estimate.
      return Mp3FormatDecoder.byteRateOffset(info, positionSeconds);
    }

    const mp3Vbr = info.seekData?.kind === 'mp3-vbr' ? info.seekData as Mp3VbrSeekData : null;

    if (mp3Vbr?.toc && mp3Vbr.toc.length >= 100 && mp3Vbr.totalBytes > 0) {
      // Each TOC entry i is the file position (in 1/256ths of totalBytes) at i percent of
      // the duration. Interpolate linearly between neighbouring entries; the implicit
      // entry after toc[99] is 256 (end of stream).
      const rawPercent = positionSeconds / info.totalDuration * 100;
      const percent = Number.isNaN(rawPercent) ? 0 : Math.min(100, Math.max(0, rawPercent));
      const tocIdx = Math.min(99, Math.floor(percent));
      const tocFrac = percent - tocIdx;
      const t0 = mp3Vbr.toc[tocIdx];
      const t1 = tocIdx < 99 ? mp3Vbr.toc[tocIdx + 1] : 256;
      const byteFraction = (t0 + (t1 - t0) * tocFrac) / 256;
      return info.audioDataOffset + Math.floor(byteFraction * mp3Vbr.totalBytes);
    }

    // VBR without TOC, or CBR with duration — byte-rate estimate.
    return Mp3FormatDecoder.byteRateOffset(info, positionSeconds);
  }

  /**
   * Seek estimate from the byte rate. The result is rounded down to a multiple of
   * blockAlign (relative to audioDataOffset) when blockAlign is known; otherwise the raw
   * byte position is returned. Negative or NaN positions map to audioDataOffset.
   */
  private static byteRateOffset(info: FormatInfo, positionSeconds: number): number {
    if (info.byteRate <= 0) return info.audioDataOffset;

    const seconds = positionSeconds > 0 ? positionSeconds : 0; // also maps NaN to 0
    const raw = Math.floor(seconds * info.byteRate);
    if (info.blockAlign > 0) {
      return info.audioDataOffset + Math.floor(raw / info.blockAlign) * info.blockAlign;
    }
    return info.audioDataOffset + raw;
  }

  /**
   * Concatenate the accumulated chunks into one contiguous buffer.
   * A single chunk is returned as-is (no copy). If `totalSize` disagrees with the real
   * chunk total, the result is limited to the bytes that actually exist and fit.
   */
  private static concat(chunks: Uint8Array[], totalSize: number): Uint8Array {
    if (chunks.length === 1) return chunks[0];

    const buffer = new Uint8Array(totalSize);
    let offset = 0;
    for (const chunk of chunks) {
      const room = buffer.length - offset;
      if (room <= 0) break;
      buffer.set(chunk.length > room ? chunk.subarray(0, room) : chunk, offset);
      offset += Math.min(chunk.length, room);
    }
    // Do not expose zero-filled bytes that were never received.
    return offset < buffer.length ? buffer.subarray(0, offset) : buffer;
  }

  /**
   * Return the byte offset past an ID3v2 tag if one is present at the buffer start,
   * else 0. The size field is a syncsafe big-endian integer: four bytes carrying
   * 7 bits each, so bit 7 of every byte is masked off before combining.
   */
  private static id3v2Skip(buffer: Uint8Array): number {
    if (buffer.length < 10) return 0;
    if (buffer[0] !== 0x49 || buffer[1] !== 0x44 || buffer[2] !== 0x33) return 0; // 'I' 'D' '3'

    const size =
      ((buffer[6] & 0x7f) << 21) |
      ((buffer[7] & 0x7f) << 14) |
      ((buffer[8] & 0x7f) << 7) |
      (buffer[9] & 0x7f);
    const hasFooter = (buffer[5] & 0x10) !== 0; // bit 4 of flags byte
    return 10 + size + (hasFooter ? 10 : 0);
  }

  /**
   * Scan one byte at a time from `start` for the first position whose 4 bytes form a
   * plausible MPEG Layer III frame header. Returns the offset, or -1 if none is found in
   * the available bytes (caller should wait for more data).
   */
  private static findFrameSync(buffer: Uint8Array, start: number): number {
    // A full frame header is 4 bytes, so stop once fewer than 4 remain.
    for (let i = Math.max(0, start); i + 4 <= buffer.length; i++) {
      if (buffer[i] !== 0xff) continue;
      if ((buffer[i + 1] & 0xe0) !== 0xe0) continue; // top 3 bits of byte 1 must be set

      const version = (buffer[i + 1] >> 3) & 3;
      const layer = (buffer[i + 1] >> 1) & 3;
      const bitrateIndex = buffer[i + 2] >> 4;
      const sampleRateIndex = (buffer[i + 2] >> 2) & 3;

      if (version === 1) continue; // 01 = reserved
      if (layer !== 1) continue; // 01 = Layer III
      if (bitrateIndex === 0 || bitrateIndex === 15) continue; // free / reserved
      if (sampleRateIndex === 3) continue; // reserved

      return i;
    }
    return -1;
  }

  /**
   * Decode the 4-byte frame header at `frameStart`. Returns null if the header does not
   * fit in the buffer or if the resolved bitrate/sample rate are invalid.
   */
  private static decodeFrameHeader(buffer: Uint8Array, frameStart: number): Mp3FrameHeader | null {
    if (frameStart < 0 || frameStart + 4 > buffer.length) return null;

    const b1 = buffer[frameStart + 1];
    const b2 = buffer[frameStart + 2];
    const b3 = buffer[frameStart + 3];

    const version = (b1 >> 3) & 3; // 3 = MPEG1, 2 = MPEG2, 0 = MPEG2.5
    const bitrateIndex = b2 >> 4;
    const sampleRateIndex = (b2 >> 2) & 3;
    const paddingBit = (b2 >> 1) & 1;
    const channelMode = b3 >> 6; // 0-2 = stereo variants, 3 = mono

    const isMpeg1 = version === 3;

    const bitrateKbps = isMpeg1
      ? Mp3FormatDecoder.BITRATES_MPEG1[bitrateIndex]
      : Mp3FormatDecoder.BITRATES_MPEG2[bitrateIndex];

    let sampleRate: number | undefined;
    if (isMpeg1) {
      sampleRate = Mp3FormatDecoder.SAMPLE_RATES_MPEG1[sampleRateIndex];
    } else if (version === 2) {
      sampleRate = Mp3FormatDecoder.SAMPLE_RATES_MPEG2[sampleRateIndex];
    } else {
      sampleRate = Mp3FormatDecoder.SAMPLE_RATES_MPEG25[sampleRateIndex];
    }

    if (!bitrateKbps || !sampleRate) return null;

    const channels = channelMode === 3 ? 1 : 2;
    const samplesPerFrame = isMpeg1 ? 1152 : 576;
    // Layer III frame bytes = (samplesPerFrame / 8) * bitrate / sampleRate + padding,
    // i.e. a coefficient of 144 for MPEG-1 and 72 for MPEG-2/2.5.
    const frameSize = Math.floor((samplesPerFrame / 8) * bitrateKbps * 1000 / sampleRate) + paddingBit;

    return { version, sampleRate, channels, channelMode, bitrateKbps, samplesPerFrame, frameSize };
  }

  /**
   * Detect a Xing/Info (VBR or CBR-with-info) or VBRI header inside the first frame.
   * Returns null when neither is present (pure CBR).
   */
  private static parseVbrHeader(
    buffer: Uint8Array,
    frameStart: number,
    h: { version: number; channelMode: number }
  ): Mp3VbrHeader | null {
    const isMpeg1 = h.version === 3;
    const isMono = h.channelMode === 3;

    // The Xing/Info tag follows the side-info region, whose size depends on version and
    // channel count.
    let sideInfoOffset: number;
    if (isMpeg1) {
      sideInfoOffset = isMono ? 17 : 32;
    } else {
      sideInfoOffset = isMono ? 9 : 17;
    }

    return (
      Mp3FormatDecoder.parseXing(buffer, frameStart, sideInfoOffset) ??
      Mp3FormatDecoder.parseVbri(buffer, frameStart)
    );
  }

  /**
   * Parse a Xing/Info tag located right after the side-info region. Returns null if the
   * tag is absent or its 8-byte prefix is not fully buffered. Optional fields that are
   * flagged but not fully buffered are left at 0 / null.
   */
  private static parseXing(
    buffer: Uint8Array,
    frameStart: number,
    sideInfoOffset: number
  ): Mp3VbrHeader | null {
    const tagPos = frameStart + 4 + sideInfoOffset;
    if (tagPos + 8 > buffer.length) return null;

    const isXing = Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Xing');
    if (!isXing && !Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Info')) return null;

    const flags = Mp3FormatDecoder.readUint32BE(buffer, tagPos + 4);
    const hasFrames = (flags & 0x1) !== 0;
    const hasBytes = (flags & 0x2) !== 0;
    const hasToc = (flags & 0x4) !== 0;

    const result: Mp3VbrHeader = { isXing, totalFrames: 0, totalBytes: 0, toc: null };

    // Fields are packed in flag order: frames, bytes, TOC, quality.
    let pos = tagPos + 8;
    if (hasFrames) {
      if (pos + 4 > buffer.length) return result;
      result.totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos);
      pos += 4;
    }
    if (hasBytes) {
      if (pos + 4 > buffer.length) return result;
      result.totalBytes = Mp3FormatDecoder.readUint32BE(buffer, pos);
      pos += 4;
    }
    // The TOC is only kept alongside a frame count (it is indexed by percent of duration).
    if (hasToc && hasFrames && pos + 100 <= buffer.length) {
      result.toc = buffer.slice(pos, pos + 100); // copy, so it outlives the parse buffer
    }

    return result;
  }

  /**
   * Parse a VBRI tag at its fixed position, 32 bytes after the frame header.
   * Returns null if the tag is absent or not fully buffered.
   */
  private static parseVbri(buffer: Uint8Array, frameStart: number): Mp3VbrHeader | null {
    const pos = frameStart + 4 + 32;
    if (pos + 18 > buffer.length) return null;
    if (!Mp3FormatDecoder.matchAscii(buffer, pos, 'VBRI')) return null;

    // Layout after the 4-byte id: version(2) delay(2) quality(2) bytes(4) frames(4) ...
    const totalBytes = Mp3FormatDecoder.readUint32BE(buffer, pos + 10);
    const totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos + 14);

    // VBRI is always VBR, but its TOC layout differs from Xing's percentage table, so no
    // TOC is surfaced; seeking falls back to the byte-rate estimate.
    return { isXing: true, totalFrames, totalBytes, toc: null };
  }

  /** True when the bytes at `pos` equal the ASCII string `tag` and fit inside the buffer. */
  private static matchAscii(buffer: Uint8Array, pos: number, tag: string): boolean {
    if (pos + tag.length > buffer.length) return false;
    for (let i = 0; i < tag.length; i++) {
      if (buffer[pos + i] !== tag.charCodeAt(i)) return false;
    }
    return true;
  }

  /** Read a big-endian unsigned 32-bit integer at `pos` (caller guarantees 4 bytes exist). */
  private static readUint32BE(buffer: Uint8Array, pos: number): number {
    // `>>> 0` turns the signed 32-bit result of the bitwise ops back into an unsigned value.
    return ((buffer[pos] << 24) | (buffer[pos + 1] << 16) | (buffer[pos + 2] << 8) | buffer[pos + 3]) >>> 0;
  }
}
|
||||
Reference in New Issue
Block a user