/**
 * Mp3FormatDecoder - MP3 (MPEG-1/2/2.5 Layer III) implementation of IFormatDecoder.
 *
 * All MP3-specific stream logic lives here: ID3v2 skipping, MPEG frame-sync scanning,
 * frame-header decode, Xing/Info/VBRI VBR-header detection, segment sizing, and seek
 * byte-offset math (CBR estimate or VBR TOC interpolation). StreamDecoder delegates to
 * this via IFormatDecoder and holds no MP3 knowledge of its own.
 *
 * MP3 frames are self-contained, so wrapSegment is a zero-copy passthrough — the browser's
 * decodeAudioData accepts raw frame bytes directly and tolerates a partial leading frame.
 */

import type { FormatInfo, IFormatDecoder, Mp3VbrSeekData } from './IFormatDecoder.js';

/** Fields decoded from a single 4-byte MPEG audio frame header. */
interface Mp3FrameHeader {
  /** Raw 2-bit version field: 3 = MPEG1, 2 = MPEG2, 0 = MPEG2.5. */
  version: number;
  sampleRate: number;
  channels: number;
  /** Raw 2-bit channel mode: 0-2 = stereo variants, 3 = mono. */
  channelMode: number;
  bitrateKbps: number;
  samplesPerFrame: number;
  /** Frame length in bytes, including the padding slot when the padding bit is set. */
  frameSize: number;
}

/** Result of probing the first frame for a Xing/Info or VBRI header. */
interface Mp3VbrHeader {
  /** True when the stream should be treated as true VBR ("Xing" tag or VBRI). */
  isXing: boolean;
  totalFrames: number;
  totalBytes: number;
  toc: Uint8Array | null;
}

export class Mp3FormatDecoder implements IFormatDecoder {
  // MPEG Layer III bitrate tables (kbps), indexed by the 4-bit bitrate index.
  // Index 0 (free) and 15 (reserved) are invalid and rejected during frame validation.
  private static readonly BITRATES_MPEG1 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];
  private static readonly BITRATES_MPEG2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];

  // Sample-rate tables (Hz), indexed by the 2-bit sample-rate index (3 = reserved).
  private static readonly SAMPLE_RATES_MPEG1 = [44100, 48000, 32000];
  private static readonly SAMPLE_RATES_MPEG2 = [22050, 24000, 16000];
  private static readonly SAMPLE_RATES_MPEG25 = [11025, 12000, 8000];

  // Minimum number of bytes to accumulate before handing a segment to the decoder.
  private static readonly MIN_SEGMENT_BYTES = 4096;

  // Number of entries in a Xing seek table (one per percent of duration).
  private static readonly XING_TOC_ENTRIES = 100;

  /**
   * Try to derive stream format information from the bytes received so far.
   *
   * @param chunks Accumulated stream chunks, in arrival order.
   * @param totalSize Combined byte length of `chunks`.
   * @returns The decoded format info, or null when no valid Layer III frame header
   *          could be found in the available bytes.
   */
  tryParseHeader(chunks: Uint8Array[], totalSize: number): FormatInfo | null {
    const buffer = Mp3FormatDecoder.concat(chunks, totalSize);

    // Need at least the 10-byte ID3v2 header probe; findFrameSync separately requires
    // 4 bytes at the candidate position for the frame header itself.
    if (buffer.length < 10) return null;

    const searchStart = Mp3FormatDecoder.id3v2Skip(buffer);

    // Scan for the first valid MPEG Layer III frame from the skip offset.
    const frameStart = Mp3FormatDecoder.findFrameSync(buffer, searchStart);
    if (frameStart < 0) return null;

    // Decode the 4-byte frame header.
    const h = Mp3FormatDecoder.decodeFrameHeader(buffer, frameStart);
    if (!h) return null;

    const vbr = Mp3FormatDecoder.parseVbrHeader(buffer, frameStart, h);

    // Xing "Xing" tag → true VBR (size-based segments, no fixed blockAlign).
    // Xing "Info" tag or no VBR header → CBR (frame-aligned blockAlign).
    const isVbr = vbr?.isXing === true;
    const blockAlign = isVbr ? 0 : h.frameSize;

    let totalDuration: number | null = null;
    let seekData: Mp3VbrSeekData | null = null;

    if (vbr && vbr.totalFrames > 0) {
      totalDuration = vbr.totalFrames * h.samplesPerFrame / h.sampleRate;
    }
    if (vbr?.toc) {
      seekData = { kind: 'mp3-vbr', toc: vbr.toc, totalBytes: vbr.totalBytes };
    }

    return {
      sampleRate: h.sampleRate,
      channels: h.channels,
      bitsPerSample: 16, // conventional for MP3 (decoder handles the real format internally)
      byteRate: h.bitrateKbps * 125, // bytes/sec; used for CBR seek estimate
      blockAlign,
      totalDuration,
      audioDataOffset: frameStart, // file-absolute byte where audio frames begin
      seekData
    };
  }

  /**
   * Decide how many of the currently buffered bytes to hand to the decoder.
   *
   * @param info Format info produced by tryParseHeader.
   * @param availableBytes Bytes currently buffered and not yet consumed.
   * @param requestedSize Preferred segment size in bytes.
   * @param streamComplete True when no more bytes will arrive.
   * @returns The segment size in bytes, or 0 when the caller should wait for more data
   *          (or nothing usable remains).
   */
  getAlignedSegmentSize(
    info: FormatInfo,
    availableBytes: number,
    requestedSize: number,
    streamComplete: boolean
  ): number {
    const minSize = Mp3FormatDecoder.MIN_SEGMENT_BYTES;

    if (availableBytes === 0) return 0;

    const frameBytes = info.blockAlign;
    if (frameBytes > 0) {
      // CBR: align to whole frames so each segment is independently decodable.
      // A tail shorter than one frame is discarded rather than over-read.
      if (availableBytes < frameBytes) return 0;

      const minFrames = Math.ceil(minSize / frameBytes);
      const availableFrames = Math.floor(availableBytes / frameBytes);
      if (!streamComplete && availableFrames < minFrames) return 0;

      const requestedFrames = Math.floor(Math.min(requestedSize, availableBytes) / frameBytes);

      // The floor below (minFrames, or 1 once the stream is complete) is guaranteed to
      // fit by the checks above, and requestedFrames is derived from availableBytes,
      // so the result never exceeds availableBytes.
      const floorFrames = streamComplete ? 1 : minFrames;
      return Math.max(floorFrames, requestedFrames) * frameBytes;
    }

    // VBR: size-based — frame sizes vary, so we cannot align cleanly. The browser MP3
    // decoder skips a partial leading frame gracefully.
    if (!streamComplete && availableBytes < minSize) return 0;
    return Math.min(requestedSize, availableBytes);
  }

  /**
   * Return the bytes to pass to the decoder for one segment. MP3 frames are
   * self-contained, so the raw bytes are returned unchanged (no copy, no wrapper).
   */
  wrapSegment(_info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
    return rawBytes;
  }

  /**
   * Map a playback position to a file-absolute byte offset to seek to.
   *
   * Uses Xing TOC interpolation when a duration, a full 100-entry TOC and a byte count
   * are all available; otherwise falls back to a byte-rate estimate. Negative or NaN
   * positions are treated as 0.
   *
   * @param info Format info produced by tryParseHeader.
   * @param positionSeconds Target position in seconds.
   * @returns Byte offset at or after `info.audioDataOffset`.
   */
  calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
    // The comparison is false for NaN as well as negatives, so both collapse to 0.
    const position = positionSeconds > 0 ? positionSeconds : 0;

    if (info.totalDuration == null || info.totalDuration <= 0) {
      // No duration info — CBR byteRate estimate.
      return Mp3FormatDecoder.byteRateOffset(info, position);
    }

    const mp3Vbr = info.seekData?.kind === 'mp3-vbr' ? info.seekData as Mp3VbrSeekData : null;
    const entries = Mp3FormatDecoder.XING_TOC_ENTRIES;

    if (mp3Vbr?.toc && mp3Vbr.toc.length >= entries && mp3Vbr.totalBytes > 0) {
      // VBR with Xing TOC — interpolate file-byte fraction from the percentage table.
      // Clamp the percentage to [0, 100] but the table index to [0, 99], so that the
      // last bucket still interpolates between toc[99] and the implicit end value 256.
      const percent = Math.min(100, position / info.totalDuration * 100);
      const tocIdx = Math.min(entries - 1, Math.floor(percent));
      const tocFrac = percent - tocIdx;
      const t0 = mp3Vbr.toc[tocIdx];
      const t1 = tocIdx < entries - 1 ? mp3Vbr.toc[tocIdx + 1] : 256;
      const byteFraction = (t0 + (t1 - t0) * tocFrac) / 256.0;
      return info.audioDataOffset + Math.floor(byteFraction * mp3Vbr.totalBytes);
    }

    // VBR without TOC or CBR with duration — byteRate estimate.
    return Mp3FormatDecoder.byteRateOffset(info, position);
  }

  /**
   * CBR/VBR-without-TOC seek estimate from average byte rate. Frame-aligns the result
   * when blockAlign is known (CBR); otherwise returns the raw byte position. Never
   * returns an offset before `info.audioDataOffset`.
   */
  private static byteRateOffset(info: FormatInfo, positionSeconds: number): number {
    if (info.byteRate <= 0) return info.audioDataOffset;

    const raw = Math.max(0, Math.floor(positionSeconds * info.byteRate));
    if (info.blockAlign > 0) {
      return info.audioDataOffset + Math.floor(raw / info.blockAlign) * info.blockAlign;
    }
    return info.audioDataOffset + raw;
  }

  /**
   * Concatenate the accumulated chunks into one contiguous buffer. A single chunk is
   * returned as-is (no copy).
   */
  private static concat(chunks: Uint8Array[], totalSize: number): Uint8Array {
    if (chunks.length === 1) return chunks[0];

    const buffer = new Uint8Array(totalSize);
    let offset = 0;
    for (const chunk of chunks) {
      buffer.set(chunk, offset);
      offset += chunk.length;
    }
    return buffer;
  }

  /**
   * Return the byte offset past an ID3v2 tag if one is present at the buffer start,
   * else 0. The size field is a syncsafe big-endian uint28 (each byte's bit 7 is 0);
   * bit 7 is masked off so a malformed size byte cannot corrupt the skip length.
   */
  private static id3v2Skip(buffer: Uint8Array): number {
    if (buffer.length < 10) return 0;
    if (buffer[0] !== 0x49 || buffer[1] !== 0x44 || buffer[2] !== 0x33) return 0; // 'I' 'D' '3'

    const size =
      ((buffer[6] & 0x7f) << 21) |
      ((buffer[7] & 0x7f) << 14) |
      ((buffer[8] & 0x7f) << 7) |
      (buffer[9] & 0x7f);
    const hasFooter = (buffer[5] & 0x10) !== 0; // bit 4 of flags byte
    return 10 + size + (hasFooter ? 10 : 0);
  }

  /**
   * Scan one byte at a time from `start` for the first byte position that begins a valid
   * MPEG Layer III frame. Returns the offset, or -1 if none found in the available bytes
   * (caller should wait for more data).
   */
  private static findFrameSync(buffer: Uint8Array, start: number): number {
    // Need 4 bytes for a full frame header.
    for (let i = Math.max(0, start); i + 4 <= buffer.length; i++) {
      if (buffer[i] !== 0xff) continue;
      if ((buffer[i + 1] & 0xe0) !== 0xe0) continue; // top 3 bits of byte 1 must be set

      const version = (buffer[i + 1] >> 3) & 3;
      const layer = (buffer[i + 1] >> 1) & 3;
      const bitrateIndex = buffer[i + 2] >> 4;
      const sampleRateIndex = (buffer[i + 2] >> 2) & 3;

      if (version === 1) continue; // 01 = reserved
      if (layer !== 1) continue; // 01 = Layer III
      if (bitrateIndex === 0 || bitrateIndex === 15) continue; // free / reserved
      if (sampleRateIndex === 3) continue; // reserved

      return i;
    }
    return -1;
  }

  /**
   * Decode the 4-byte frame header at `frameStart`. Returns null if the header does not
   * fit in the buffer or the resolved bitrate/sample-rate are invalid (defensive —
   * findFrameSync already validated the indices).
   */
  private static decodeFrameHeader(buffer: Uint8Array, frameStart: number): Mp3FrameHeader | null {
    if (frameStart < 0 || frameStart + 4 > buffer.length) return null;

    const b1 = buffer[frameStart + 1];
    const b2 = buffer[frameStart + 2];
    const b3 = buffer[frameStart + 3];

    const version = (b1 >> 3) & 3; // 3 = MPEG1, 2 = MPEG2, 0 = MPEG2.5
    const bitrateIndex = b2 >> 4;
    const sampleRateIndex = (b2 >> 2) & 3;
    const paddingBit = (b2 >> 1) & 1;
    const channelMode = b3 >> 6; // 0-2 = stereo variants, 3 = mono

    const isMpeg1 = version === 3;
    const bitrateKbps = isMpeg1
      ? Mp3FormatDecoder.BITRATES_MPEG1[bitrateIndex]
      : Mp3FormatDecoder.BITRATES_MPEG2[bitrateIndex];
    const sampleRate = version === 3
      ? Mp3FormatDecoder.SAMPLE_RATES_MPEG1[sampleRateIndex]
      : version === 2
        ? Mp3FormatDecoder.SAMPLE_RATES_MPEG2[sampleRateIndex]
        : Mp3FormatDecoder.SAMPLE_RATES_MPEG25[sampleRateIndex];

    if (!bitrateKbps || !sampleRate) return null;

    const channels = channelMode === 3 ? 1 : 2;
    const samplesPerFrame = isMpeg1 ? 1152 : 576;

    // Layer III frame length = (samplesPerFrame / 8) * bitrate / sampleRate + padding.
    // That is 144 * bitrate / sampleRate for MPEG1 but 72 * bitrate / sampleRate for
    // MPEG2/2.5, whose frames carry half as many samples.
    const frameSize = Math.floor((samplesPerFrame / 8) * bitrateKbps * 1000 / sampleRate) + paddingBit;

    return { version, sampleRate, channels, channelMode, bitrateKbps, samplesPerFrame, frameSize };
  }

  /**
   * Detect a Xing/Info (VBR or CBR-with-info) or VBRI header inside the first frame.
   * Returns null when neither is present (pure CBR).
   */
  private static parseVbrHeader(
    buffer: Uint8Array,
    frameStart: number,
    h: { version: number; channelMode: number }
  ): Mp3VbrHeader | null {
    const isMpeg1 = h.version === 3;
    const isMono = h.channelMode === 3;

    // Side-info region size depends on version and channel count.
    const sideInfoOffset = isMpeg1 ? (isMono ? 17 : 32) : (isMono ? 9 : 17);

    return (
      Mp3FormatDecoder.parseXing(buffer, frameStart, sideInfoOffset) ??
      Mp3FormatDecoder.parseVbri(buffer, frameStart)
    );
  }

  /**
   * Parse a Xing/Info tag located right after the side-info region. Returns null if the
   * tag is absent or the buffer is too short to hold the tag and its flags word. Fields
   * that are flagged as present but truncated are left at their defaults.
   */
  private static parseXing(
    buffer: Uint8Array,
    frameStart: number,
    sideInfoOffset: number
  ): Mp3VbrHeader | null {
    const tagPos = frameStart + 4 + sideInfoOffset;
    if (tagPos + 8 > buffer.length) return null;

    const isXing = Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Xing');
    const isInfo = Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Info');
    if (!isXing && !isInfo) return null;

    const flags = Mp3FormatDecoder.readUint32BE(buffer, tagPos + 4);
    const hasFrames = (flags & 0x1) !== 0;
    const hasBytes = (flags & 0x2) !== 0;
    const hasToc = (flags & 0x4) !== 0;

    // Fields are packed in flag order: frames, bytes, TOC, quality.
    let pos = tagPos + 8;
    let totalFrames = 0;
    let totalBytes = 0;
    let toc: Uint8Array | null = null;

    if (hasFrames) {
      if (pos + 4 > buffer.length) return { isXing, totalFrames, totalBytes, toc };
      totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos);
      pos += 4;
    }
    if (hasBytes) {
      if (pos + 4 > buffer.length) return { isXing, totalFrames, totalBytes, toc };
      totalBytes = Mp3FormatDecoder.readUint32BE(buffer, pos);
      pos += 4;
    }
    // The TOC is only meaningful alongside a frame count, and only when fully buffered.
    const tocEnd = pos + Mp3FormatDecoder.XING_TOC_ENTRIES;
    if (hasToc && hasFrames && tocEnd <= buffer.length) {
      toc = buffer.slice(pos, tocEnd);
    }

    return { isXing, totalFrames, totalBytes, toc };
  }

  /** Parse a VBRI tag at the fixed Fraunhofer position. Returns null if absent. */
  private static parseVbri(buffer: Uint8Array, frameStart: number): Mp3VbrHeader | null {
    const pos = frameStart + 4 + 32;
    if (pos + 18 > buffer.length) return null;
    if (!Mp3FormatDecoder.matchAscii(buffer, pos, 'VBRI')) return null;

    const totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos + 14);

    // VBRI is always VBR but its TOC layout differs from Xing's percentage table;
    // we surface duration only and fall back to byteRate seek estimation.
    return { isXing: true, totalFrames, totalBytes: 0, toc: null };
  }

  /** True when the bytes at `pos` equal the character codes of `tag`. */
  private static matchAscii(buffer: Uint8Array, pos: number, tag: string): boolean {
    if (pos + tag.length > buffer.length) return false;
    for (let i = 0; i < tag.length; i++) {
      if (buffer[pos + i] !== tag.charCodeAt(i)) return false;
    }
    return true;
  }

  /** Read a big-endian unsigned 32-bit integer starting at `pos`. */
  private static readUint32BE(buffer: Uint8Array, pos: number): number {
    // Unsigned: coerce the sign bit back to a positive value.
    return ((buffer[pos] << 24) | (buffer[pos + 1] << 16) | (buffer[pos + 2] << 8) | buffer[pos + 3]) >>> 0;
  }
}