/**
 * Mp3FormatDecoder - MP3 (MPEG-1/2/2.5 Layer III) implementation of IFormatDecoder.
 *
 * All MP3-specific stream logic lives here: ID3v2 skipping, MPEG frame-sync scanning,
 * frame-header decode, Xing/Info/VBRI VBR-header detection, segment sizing, and seek
 * byte-offset math (CBR estimate or VBR TOC interpolation). StreamDecoder delegates to
 * this via IFormatDecoder and holds no MP3 knowledge of its own.
 *
 * MP3 frames are self-contained, so wrapSegment is a zero-copy passthrough — the browser's
 * decodeAudioData accepts raw frame bytes directly and tolerates a partial leading frame.
 */

import type { FormatInfo, IFormatDecoder, Mp3VbrSeekData } from './IFormatDecoder.js';

/** Fields decoded from a 4-byte MPEG Layer III frame header. */
interface Mp3FrameHeader {
    /** Raw 2-bit version field: 3 = MPEG1, 2 = MPEG2, 0 = MPEG2.5. */
    version: number;
    /** Sample rate in Hz. */
    sampleRate: number;
    /** 1 for mono, 2 for every stereo variant. */
    channels: number;
    /** Raw 2-bit channel-mode field: 0-2 = stereo variants, 3 = mono. */
    channelMode: number;
    /** Bitrate of this frame in kbps. */
    bitrateKbps: number;
    /** PCM samples per frame: 1152 for MPEG1, 576 for MPEG2 / MPEG2.5. */
    samplesPerFrame: number;
    /** Frame length in bytes, including the padding byte when the padding bit is set. */
    frameSize: number;
}

/** Metadata read from a Xing/Info or VBRI tag in the first frame. */
interface Mp3VbrHeader {
    /** True for a "Xing" or "VBRI" tag (real VBR); false for an "Info" tag (CBR with metadata). */
    isVbr: boolean;
    /** Total frame count, or 0 when the tag does not carry one. */
    totalFrames: number;
    /** Total audio byte count, or 0 when the tag does not carry one. */
    totalBytes: number;
    /** 100-entry Xing seek table (values 0-255), or null when absent or unusable. */
    toc: Uint8Array | null;
}

export class Mp3FormatDecoder implements IFormatDecoder {
    // MPEG Layer III bitrate tables (kbps), indexed by the 4-bit bitrate index.
    // Index 0 (free) and 15 (reserved) are invalid and rejected during frame validation.
    private static readonly BITRATES_MPEG1 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];
    private static readonly BITRATES_MPEG2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];

    // Sample-rate tables (Hz), indexed by the 2-bit sample-rate index (3 = reserved).
    private static readonly SAMPLE_RATES_MPEG1 = [44100, 48000, 32000];
    private static readonly SAMPLE_RATES_MPEG2 = [22050, 24000, 16000];
    private static readonly SAMPLE_RATES_MPEG25 = [11025, 12000, 8000];

    /**
     * Try to derive stream parameters from the bytes accumulated so far.
     *
     * @param chunks    Chunks received so far, in stream order.
     * @param totalSize Sum of the chunk lengths.
     * @returns The parsed FormatInfo, or null when more data is needed (fewer than 10 bytes,
     *          no valid frame header found yet, or the first frame is not fully buffered).
     */
    tryParseHeader(chunks: Uint8Array[], totalSize: number): FormatInfo | null {
        const buffer = Mp3FormatDecoder.concat(chunks, totalSize);

        // Need at least the 10-byte ID3v2 header probe before anything can be decided.
        if (buffer.length < 10) return null;

        const searchStart = Mp3FormatDecoder.id3v2Skip(buffer);

        // Scan for the first valid MPEG Layer III frame from the skip offset.
        const frameStart = Mp3FormatDecoder.findFrameSync(buffer, searchStart);
        if (frameStart < 0) return null;

        // Decode the 4-byte frame header.
        const h = Mp3FormatDecoder.decodeFrameHeader(buffer, frameStart);
        if (!h) return null;

        // Wait until the whole first frame is buffered. A Xing/Info/VBRI tag lives inside
        // that frame; parsing a truncated one would misreport a VBR stream as CBR (or drop
        // its duration / TOC) with no later chance to correct it.
        if (buffer.length < frameStart + h.frameSize) return null;

        const vbr = Mp3FormatDecoder.parseVbrHeader(buffer, frameStart, h);

        // "Xing" or "VBRI" tag → true VBR (size-based segments, no fixed blockAlign).
        // "Info" tag or no tag  → CBR (frame-aligned blockAlign).
        const isVbr = vbr?.isVbr === true;
        const blockAlign = isVbr ? 0 : h.frameSize;

        let totalDuration: number | null = null;
        let seekData: Mp3VbrSeekData | null = null;
        if (vbr && vbr.totalFrames > 0) {
            totalDuration = vbr.totalFrames * h.samplesPerFrame / h.sampleRate;
        }
        if (vbr?.toc) {
            seekData = { kind: 'mp3-vbr', toc: vbr.toc, totalBytes: vbr.totalBytes };
        }

        // CBR: the first frame's bitrate is the stream's bitrate. VBR: the first frame's
        // bitrate says nothing about the rest of the stream, so prefer the true average
        // when the tag supplies both a byte count and a frame count.
        let byteRate = h.bitrateKbps * 125; // kbps → bytes/sec
        if (isVbr && vbr && totalDuration !== null && totalDuration > 0 && vbr.totalBytes > 0) {
            byteRate = Math.round(vbr.totalBytes / totalDuration);
        }

        return {
            sampleRate: h.sampleRate,
            channels: h.channels,
            bitsPerSample: 16,           // conventional for MP3 (decoder handles the real format internally)
            byteRate,                    // bytes/sec; used for the byte-rate seek estimate
            blockAlign,
            totalDuration,
            audioDataOffset: frameStart, // file-absolute byte where audio frames begin
            seekData
        };
    }

    /**
     * Decide how many of the buffered bytes to hand to the decoder next.
     *
     * @param info           Format info produced by tryParseHeader.
     * @param availableBytes Bytes currently buffered and not yet consumed.
     * @param requestedSize  Segment size the caller would like.
     * @param streamComplete True once no more bytes will arrive.
     * @returns Byte count to consume, or 0 to wait for more data. Never exceeds availableBytes.
     */
    getAlignedSegmentSize(
        info: FormatInfo,
        availableBytes: number,
        requestedSize: number,
        streamComplete: boolean
    ): number {
        const minSize = 4096; // at least 4 KB before starting decode

        if (availableBytes === 0) return 0;

        if (info.blockAlign > 0) {
            // CBR: size segments in whole multiples of the frame length.
            // NOTE(review): blockAlign is the first frame's length including its padding
            // byte; streams that toggle padding per frame will drift from exact frame
            // boundaries — confirm the decoder's partial-frame tolerance covers this.

            // Guard: need at least one full frame; discard sub-frame tail rather than over-reading.
            if (availableBytes < info.blockAlign) return 0;

            const minFrames = Math.ceil(minSize / info.blockAlign);
            const availableFrames = Math.floor(availableBytes / info.blockAlign);
            if (!streamComplete && availableFrames < minFrames) return 0;

            const requestedFrames = Math.floor(Math.min(requestedSize, availableBytes) / info.blockAlign);
            // Never exceeds availableBytes: requestedFrames is floored from availableBytes,
            // and both lower bounds (1 and minFrames) were checked against it above.
            return Math.max(streamComplete ? 1 : minFrames, requestedFrames) * info.blockAlign;
        }

        // VBR: size-based — frame sizes vary, so we cannot align cleanly. The browser MP3
        // decoder skips a partial leading frame gracefully.
        if (!streamComplete && availableBytes < minSize) return 0;
        return Math.min(requestedSize, availableBytes);
    }

    /**
     * Return the segment unchanged: MP3 frames are self-contained, so decodeAudioData
     * accepts raw frame data directly and no container wrapping is needed.
     */
    wrapSegment(_info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
        return rawBytes;
    }

    /**
     * Map a playback position to a file-absolute byte offset to resume streaming from.
     *
     * Uses the Xing TOC when the stream has a known duration, a TOC and a byte count;
     * otherwise falls back to a linear byte-rate estimate. Negative or NaN positions are
     * treated as 0 so the result is never NaN and never precedes audioDataOffset.
     *
     * @param info            Format info produced by tryParseHeader.
     * @param positionSeconds Target position in seconds.
     * @returns File-absolute byte offset.
     */
    calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
        const position = Number.isNaN(positionSeconds) ? 0 : Math.max(0, positionSeconds);

        if (info.totalDuration == null || info.totalDuration <= 0) {
            // No duration info — byteRate estimate.
            return Mp3FormatDecoder.byteRateOffset(info, position);
        }

        const mp3Vbr = info.seekData?.kind === 'mp3-vbr' ? info.seekData as Mp3VbrSeekData : null;

        if (mp3Vbr?.toc && mp3Vbr.totalBytes > 0) {
            // VBR with Xing TOC — interpolate file-byte fraction from the percentage table.
            // The percentage is capped at 99, so seeks into the final 1% land on entry 99.
            const percent = Math.min(99, position / info.totalDuration * 100);
            const tocIdx = Math.floor(percent);
            const tocFrac = percent - tocIdx;
            const t0 = mp3Vbr.toc[tocIdx];
            const t1 = tocIdx < 99 ? mp3Vbr.toc[tocIdx + 1] : 256;
            const bytePercent = (t0 + (t1 - t0) * tocFrac) / 256.0;
            return info.audioDataOffset + Math.floor(bytePercent * mp3Vbr.totalBytes);
        }

        // VBR without TOC or CBR with duration — byteRate estimate.
        return Mp3FormatDecoder.byteRateOffset(info, position);
    }

    /**
     * Seek estimate from the byte rate. Frame-aligns the result when blockAlign is known
     * (CBR); otherwise returns the raw byte position. Expects a non-negative position.
     */
    private static byteRateOffset(info: FormatInfo, positionSeconds: number): number {
        if (info.byteRate <= 0) return info.audioDataOffset;
        const raw = Math.floor(positionSeconds * info.byteRate);
        if (info.blockAlign > 0) {
            return info.audioDataOffset + Math.floor(raw / info.blockAlign) * info.blockAlign;
        }
        return info.audioDataOffset + raw;
    }

    /** Concatenate the accumulated chunks into one contiguous buffer (no copy for a single chunk). */
    private static concat(chunks: Uint8Array[], totalSize: number): Uint8Array {
        if (chunks.length === 1) return chunks[0];
        const buffer = new Uint8Array(totalSize);
        let offset = 0;
        for (const c of chunks) {
            buffer.set(c, offset);
            offset += c.length;
        }
        return buffer;
    }

    /**
     * Return the byte offset past an ID3v2 tag if one is present at the buffer start,
     * else 0. The size field is a syncsafe big-endian uint28 (each byte's bit 7 is 0).
     * The returned offset may lie beyond the buffered bytes when the tag is not fully
     * received yet.
     */
    private static id3v2Skip(buffer: Uint8Array): number {
        if (buffer.length < 10) return 0;
        if (buffer[0] !== 0x49 || buffer[1] !== 0x44 || buffer[2] !== 0x33) return 0; // 'I' 'D' '3'

        const size = (buffer[6] << 21) | (buffer[7] << 14) | (buffer[8] << 7) | buffer[9];
        const hasFooter = (buffer[5] & 0x10) !== 0; // bit 4 of flags byte
        return 10 + size + (hasFooter ? 10 : 0);
    }

    /**
     * Scan one byte at a time from `start` for the first byte position that begins a valid
     * MPEG Layer III frame header. Returns the offset, or -1 if none found in the available
     * bytes (caller should wait for more data).
     */
    private static findFrameSync(buffer: Uint8Array, start: number): number {
        // Need 4 bytes for a full frame header.
        for (let i = Math.max(0, start); i + 4 <= buffer.length; i++) {
            if (buffer[i] !== 0xff) continue;
            if ((buffer[i + 1] & 0xe0) !== 0xe0) continue; // top 3 bits of byte 1 must be set

            const version = (buffer[i + 1] >> 3) & 3;
            const layer = (buffer[i + 1] >> 1) & 3;
            const bitrateIndex = buffer[i + 2] >> 4;
            const sampleRateIndex = (buffer[i + 2] >> 2) & 3;

            if (version === 1) continue;                             // 01 = reserved
            if (layer !== 1) continue;                               // 01 = Layer III
            if (bitrateIndex === 0 || bitrateIndex === 15) continue; // free / reserved
            if (sampleRateIndex === 3) continue;                     // reserved

            return i;
        }
        return -1;
    }

    /**
     * Decode the 4-byte frame header at `frameStart`. Returns null if the resolved
     * bitrate/sample-rate are invalid (defensive — findFrameSync already validated indices).
     */
    private static decodeFrameHeader(buffer: Uint8Array, frameStart: number): Mp3FrameHeader | null {
        const b1 = buffer[frameStart + 1];
        const b2 = buffer[frameStart + 2];
        const b3 = buffer[frameStart + 3];

        const version = (b1 >> 3) & 3; // 3 = MPEG1, 2 = MPEG2, 0 = MPEG2.5
        const bitrateIndex = b2 >> 4;
        const sampleRateIndex = (b2 >> 2) & 3;
        const paddingBit = (b2 >> 1) & 1;
        const channelMode = b3 >> 6;   // 0-2 = stereo variants, 3 = mono

        const isMpeg1 = version === 3;

        const bitrateKbps = isMpeg1
            ? Mp3FormatDecoder.BITRATES_MPEG1[bitrateIndex]
            : Mp3FormatDecoder.BITRATES_MPEG2[bitrateIndex];

        const sampleRate = version === 3
            ? Mp3FormatDecoder.SAMPLE_RATES_MPEG1[sampleRateIndex]
            : version === 2
                ? Mp3FormatDecoder.SAMPLE_RATES_MPEG2[sampleRateIndex]
                : Mp3FormatDecoder.SAMPLE_RATES_MPEG25[sampleRateIndex];

        if (!bitrateKbps || !sampleRate) return null;

        const channels = channelMode === 3 ? 1 : 2;
        const samplesPerFrame = isMpeg1 ? 1152 : 576;

        // Layer III frame length = (samplesPerFrame / 8) * bitrate / sampleRate + padding,
        // i.e. coefficient 144 for MPEG1 and 72 for MPEG2 / MPEG2.5.
        const frameSize = Math.floor((samplesPerFrame / 8) * bitrateKbps * 1000 / sampleRate) + paddingBit;

        return { version, sampleRate, channels, channelMode, bitrateKbps, samplesPerFrame, frameSize };
    }

    /**
     * Detect a Xing/Info (VBR or CBR-with-info) or VBRI header inside the first frame.
     * Returns null when neither is present (pure CBR).
     */
    private static parseVbrHeader(
        buffer: Uint8Array,
        frameStart: number,
        h: { version: number; channelMode: number }
    ): Mp3VbrHeader | null {
        const isMpeg1 = h.version === 3;
        const isMono = h.channelMode === 3;

        // Side-info region size depends on version and channel count.
        const sideInfoOffset = isMpeg1
            ? (isMono ? 17 : 32)
            : (isMono ? 9 : 17);

        const xing = Mp3FormatDecoder.parseXing(buffer, frameStart, sideInfoOffset);
        if (xing) return xing;

        return Mp3FormatDecoder.parseVbri(buffer, frameStart);
    }

    /** Parse a Xing/Info tag located right after the side-info region. Returns null if absent. */
    private static parseXing(
        buffer: Uint8Array,
        frameStart: number,
        sideInfoOffset: number
    ): Mp3VbrHeader | null {
        const tagPos = frameStart + 4 + sideInfoOffset;
        if (tagPos + 8 > buffer.length) return null;

        const isVbr = Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Xing');
        const isInfo = Mp3FormatDecoder.matchAscii(buffer, tagPos, 'Info');
        if (!isVbr && !isInfo) return null;

        const flags = Mp3FormatDecoder.readUint32BE(buffer, tagPos + 4);

        // Fields are packed in flag order: frames, bytes, TOC, quality.
        let pos = tagPos + 8;
        let totalFrames = 0;
        let totalBytes = 0;
        let toc: Uint8Array | null = null;

        if (flags & 0x1) { // frame count present
            if (pos + 4 > buffer.length) return { isVbr, totalFrames, totalBytes, toc };
            totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos);
            pos += 4;
        }
        if (flags & 0x2) { // byte count present
            if (pos + 4 > buffer.length) return { isVbr, totalFrames, totalBytes, toc };
            totalBytes = Mp3FormatDecoder.readUint32BE(buffer, pos);
            pos += 4;
        }
        if (flags & 0x4) { // TOC present — only meaningful alongside a frame count
            if (pos + 100 <= buffer.length && (flags & 0x1)) {
                toc = buffer.slice(pos, pos + 100); // copy, so the TOC outlives the chunk buffer
            }
        }

        return { isVbr, totalFrames, totalBytes, toc };
    }

    /** Parse a VBRI tag at the fixed Fraunhofer position (32 bytes past the frame header). Returns null if absent. */
    private static parseVbri(buffer: Uint8Array, frameStart: number): Mp3VbrHeader | null {
        const pos = frameStart + 4 + 32;
        if (pos + 18 > buffer.length) return null;
        if (!Mp3FormatDecoder.matchAscii(buffer, pos, 'VBRI')) return null;

        // Layout after the 4-byte id: version(2) delay(2) quality(2) bytes(4) frames(4) …
        const totalBytes = Mp3FormatDecoder.readUint32BE(buffer, pos + 10);
        const totalFrames = Mp3FormatDecoder.readUint32BE(buffer, pos + 14);

        // VBRI is always VBR but its TOC layout differs from Xing's percentage table;
        // we surface duration and byte count only and fall back to byteRate seek estimation.
        return { isVbr: true, totalFrames, totalBytes, toc: null };
    }

    /** True when the bytes at `pos` equal the ASCII string `tag` and fit inside the buffer. */
    private static matchAscii(buffer: Uint8Array, pos: number, tag: string): boolean {
        if (pos + tag.length > buffer.length) return false;
        for (let i = 0; i < tag.length; i++) {
            if (buffer[pos + i] !== tag.charCodeAt(i)) return false;
        }
        return true;
    }

    /** Read a big-endian unsigned 32-bit integer at `pos`. */
    private static readUint32BE(buffer: Uint8Array, pos: number): number {
        // Unsigned: coerce the sign bit back to a positive value.
        return ((buffer[pos] << 24) | (buffer[pos + 1] << 16) | (buffer[pos + 2] << 8) | buffer[pos + 3]) >>> 0;
    }
}