/**
 * FlacFormatDecoder - FLAC implementation of IFormatDecoder.
 *
 * FLAC differs from WAV (raw PCM) and MP3 (self-contained frames): no audio frame
 * is decodable without the STREAMINFO metadata block that opens the stream. So this
 * decoder captures STREAMINFO during header parsing and wrapSegment prepends a minimal
 * valid FLAC header ("fLaC" + STREAMINFO) to every raw audio segment, making each segment
 * independently decodable by the browser's decodeAudioData.
 *
 * Segment boundaries are placed on frame starts. A frame start is only accepted when the
 * frame header that follows the sync code is structurally valid and its CRC-8 matches,
 * because the two sync bytes alone also occur by chance inside compressed audio.
 *
 * Seeking uses the optional SEEKTABLE metadata block when present; absent a table, seek
 * degrades to the start of audio (restart), which is correct behavior with no seek points.
 */

import type { FlacSeekData, FormatInfo, IFormatDecoder } from './IFormatDecoder.js';

const FLAC_MAGIC = [0x66, 0x4c, 0x61, 0x43] as const; // "fLaC"
const BLOCK_HEADER_LEN = 4; // 1 byte flags/type + 3 bytes big-endian data length
const STREAMINFO_DATA_LEN = 34;
const SEEK_POINT_SIZE = 18;
const PLACEHOLDER_HI = 0xffffffff; // sample_number placeholder = 0xFFFFFFFFFFFFFFFF
const TWO_POW_32 = 4294967296;
// Without scan data, do not emit segments smaller than this while the stream is still open.
const MIN_UNSCANNED_SEGMENT_BYTES = 16384;
// Shortest possible frame header: sync(2) + params(2) + coded number(1) + CRC-8(1).
const MIN_FRAME_HEADER_LEN = 6;

export class FlacFormatDecoder implements IFormatDecoder {
    /**
     * Parse the FLAC stream header ("fLaC" magic followed by metadata blocks).
     *
     * @param chunks - raw stream bytes received so far, in order
     * @param totalSize - caller's count of the bytes held in `chunks`
     * @returns the format description once every metadata block up to and including the
     *   one flagged "last" is fully buffered; `null` when the data is not FLAC, when more
     *   bytes are needed, or when STREAMINFO is missing or too short to be usable
     */
    tryParseHeader(chunks: Uint8Array[], totalSize: number): FormatInfo | null {
        const buf = concat(chunks, totalSize);

        // Need at least the magic to decide anything.
        if (buf.length < FLAC_MAGIC.length) return null;
        for (let i = 0; i < FLAC_MAGIC.length; i++) {
            // Silently reject; StreamDecoder will error when MAX_HEADER_SEARCH_BYTES is exhausted.
            if (buf[i] !== FLAC_MAGIC[i]) return null;
        }

        let sampleRate = 0;
        let channels = 0;
        let bitsPerSample = 0;
        let totalSamples = 0;
        let streamInfoBytes: Uint8Array | null = null;
        let seekPoints: FlacSeekData['points'] = [];

        // Scan metadata blocks starting after the 4-byte magic.
        let offset = FLAC_MAGIC.length;
        while (true) {
            // Each block opens with a 4-byte header.
            if (offset + BLOCK_HEADER_LEN > buf.length) return null;

            const isLast = (buf[offset] & 0x80) !== 0;
            const blockType = buf[offset] & 0x7f;
            const dataLen = (buf[offset + 1] << 16) | (buf[offset + 2] << 8) | buf[offset + 3];
            const dataStart = offset + BLOCK_HEADER_LEN;

            // Need the full block data before we can advance.
            if (dataStart + dataLen > buf.length) return null;

            if (blockType === 0) {
                // STREAMINFO (mandatory first block). A shorter block cannot carry the
                // fields read below and would produce a truncated synthetic header.
                if (dataLen < STREAMINFO_DATA_LEN) {
                    console.warn('FlacFormatDecoder: STREAMINFO block is too short');
                    return null;
                }

                // Field offsets are relative to dataStart: 20-bit sample rate, 3-bit
                // (channels - 1), 5-bit (bits per sample - 1), 36-bit total samples.
                const d = dataStart;
                sampleRate = (buf[d + 10] << 12) | (buf[d + 11] << 4) | (buf[d + 12] >> 4);
                channels = ((buf[d + 12] >> 1) & 0x07) + 1;
                bitsPerSample = (((buf[d + 12] & 0x01) << 4) | (buf[d + 13] >> 4)) + 1;
                totalSamples = ((buf[d + 13] & 0x0f) * TWO_POW_32) + readUint32BE(buf, d + 14);

                // Build the 38-byte synthetic block: header (is_last=1, type=0, len=34) + 34 data bytes.
                streamInfoBytes = new Uint8Array(BLOCK_HEADER_LEN + STREAMINFO_DATA_LEN);
                streamInfoBytes[0] = 0x80; // is_last=1, block_type=0
                streamInfoBytes[1] = 0x00;
                streamInfoBytes[2] = 0x00;
                streamInfoBytes[3] = STREAMINFO_DATA_LEN; // 0x22
                streamInfoBytes.set(buf.subarray(d, d + STREAMINFO_DATA_LEN), BLOCK_HEADER_LEN);
            } else if (blockType === 3) {
                // SEEKTABLE (optional). Each point is 18 bytes.
                const count = Math.floor(dataLen / SEEK_POINT_SIZE);
                const points: FlacSeekData['points'] = [];
                for (let i = 0; i < count; i++) {
                    const p = dataStart + i * SEEK_POINT_SIZE;
                    const sampleHi = readUint32BE(buf, p);
                    const sampleLo = readUint32BE(buf, p + 4);
                    // Placeholder points (sample_number = all 1s) carry no offset - skip.
                    if (sampleHi === PLACEHOLDER_HI && sampleLo === PLACEHOLDER_HI) continue;

                    // sample_number: imprecise beyond 2^53; acceptable for seek-to-nearest.
                    const sampleNumber = sampleHi * TWO_POW_32 + sampleLo;
                    // stream_offset: bytes from the start of the audio frames.
                    const offsetHi = readUint32BE(buf, p + 8);
                    const offsetLo = readUint32BE(buf, p + 12);
                    const streamOffset = offsetHi * TWO_POW_32 + offsetLo;
                    points.push({ sampleNumber, streamOffset });
                }
                seekPoints = points;
            }

            offset = dataStart + dataLen;
            if (isLast) break;
        }

        if (!streamInfoBytes) {
            console.warn('FlacFormatDecoder: no STREAMINFO block found');
            return null;
        }

        const audioDataOffset = offset; // 4 (magic) + sum of all block header+data sizes
        const totalDuration = sampleRate > 0 && totalSamples > 0
            ? totalSamples / sampleRate : null;

        return {
            sampleRate,
            channels,
            bitsPerSample,
            byteRate: 0,   // FLAC is VBR; seeking uses SEEKTABLE or degrades gracefully.
            blockAlign: 0, // Variable-size FLAC frames; no fixed alignment.
            totalDuration,
            audioDataOffset,
            seekData: {
                kind: 'flac-seektable',
                points: seekPoints,
                streamInfoBytes,
                metadataBlocksSize: audioDataOffset - FLAC_MAGIC.length // excludes fLaC magic
            }
        };
    }

    /**
     * Choose how many of the pending raw bytes to emit as the next segment, preferring
     * a size that ends exactly where a FLAC frame begins.
     *
     * @param info - parsed format description (not consulted by this decoder)
     * @param availableBytes - bytes available starting at the current read position
     * @param requestedSize - maximum desired segment size
     * @param streamComplete - true when the stream has ended (allows draining the tail)
     * @param rawData - optional; the first `Math.min(requestedSize, availableBytes)` raw
     *   audio bytes at the current read position, scanned for the last frame boundary
     * @returns the segment size in bytes, or 0 to wait for more data
     */
    getAlignedSegmentSize(
        info: FormatInfo,
        availableBytes: number,
        requestedSize: number,
        streamComplete: boolean,
        rawData?: Uint8Array
    ): number {
        if (availableBytes <= 0) return 0;
        const candidate = Math.min(requestedSize, availableBytes);

        if (!rawData || rawData.length === 0) {
            // No scan data - conservative threshold to avoid tiny unusable segments.
            if (!streamComplete && availableBytes < MIN_UNSCANNED_SEGMENT_BYTES) return 0;
            return candidate;
        }

        // Scan backward from the candidate boundary to find the last FLAC frame start.
        const boundary = FlacFormatDecoder.findLastFlacFrame(rawData, candidate);
        if (boundary > 0) return boundary;

        // No boundary inside the window.
        if (streamComplete) return candidate; // flush remaining bytes (stream done)

        // The window is already as large as it will ever be, so waiting for more data
        // cannot reveal a boundary; emit it rather than stalling until the stream ends.
        const scanned = Math.min(candidate, rawData.length);
        if (scanned >= requestedSize) return candidate;

        return 0; // wait for more data
    }

    /**
     * Scan backward from `maxBytes` in `rawData` for the start of the last FLAC audio
     * frame. A position qualifies when it holds the sync code (0xFF followed by 0xF8 or
     * 0xF9, the two blocking-strategy variants) and the frame header starting there
     * passes validation, CRC-8 included.
     *
     * @returns the byte offset of that frame, or 0 if none is found (index 0 is never
     *   reported because a zero-length segment is useless to the caller)
     */
    private static findLastFlacFrame(rawData: Uint8Array, maxBytes: number): number {
        const limit = Math.min(maxBytes, rawData.length);
        // Need at least 2 bytes to verify the sync pair; skip the very last byte.
        for (let i = limit - 2; i > 0; i--) {
            if (rawData[i] === 0xff && (rawData[i + 1] & 0xfe) === 0xf8 &&
                isValidFrameHeader(rawData, i)) {
                return i;
            }
        }
        return 0;
    }

    /**
     * Make a raw run of audio frames independently decodable by prepending the
     * "fLaC" magic and the synthetic STREAMINFO block captured by tryParseHeader.
     *
     * @param info - format description returned by tryParseHeader
     * @param rawBytes - raw FLAC frames
     * @returns a new buffer (magic + STREAMINFO + frames), or `rawBytes` unchanged when
     *   no STREAMINFO is available
     */
    wrapSegment(info: FormatInfo, rawBytes: Uint8Array): Uint8Array {
        const flacData = info.seekData as FlacSeekData | null | undefined;
        const streamInfoBytes = flacData?.streamInfoBytes;
        if (!streamInfoBytes) {
            // Defensive: without STREAMINFO the segment isn't decodable. This path shouldn't
            // occur in practice - tryParseHeader always populates streamInfoBytes on success.
            return rawBytes;
        }

        // Build: fLaC (4) + STREAMINFO block (38) + audio frames.
        const headerLen = FLAC_MAGIC.length + streamInfoBytes.length;
        const result = new Uint8Array(headerLen + rawBytes.length);
        result.set(FLAC_MAGIC, 0);
        result.set(streamInfoBytes, FLAC_MAGIC.length);
        result.set(rawBytes, headerLen);
        return result;
    }

    /**
     * Translate a playback position into a file-absolute byte offset to resume from.
     *
     * @param info - format description returned by tryParseHeader
     * @param positionSeconds - target position in seconds
     * @returns the offset of the last seek point at or before the target; the start of
     *   the audio data when there is no seek table or no seek point precedes the target
     */
    calculateByteOffset(info: FormatInfo, positionSeconds: number): number {
        const flacData = info.seekData?.kind === 'flac-seektable'
            ? info.seekData as FlacSeekData : null;

        if (flacData?.points && flacData.points.length > 0 && info.sampleRate > 0) {
            // SEEKTABLE binary search for the nearest point at or before the target sample.
            const targetSample = positionSeconds * info.sampleRate;
            const points = flacData.points;

            // -1 = no point at or before the target (also the outcome for NaN/negative input).
            let lo = 0, hi = points.length - 1, best = -1;
            while (lo <= hi) {
                const mid = (lo + hi) >> 1;
                if (points[mid].sampleNumber <= targetSample) {
                    best = mid;
                    lo = mid + 1;
                } else {
                    hi = mid - 1;
                }
            }
            if (best >= 0) {
                // streamOffset is bytes from start of audio data; add audioDataOffset for file-absolute.
                return info.audioDataOffset + points[best].streamOffset;
            }
            // Every seek point lies after the target: jumping to one would overshoot.
        }

        // No usable seek point: degrade to start of audio (seek restarts from beginning).
        return info.audioDataOffset;
    }
}

/**
 * Check whether a FLAC frame header starts at `start`. Validates the fixed fields
 * (reserved/forbidden codes), walks the variable-length parts (UTF-8-style coded number,
 * optional explicit block size and sample rate) and compares the trailing CRC-8.
 * Returns false when the header is cut off by the end of `buf`.
 */
function isValidFrameHeader(buf: Uint8Array, start: number): boolean {
    if (start + MIN_FRAME_HEADER_LEN > buf.length) return false;

    const blockSizeBits = buf[start + 2] >> 4;
    const sampleRateBits = buf[start + 2] & 0x0f;
    const channelBits = buf[start + 3] >> 4;
    const bitDepthBits = (buf[start + 3] >> 1) & 0x07;
    const reservedBit = buf[start + 3] & 0x01;

    if (blockSizeBits === 0x00 || sampleRateBits === 0x0f) return false; // reserved / forbidden
    if (channelBits > 0x0a || bitDepthBits === 0x03 || reservedBit !== 0) return false;

    // Coded frame/sample number: the lead byte announces how many continuation bytes follow.
    const lead = buf[start + 4];
    let continuation: number;
    if ((lead & 0x80) === 0x00) continuation = 0;
    else if ((lead & 0xe0) === 0xc0) continuation = 1;
    else if ((lead & 0xf0) === 0xe0) continuation = 2;
    else if ((lead & 0xf8) === 0xf0) continuation = 3;
    else if ((lead & 0xfc) === 0xf8) continuation = 4;
    else if ((lead & 0xfe) === 0xfc) continuation = 5;
    else if (lead === 0xfe) continuation = 6;
    else return false; // 10xxxxxx or 0xFF cannot open a coded number

    let pos = start + 5;
    for (let k = 0; k < continuation; k++, pos++) {
        if (pos >= buf.length || (buf[pos] & 0xc0) !== 0x80) return false;
    }

    // Optional explicit block size (8 or 16 bits) and sample rate (8 or 16 bits).
    if (blockSizeBits === 0x06) pos += 1;
    else if (blockSizeBits === 0x07) pos += 2;
    if (sampleRateBits === 0x0c) pos += 1;
    else if (sampleRateBits === 0x0d || sampleRateBits === 0x0e) pos += 2;

    if (pos >= buf.length) return false;
    return crc8(buf, start, pos) === buf[pos];
}

/** CRC-8 (polynomial 0x07, initial value 0) over buf[start, end). */
function crc8(buf: Uint8Array, start: number, end: number): number {
    let crc = 0;
    for (let i = start; i < end; i++) {
        crc ^= buf[i];
        for (let bit = 0; bit < 8; bit++) {
            crc = (crc & 0x80) !== 0 ? ((crc << 1) ^ 0x07) & 0xff : (crc << 1) & 0xff;
        }
    }
    return crc;
}

/**
 * Join chunks into one contiguous buffer. A single chunk is returned as-is (no copy).
 * The buffer is sized from the measured chunk lengths rather than `totalSize`, so a
 * stale count can neither overflow the copy nor append zero padding that would then be
 * parsed as metadata.
 */
function concat(chunks: Uint8Array[], totalSize: number): Uint8Array {
    if (chunks.length === 1) return chunks[0];
    let measured = 0;
    for (const c of chunks) measured += c.length;
    const out = new Uint8Array(measured);
    let pos = 0;
    for (const c of chunks) {
        out.set(c, pos);
        pos += c.length;
    }
    return out;
}

/** Read an unsigned 32-bit big-endian integer at byte offset `p`. */
function readUint32BE(buf: Uint8Array, p: number): number {
    // `>>> 0` reinterprets the signed 32-bit result of the shifts as unsigned.
    return ((buf[p] << 24) | (buf[p + 1] << 16) | (buf[p + 2] << 8) | buf[p + 3]) >>> 0;
}
*/ getAlignedSegmentSize( info: FormatInfo, availableBytes: number, requestedSize: number, - streamComplete: boolean + streamComplete: boolean, + rawData?: Uint8Array ): number; /** diff --git a/DeepDrftPublic/Interop/audio/StreamDecoder.ts b/DeepDrftPublic/Interop/audio/StreamDecoder.ts index 5288487..e4c7943 100644 --- a/DeepDrftPublic/Interop/audio/StreamDecoder.ts +++ b/DeepDrftPublic/Interop/audio/StreamDecoder.ts @@ -244,20 +244,31 @@ export class StreamDecoder { const segmentSize = 64 * 1024; // 64KB segments const availableBytes = this.totalRawBytes - this.processedBytes; + + // Peek the candidate window first so the aligner can scan for a format-specific + // frame boundary (FLAC). extractAlignedData is non-destructive — it reads from + // rawChunks without advancing processedBytes — so reading before alignment is safe. + const peekSize = Math.min(segmentSize, availableBytes); + if (peekSize === 0) return null; + const peekBytes = this.extractAlignedData(peekSize); + // Passing streamComplete lets the aligner relax the min-frame guard // for the final tail; otherwise residual <512-byte tails get dropped. const alignedSize = this.formatDecoder!.getAlignedSegmentSize( this.formatInfo, availableBytes, segmentSize, - this.streamComplete + this.streamComplete, + peekBytes ); if (alignedSize <= 0) return null; const segmentOffset = this.processedBytes; - const rawSegment = this.extractAlignedData(alignedSize); + // alignedSize is always ≤ peekSize ≤ peekBytes.length, so subarray is in-bounds + // and zero-copy — no second extraction needed. + const rawSegment = peekBytes.subarray(0, alignedSize); const decodableSegment = this.formatDecoder!.wrapSegment(this.formatInfo, rawSegment); try {