Merge p1-w2-tc-streaming-majors: streaming majors findings 5-14

This commit is contained in:
Daniel Harvey
2026-05-17 18:23:15 -04:00
12 changed files with 483 additions and 67 deletions
@@ -8,13 +8,23 @@ namespace DeepDrftContent.Services.Audio;
/// </summary>
public class WavOffsetService
{
/// <summary>
/// WAV audio format code for linear PCM. The pipeline (AudioProcessor,
/// WavOffsetService, and wavutils.ts) is PCM-only by design — IEEE Float
/// (format 3) and other formats are rejected at parse time so the
/// synthesized header here can safely assume PCM.
/// </summary>
public const short PcmFormat = 1;
/// <summary>
/// Creates a stream containing a synthesized WAV header followed by audio data from the specified offset.
/// The returned stream is composed of a small header buffer and a non-owning slice over the input
/// buffer — no copy of the audio payload is made.
/// </summary>
/// <param name="fullAudioBuffer">The complete WAV file buffer</param>
/// <param name="byteOffset">Byte offset into the raw audio data (not including original header)</param>
/// <returns>MemoryStream with new WAV header + audio data from offset, or null if invalid</returns>
public MemoryStream? CreateOffsetStream(byte[] fullAudioBuffer, long byteOffset)
/// <returns>Stream with new WAV header + audio data from offset, or null if invalid</returns>
public Stream? CreateOffsetStream(byte[] fullAudioBuffer, long byteOffset)
{
var format = ParseWavHeader(fullAudioBuffer);
if (format == null)
@@ -27,28 +37,44 @@ public class WavOffsetService
// Align to block boundary for clean audio
var alignedOffset = (byteOffset / format.BlockAlign) * format.BlockAlign;
// Calculate new data size
var newDataSize = format.DataSize - (int)alignedOffset;
// Calculate new data size (long arithmetic — DataSize may be up to ~4 GB)
var newDataSize = format.DataSize - alignedOffset;
if (newDataSize <= 0)
return null;
// Create new WAV header
var newHeader = CreateWavHeader(format, newDataSize);
// Calculate source position in original buffer
// MemoryStream does not support offsets or lengths beyond int.MaxValue.
// RF64 (>2 GB audio segments) is not supported; reject before truncating.
var sourcePosition = format.HeaderSize + alignedOffset;
if (sourcePosition > int.MaxValue || newDataSize > int.MaxValue)
throw new NotSupportedException("Audio file segment exceeds 2 GB; RF64 not supported");
// Create result stream: new header + audio data from offset
var resultStream = new MemoryStream(44 + newDataSize);
resultStream.Write(newHeader, 0, 44);
resultStream.Write(fullAudioBuffer, (int)sourcePosition, newDataSize);
resultStream.Position = 0;
var newDataSizeInt = (int)newDataSize;
var sourcePositionInt = (int)sourcePosition;
return resultStream;
// Create new WAV header using the format reported by the parsed header.
// PCM is the only format we accept (see PcmFormat / ParseWavHeader), but
// threading format.AudioFormat through keeps the header self-consistent
// and prevents drift if the validation contract is ever relaxed.
var newHeader = CreateWavHeader(format, newDataSizeInt);
// Compose: 44-byte header followed by a non-copying slice of the audio payload.
// Wrapping the original buffer in a MemoryStream window avoids a 100MB+ copy
// that the previous MemoryStream(capacity).Write(...) implementation forced.
var headerStream = new MemoryStream(newHeader, writable: false);
var dataStream = new MemoryStream(
fullAudioBuffer,
sourcePositionInt,
newDataSizeInt,
writable: false,
publiclyVisible: false);
return new ConcatStream(headerStream, dataStream);
}
/// <summary>
/// Parses the WAV header from a buffer to extract format information.
/// PCM-only — IEEE Float (format 3) and other non-PCM formats are rejected
/// so downstream synthesis can safely assume PCM sample encoding.
/// </summary>
public WavFormat? ParseWavHeader(byte[] buffer)
{
@@ -70,8 +96,9 @@ public class WavOffsetService
int bitsPerSample = 0;
int byteRate = 0;
int blockAlign = 0;
int dataSize = 0;
long dataSize = 0;
int headerSize = 0;
short audioFormat = 0;
bool foundFmt = false;
bool foundData = false;
@@ -82,14 +109,20 @@ public class WavOffsetService
var chunkId = Encoding.ASCII.GetString(buffer, chunkOffset, 4);
var chunkSize = BitConverter.ToInt32(buffer, chunkOffset + 4);
if (chunkSize < 0)
return null;
if (chunkId == "fmt ")
{
if (chunkSize < 16)
return null;
var audioFormat = BitConverter.ToInt16(buffer, chunkOffset + 8);
// Support PCM (1) and IEEE Float (3) formats
if (audioFormat != 1 && audioFormat != 3)
audioFormat = BitConverter.ToInt16(buffer, chunkOffset + 8);
// PCM only. Float32 WAVs were previously accepted here but the synthesized
// header below is PCM-shaped — accepting Float would produce a corrupt file
// claiming PCM with Float-encoded samples. AudioProcessor also rejects
// non-PCM at upload time so this branch is defense in depth.
if (audioFormat != PcmFormat)
return null;
channels = BitConverter.ToInt16(buffer, chunkOffset + 10);
@@ -106,7 +139,9 @@ public class WavOffsetService
}
else if (chunkId == "data")
{
dataSize = chunkSize;
// WAV stores DataSize as a 32-bit unsigned int. Read as uint to preserve
// values above int.MaxValue (files between 24 GB), then widen to long.
dataSize = (long)BitConverter.ToUInt32(buffer, chunkOffset + 4);
headerSize = chunkOffset + 8; // Audio data starts after 'data' + size (8 bytes)
foundData = true;
}
@@ -124,6 +159,7 @@ public class WavOffsetService
return null;
return new WavFormat(
AudioFormat: audioFormat,
SampleRate: sampleRate,
Channels: channels,
BitsPerSample: bitsPerSample,
@@ -135,7 +171,9 @@ public class WavOffsetService
}
/// <summary>
/// Creates a standard 44-byte PCM WAV header.
/// Creates a standard 44-byte WAV header. The audio format code is taken from
/// <paramref name="format"/> rather than hardcoded so the synthesized header matches
/// what was parsed (today always <see cref="PcmFormat"/>; see ParseWavHeader).
/// </summary>
public byte[] CreateWavHeader(WavFormat format, int dataSize)
{
@@ -150,7 +188,7 @@ public class WavOffsetService
// fmt chunk
header[12] = (byte)'f'; header[13] = (byte)'m'; header[14] = (byte)'t'; header[15] = (byte)' ';
BitConverter.GetBytes(16).CopyTo(header, 16); // fmt chunk size
BitConverter.GetBytes((short)1).CopyTo(header, 20); // Audio format (PCM)
BitConverter.GetBytes(format.AudioFormat).CopyTo(header, 20); // Audio format (from parsed header)
BitConverter.GetBytes((short)format.Channels).CopyTo(header, 22);
BitConverter.GetBytes(format.SampleRate).CopyTo(header, 24);
BitConverter.GetBytes(format.ByteRate).CopyTo(header, 28);
@@ -168,12 +206,110 @@ public class WavOffsetService
/// <summary>
/// WAV format information extracted from header.
/// </summary>
/// <param name="AudioFormat">WAV fmt-chunk audio format code (1 = PCM; the only value accepted today).</param>
public record WavFormat(
short AudioFormat,
int SampleRate,
int Channels,
int BitsPerSample,
int ByteRate,
int BlockAlign,
int DataSize,
long DataSize,
int HeaderSize
);
/// <summary>
/// Forward-only read stream over two underlying streams concatenated end-to-end.
/// Lets us serve "[synthesized header][slice of original buffer]" without
/// allocating a single contiguous buffer for the combined payload.
/// </summary>
internal sealed class ConcatStream : Stream
{
private readonly Stream _first;
private readonly Stream _second;
private readonly long _length;
private long _position;
public ConcatStream(Stream first, Stream second)
{
_first = first;
_second = second;
_length = first.Length + second.Length;
}
public override bool CanRead => true;
public override bool CanSeek => false;
public override bool CanWrite => false;
public override long Length => _length;
public override long Position
{
get => _position;
set => throw new NotSupportedException();
}
public override int Read(byte[] buffer, int offset, int count)
{
var total = 0;
// Loop over _first until it returns 0 (exhausted) or the caller's buffer
// is full. Stream.Read is not required to fill the buffer in one call even
// when data is available (e.g. a future non-MemoryStream _first), so we must
// keep pulling until we get 0 before advancing to _second.
while (count > 0 && _position < _first.Length)
{
var read = _first.Read(buffer, offset, count);
if (read == 0) break;
total += read;
_position += read;
offset += read;
count -= read;
}
if (count > 0)
{
var read = _second.Read(buffer, offset, count);
total += read;
_position += read;
}
return total;
}
public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
{
var total = 0;
// Same loop contract as Read() — exhaust _first before reading _second.
while (!buffer.IsEmpty && _position < _first.Length)
{
var read = await _first.ReadAsync(buffer, cancellationToken);
if (read == 0) break;
total += read;
_position += read;
buffer = buffer[read..];
}
if (!buffer.IsEmpty)
{
var read = await _second.ReadAsync(buffer, cancellationToken);
total += read;
_position += read;
}
return total;
}
public override void Flush() { }
public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
public override void SetLength(long value) => throw new NotSupportedException();
public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
protected override void Dispose(bool disposing)
{
if (disposing)
{
_first.Dispose();
_second.Dispose();
}
base.Dispose(disposing);
}
}
@@ -85,6 +85,53 @@ public abstract class MediaVault : VaultIndexDirectory
return (T)result;
}
/// <summary>
/// Opens a read-only stream over an entry's backing file plus its metadata
/// (extension/MIME), without buffering the file into memory.
/// Returns null if the entry is unknown or the backing file is missing.
///
/// Use this when the caller will forward bytes to a network response — the
/// existing <see cref="GetEntryAsync{T}"/> allocates a full <c>byte[]</c>
/// and pushes large WAVs onto the LOH for every request.
///
/// The caller owns the returned stream and must dispose it. Error-handling
/// follows the same swallow-and-return-null contract as the rest of the
/// FileDatabase API; the caller checks for null.
/// </summary>
public async Task<MediaStream?> GetEntryStreamAsync(string entryId)
{
try
{
if (!await HasIndexEntry(entryId))
return null;
var metaData = await GetEntryMetadata(entryId);
if (metaData == null)
return null;
var mediaPath = GetMediaPathFromEntryKey(metaData.MediaKey, metaData.Extension);
if (!FileUtils.FileExists(mediaPath))
return null;
// Async-capable, sequential-scan FileStream — the response writer will pull
// bytes in order. bufferSize matches FileUtils.FetchFileAsync (64 KB).
var stream = new FileStream(
mediaPath,
FileMode.Open,
FileAccess.Read,
FileShare.Read,
bufferSize: 64 * 1024,
useAsync: true);
return new MediaStream(stream, metaData.Extension);
}
catch
{
// Match FileDatabase error-swallow contract.
return null;
}
}
/// <summary>
/// Extracts buffer and extension from a media binary
/// </summary>
@@ -144,3 +191,22 @@ public class AudioVault : MediaVault
return null;
}
}
/// <summary>
/// An open read-only stream over a vault entry plus the extension needed to
/// resolve its MIME type. Caller owns the stream and must dispose it.
/// </summary>
public sealed class MediaStream : IDisposable, IAsyncDisposable
{
public Stream Stream { get; }
public string Extension { get; }
public MediaStream(Stream stream, string extension)
{
Stream = stream;
Extension = extension;
}
public void Dispose() => Stream.Dispose();
public ValueTask DisposeAsync() => Stream.DisposeAsync();
}
+57 -10
View File
@@ -1,5 +1,7 @@
using DeepDrftContent.Services.Audio;
using DeepDrftContent.Services.Audio;
using DeepDrftContent.Services.Constants;
using DeepDrftContent.Services.FileDatabase.Models;
using DeepDrftContent.Services.FileDatabase.Services;
using DeepDrftContent.Middleware;
using Microsoft.AspNetCore.Mvc;
@@ -38,6 +40,59 @@ public class TrackController : ControllerBase
try
{
// No-offset path: stream the file straight from disk so a 100 MB WAV does not
// force a 100 MB LOH allocation per request. The offset path still loads
// the full buffer because WavOffsetService block-aligns and reslices into
// a composite stream over the in-memory buffer.
if (offset == 0)
{
var vault = _fileDatabase.GetVault(VaultConstants.Tracks);
if (vault == null)
{
_logger.LogWarning("Tracks vault not found");
return NotFound();
}
var mediaStream = await vault.GetEntryStreamAsync(trackId);
if (mediaStream == null)
{
_logger.LogWarning("Track not found: {TrackId}", trackId);
return NotFound();
}
// Resolve MIME and log before handing the stream to File().
// If anything here throws, the finally block disposes the wrapper
// (and its inner FileStream) so neither leaks. On the success path
// File() takes ownership of the inner stream; ASP.NET Core disposes
// it after the response body is sent. The wrapper is a thin struct
// with no extra resources, so disposing it after extracting the
// inner stream is a no-op — we only call Dispose() in the catch path.
string streamMimeType;
long streamLength;
Stream innerStream;
try
{
streamMimeType = MimeTypeExtensions.GetMimeType(mediaStream.Extension);
streamLength = mediaStream.Stream.Length;
innerStream = mediaStream.Stream;
}
catch
{
await mediaStream.DisposeAsync();
throw;
}
_logger.LogInformation(
"Streaming track from disk: {TrackId}, Size: {Size} bytes",
trackId, streamLength);
// enableRangeProcessing: false — seek is served by WavOffsetService, not Range.
return File(innerStream, streamMimeType, enableRangeProcessing: false);
}
// Offset path: route through TrackService.GetAudioBinaryAsync (Track B's
// orchestrator boundary) so the controller stays out of FileDatabase directly.
// The buffered AudioBinary is required because WavOffsetService block-aligns
// and reslices into a composite stream over the in-memory buffer.
var file = await _trackService.GetAudioBinaryAsync(trackId);
if (file == null)
{
@@ -47,14 +102,6 @@ public class TrackController : ControllerBase
var mimeType = MimeTypeExtensions.GetMimeType(file.Extension);
// If no offset, return the full file
if (offset == 0)
{
_logger.LogInformation("Successfully retrieved track: {TrackId}, Size: {Size} bytes", trackId, file.Buffer.Length);
return File(file.Buffer, mimeType);
}
// Create offset stream with synthesized header
var offsetStream = _wavOffsetService.CreateOffsetStream(file.Buffer, offset);
if (offsetStream == null)
{
@@ -85,4 +132,4 @@ public class TrackController : ControllerBase
DeepDrftContent.Services.Constants.VaultConstants.Tracks, trackId, audioBinary);
return success ? Ok() : BadRequest("Failed to store audio track");
}
}
}
+15 -6
View File
@@ -7,7 +7,7 @@ public class TrackMediaResponse : IDisposable
{
public Stream Stream { get; }
public long ContentLength { get; }
public TrackMediaResponse(Stream stream, long contentLength)
{
Stream = stream;
@@ -23,13 +23,22 @@ public class TrackMediaResponse : IDisposable
public class TrackMediaClient
{
private readonly HttpClient _http;
public TrackMediaClient(IHttpClientFactory httpClientFactory)
{
_http = httpClientFactory.CreateClient("DeepDrft.Content");
}
public async Task<ApiResult<TrackMediaResponse>> GetTrackMedia(string trackId, long byteOffset = 0)
/// <summary>
/// Fetches the WAV stream for a track, optionally starting from a byte offset.
/// The cancellation token is forwarded to <see cref="HttpClient.GetAsync"/> so a
/// navigation or seek-replacement aborts the in-flight server connection rather
/// than leaving the server draining bytes into a dead socket.
/// </summary>
public async Task<ApiResult<TrackMediaResponse>> GetTrackMedia(
string trackId,
long byteOffset = 0,
CancellationToken cancellationToken = default)
{
try
{
@@ -39,11 +48,11 @@ public class TrackMediaClient
: $"api/track/{trackId}";
// Use HttpCompletionOption.ResponseHeadersRead to get stream immediately
var response = await _http.GetAsync(url, HttpCompletionOption.ResponseHeadersRead);
var response = await _http.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
response.EnsureSuccessStatusCode();
var contentLength = response.Content.Headers.ContentLength ?? 0;
var stream = await response.Content.ReadAsStreamAsync();
var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
return ApiResult<TrackMediaResponse>.CreatePassResult(new TrackMediaResponse(stream, contentLength));
}
@@ -52,4 +61,4 @@ public class TrackMediaClient
return ApiResult<TrackMediaResponse>.CreateFailResult(e.Message);
}
}
}
}
@@ -1,20 +1,20 @@
using DeepDrftWeb.Client.Services;
using DeepDrftWeb.Client.Services;
using DeepDrftWeb.Client.Clients;
using Microsoft.AspNetCore.Components;
using Microsoft.Extensions.Logging;
namespace DeepDrftWeb.Client.Controls;
public partial class AudioPlayerProvider : ComponentBase
public partial class AudioPlayerProvider : ComponentBase, IAsyncDisposable
{
[Inject] public required AudioInteropService AudioInterop { get; set; }
[Inject] public required TrackMediaClient TrackMediaClient { get; set; }
[Inject] public required ILogger<StreamingAudioPlayerService> Logger { get; set; }
private StreamingAudioPlayerService? _audioPlayerService;
[Parameter] public RenderFragment? ChildContent { get; set; }
protected override void OnInitialized()
{
// Create the service immediately (but don't initialize yet)
@@ -25,7 +25,7 @@ public partial class AudioPlayerProvider : ComponentBase
_audioPlayerService.OnStateChanged = new EventCallback(this, () => InvokeAsync(StateHasChanged));
// OnTrackSelected will be set by individual child components that need it
}
protected override async Task OnAfterRenderAsync(bool firstRender)
{
if (firstRender && _audioPlayerService != null)
@@ -35,4 +35,18 @@ public partial class AudioPlayerProvider : ComponentBase
StateHasChanged();
}
}
}
/// <summary>
/// Dispose the player on unmount so the JS setInterval driving progress
/// callbacks no longer holds a DotNetObjectReference into a destroyed
/// component (otherwise it throws every 100ms after navigation away).
/// </summary>
public async ValueTask DisposeAsync()
{
if (_audioPlayerService != null)
{
await _audioPlayerService.DisposeAsync();
_audioPlayerService = null;
}
}
}
@@ -393,7 +393,7 @@ public abstract class AudioPlayerService : IPlayerService, IAsyncDisposable
await OnTrackSelected.Value.InvokeAsync();
}
public async ValueTask DisposeAsync()
public virtual async ValueTask DisposeAsync()
{
if (IsInitialized)
{
@@ -88,7 +88,12 @@ public class StreamingAudioPlayerService : AudioPlayerService, IStreamingPlayerS
await NotifyStateChanged();
var mediaResult = await _trackMediaClient.GetTrackMedia(track.EntryKey);
// Pass the streaming token to the HTTP layer so a navigation/track switch
// aborts the server connection instead of leaving it draining bytes.
var mediaResult = await _trackMediaClient.GetTrackMedia(
track.EntryKey,
byteOffset: 0,
cancellationToken: _streamingCancellation.Token);
if (!mediaResult.Success)
{
var technicalError = mediaResult.GetMessage();
@@ -344,7 +349,10 @@ public class StreamingAudioPlayerService : AudioPlayerService, IStreamingPlayerS
await NotifyStateChanged();
// Request new stream from offset
var mediaResult = await _trackMediaClient.GetTrackMedia(_currentTrackId, byteOffset);
var mediaResult = await _trackMediaClient.GetTrackMedia(
_currentTrackId,
byteOffset,
cancellationToken: _streamingCancellation.Token);
if (!mediaResult.Success || mediaResult.Value == null)
{
var technicalError = mediaResult.GetMessage() ?? "Failed to load audio from position";
@@ -483,6 +491,25 @@ public class StreamingAudioPlayerService : AudioPlayerService, IStreamingPlayerS
}
}
/// <summary>
/// On component unmount we must cancel the in-flight streaming loop and tear
/// down JS callbacks before the JS side's setInterval fires again with a
/// stale DotNetObjectReference. ResetToIdle covers cancellation + JS stop
/// + state reset; the base then disposes the JS player and its callbacks.
/// </summary>
public override async ValueTask DisposeAsync()
{
try
{
await ResetToIdle();
}
catch
{
// Disposal must not throw; any failure here is best-effort cleanup.
}
await base.DisposeAsync();
}
private void AdaptBufferSize(int bytesRead, long readTimeMs)
{
// Adaptive buffer sizing based on network performance
+18 -4
View File
@@ -175,13 +175,21 @@ export class AudioPlayer {
}
}
startStreamingPlayback(): AudioResult {
async startStreamingPlayback(): Promise<AudioResult> {
if (!this.scheduler.hasBuffers()) {
return { success: false, error: 'No buffers available' };
}
try {
console.log('\n=== Starting streaming playback ===');
// A backgrounded tab leaves AudioContext suspended. createBufferSource/start
// against a suspended context produces no audio without throwing — the same
// failure mode that was fixed for play() (resume path). Awaiting ensureReady()
// here guarantees the context is running before playFromPosition schedules
// any AudioBufferSourceNodes.
await this.contextManager.ensureReady();
this.streamingStarted = true;
this.isPlaying = true;
this.isPaused = false;
@@ -199,7 +207,7 @@ export class AudioPlayer {
// ==================== Playback Control ====================
play(): AudioResult {
async play(): Promise<AudioResult> {
if (!this.isStreamingMode) {
return { success: false, error: 'Not in streaming mode' };
}
@@ -215,7 +223,11 @@ export class AudioPlayer {
}
try {
this.contextManager.ensureReady();
// Must await: a backgrounded tab leaves AudioContext suspended, and
// createBufferSource/source.start against a suspended context produces
// no audio without throwing. Firing ensureReady() without await meant
// play() returned success but the user heard nothing.
await this.contextManager.ensureReady();
this.isPlaying = true;
this.isPaused = false;
@@ -313,7 +325,9 @@ export class AudioPlayer {
private seekBeyondBuffer(position: number): AudioResult {
try {
const byteOffset = this.streamDecoder.calculateByteOffset(position);
if (byteOffset <= 0) {
// 0 is a valid offset (seek to start of audio data). Only a negative result
// indicates calculation failure — typically a missing/unparsed WAV header.
if (byteOffset < 0) {
return { success: false, error: 'Cannot calculate byte offset' };
}
@@ -110,7 +110,15 @@ export class PlaybackScheduler {
}
if (startBufferIndex >= this.buffers.length) {
console.log('Position beyond available buffers');
// Position landed at or past the end of all buffers. Previously this
// returned silently, leaving the player stuck "playing" with no source
// scheduled — a pause near the end followed by play never recovered.
// Treat this as end-of-track so listeners (UI / end callback) fire.
console.log('Position at/beyond available buffers — ending playback');
this.isActive_ = false;
this.playbackAnchorTime = 0;
this.playbackAnchorPosition = 0;
this.onPlaybackEnded?.();
return;
}
+102 -9
View File
@@ -12,6 +12,36 @@ export interface DecodedChunkResult {
duration: number;
}
/**
* Thrown when decodeAudioData exceeds the per-segment deadline. Distinct from
* DecodeError so callers (and operators reading logs) can tell a slow/throttled
* decoder from corrupt audio data — the previous "Decode timeout" string error
* was indistinguishable from any other Error and was silently swallowed.
*/
export class DecodeTimeoutError extends Error {
constructor(public readonly segmentOffset: number, public readonly byteCount: number) {
super(`Decode timeout at offset ${segmentOffset} (${byteCount} bytes)`);
this.name = 'DecodeTimeoutError';
}
}
/**
* Thrown when decodeAudioData rejects for non-timeout reasons (corrupt header,
* unsupported format, etc.). Carries the segment offset so callers can log
* which part of the stream failed.
*/
export class DecodeError extends Error {
constructor(
message: string,
public readonly segmentOffset: number,
public readonly byteCount: number,
public readonly cause?: Error
) {
super(message);
this.name = 'DecodeError';
}
}
export class StreamDecoder {
// Upper bound on pre-header accumulation. 256 KB is far beyond any sane WAV
// header (including extended LIST/INFO/JUNK chunks). If we have accumulated
@@ -173,7 +203,15 @@ export class StreamDecoder {
}
/**
* Try to decode the next segment of audio
* Try to decode the next segment of audio.
*
* Failure modes:
* - Decode timeout: retry once, then surface as DecodeTimeoutError (typed).
* - Other decode error (corrupt data, format mismatch): surface as DecodeError.
* Both are thrown rather than silently swallowed — callers (processChunk /
* markStreamComplete) decide whether to abort the stream or skip the segment.
* processedBytes is only advanced on success so a thrown failure does not
* silently consume the failed segment.
*/
private async tryDecodeNextSegment(): Promise<DecodedChunkResult | null> {
if (!this.wavHeader) return null;
@@ -199,15 +237,63 @@ export class StreamDecoder {
const wavFile = this.createWavFile(rawSegment);
try {
const buffer = await this.decodeWithTimeout(wavFile);
// Advance only after a successful decode so that a timeout or decode
// failure does not permanently skip the segment.
const buffer = await this.decodeWithRetry(wavFile, segmentOffset, alignedSize);
// Advance only after a successful decode so a thrown timeout/decode
// failure does not silently drop the segment.
this.processedBytes += alignedSize;
console.log(`✓ Decoded: ${buffer.duration.toFixed(3)}s, ${buffer.numberOfChannels}ch`);
return { buffer, duration: buffer.duration };
} catch (error) {
console.error(`Failed to decode segment at offset ${segmentOffset}:`, error);
return null;
// Re-throw typed errors so the outer drain loop in processChunk /
// markStreamComplete sees the real failure instead of an empty array.
// The previous silent return hid timeouts entirely.
if (error instanceof DecodeTimeoutError || error instanceof DecodeError) {
throw error;
}
// Unknown synchronous failure during decode — wrap and surface.
throw new DecodeError(
`Decode failed at offset ${segmentOffset} (${alignedSize} bytes): ${(error as Error).message}`,
segmentOffset,
alignedSize,
error as Error);
}
}
/**
* Decode with a single retry on timeout. Web Audio's decodeAudioData is
* occasionally flaky under tab throttling; a retry costs little and recovers
* the common transient case without dropping the segment.
*/
private async decodeWithRetry(
wavData: Uint8Array,
segmentOffset: number,
alignedSize: number): Promise<AudioBuffer> {
try {
return await this.decodeWithTimeout(wavData);
} catch (error) {
if (!(error instanceof DecodeTimeoutError)) {
throw new DecodeError(
`Decode failed at offset ${segmentOffset} (${alignedSize} bytes): ${(error as Error).message}`,
segmentOffset,
alignedSize,
error as Error);
}
console.warn(
`Decode timeout at offset ${segmentOffset} (${alignedSize} bytes) — retrying once`);
try {
return await this.decodeWithTimeout(wavData);
} catch (retryError) {
if (retryError instanceof DecodeTimeoutError) {
console.error(
`Decode timeout after retry at offset ${segmentOffset} (${alignedSize} bytes)`);
throw new DecodeTimeoutError(segmentOffset, alignedSize);
}
throw new DecodeError(
`Decode failed on retry at offset ${segmentOffset} (${alignedSize} bytes): ${(retryError as Error).message}`,
segmentOffset,
alignedSize,
retryError as Error);
}
}
}
@@ -257,18 +343,25 @@ export class StreamDecoder {
}
/**
* Decode with timeout to prevent hanging
* Decode with timeout to prevent hanging. Throws DecodeTimeoutError if the
* deadline expires so callers can distinguish timeout from corrupt-data
* failures (decodeAudioData throws DOMException for the latter).
*/
private async decodeWithTimeout(wavData: Uint8Array, timeoutMs: number = 5000): Promise<AudioBuffer> {
const buffer = new ArrayBuffer(wavData.length);
new Uint8Array(buffer).set(wavData);
const decodePromise = this.contextManager.decodeAudioData(buffer);
let timer: ReturnType<typeof setTimeout> | null = null;
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => reject(new Error('Decode timeout')), timeoutMs);
timer = setTimeout(() => reject(new DecodeTimeoutError(-1, wavData.length)), timeoutMs);
});
return Promise.race([decodePromise, timeoutPromise]);
try {
return await Promise.race([decodePromise, timeoutPromise]);
} finally {
if (timer !== null) clearTimeout(timer);
}
}
/**
+2 -2
View File
@@ -39,7 +39,7 @@ const DeepDrftAudio = {
return player.processStreamingChunk(chunk);
},
startStreamingPlayback: (playerId: string): AudioResult => {
startStreamingPlayback: async (playerId: string): Promise<AudioResult> => {
const player = audioPlayers.get(playerId);
if (!player) return { success: false, error: 'Player not found' };
return player.startStreamingPlayback();
@@ -57,7 +57,7 @@ const DeepDrftAudio = {
return player.ensureAudioContextReady();
},
play: (playerId: string): AudioResult => {
play: async (playerId: string): Promise<AudioResult> => {
const player = audioPlayers.get(playerId);
if (!player) return { success: false, error: 'Player not found' };
return player.play();
+5 -3
View File
@@ -51,9 +51,11 @@ class WavUtils {
if (chunkSize < 16) return null;
const audioFormat = view.getUint16(chunkOffset + 8, true);
// Support PCM (1) and IEEE Float (3) formats
if (audioFormat !== 1 && audioFormat !== 3) {
console.warn(`Unsupported audio format: ${audioFormat} (only PCM=1 and IEEE Float=3 supported)`);
// PCM only. The server's WavOffsetService synthesises PCM-shaped headers,
// and AudioProcessor rejects non-PCM at upload — accepting Float here would
// hand the decoder a header/payload mismatch that surfaces as garbled audio.
if (audioFormat !== 1) {
console.warn(`Unsupported audio format: ${audioFormat} (only PCM=1 supported)`);
return null;
}