diff --git a/DeepDrftPublic.Client/Services/StreamingAudioPlayerService.cs b/DeepDrftPublic.Client/Services/StreamingAudioPlayerService.cs index b90c010..0db0a7c 100644 --- a/DeepDrftPublic.Client/Services/StreamingAudioPlayerService.cs +++ b/DeepDrftPublic.Client/Services/StreamingAudioPlayerService.cs @@ -1292,11 +1292,25 @@ public class StreamingAudioPlayerService : AudioPlayerService, IStreamingPlayerS /// private async Task DrainBackpressureAsync(CancellationToken cancellationToken) { + // [BP-DIAG] Time how long the read loop is actually parked here. A drain that releases in a few + // hundred ms is healthy throttling; a drain that holds for many seconds (paired with the JS + // PAUSED line) confirms back-pressure is parking production — whereas NO drain lines alongside + // frequent JS underrun PARK/RESUME lines means the loop never throttled and decode simply could + // not keep up. Logged only when it actually waited, so steady-state no-op drains add no noise. + var stopwatch = System.Diagnostics.Stopwatch.StartNew(); + var waited = false; while (IsPaused || await _audioInterop.IsProductionPaused(PlayerId)) { + waited = true; cancellationToken.ThrowIfCancellationRequested(); await Task.Delay(BackpressurePollMs, cancellationToken); } + if (waited) + { + _logger.LogInformation( + "[BP-DIAG] DrainBackpressure released after {Ms} ms (isPaused={IsPaused}, gen={Gen})", + stopwatch.ElapsedMilliseconds, IsPaused, _loadGeneration); + } } private async Task ThrottledNotifyStateChanged() diff --git a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts index acb3b71..be02c45 100644 --- a/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts +++ b/DeepDrftPublic/Interop/audio/OpusStreamDecoder.ts @@ -163,6 +163,11 @@ export class OpusStreamDecoder implements IStreamingDecoder { // order-sensitive). configure() is deferred too — no need to spin up the decoder while // throttled. The C# loop also stops reading above high-water, so the stash stays small. if (this.isSchedulerFull?.()) { + // [BP-DIAG] First stash since last drain — production is now throttled and decode is parked. + // Trivially removable. + if (this.pendingBytes.length === 0) { + console.log('[BP-DIAG] Opus stash START (scheduler full, decode parked)'); + } this.pendingBytes.push(chunk); return []; } @@ -173,6 +178,8 @@ export class OpusStreamDecoder implements IStreamingDecoder { // the new chunk, through the demuxer as one contiguous feed. const out: AudioBuffer[] = []; if (this.pendingBytes.length > 0) { + // [BP-DIAG] Scheduler drained below low-water — replaying the stash. Trivially removable. + console.log(`[BP-DIAG] Opus stash DRAIN ${this.pendingBytes.length} chunks`); const stashed = this.pendingBytes; this.pendingBytes = []; for (const bytes of stashed) { @@ -375,6 +382,13 @@ export class OpusStreamDecoder implements IStreamingDecoder { let iters = 0; const poll = () => { if (!this.decoder || this.decoder.decodeQueueSize === 0 || iters >= MAX_YIELD_ITERS) { + // [BP-DIAG] If we hit the 200 ms ceiling with the decode queue still non-empty, the + // WebCodecs decoder is falling behind realtime (the throughput suspect for sustained + // underrun — worse with HW accel off). Frequent CAP lines pin decode, not back-pressure, + // as the block. Trivially removable. + if (this.decoder && iters >= MAX_YIELD_ITERS && this.decoder.decodeQueueSize > 0) { + console.log(`[BP-DIAG] Opus yield CAP hit, decodeQueueSize=${this.decoder.decodeQueueSize} (decoder behind realtime)`); + } resolve(); return; } diff --git a/DeepDrftPublic/Interop/audio/PlaybackScheduler.test.ts b/DeepDrftPublic/Interop/audio/PlaybackScheduler.test.ts index 4f92de1..1328ef1 100644 --- a/DeepDrftPublic/Interop/audio/PlaybackScheduler.test.ts +++ b/DeepDrftPublic/Interop/audio/PlaybackScheduler.test.ts @@ -441,6 +441,56 @@ test('clear and clearForSeek release the back-pressure latch (C2 latency parity) assertEqual(s.evaluateProductionPause(), false, 'clearForSeek resets the latch'); }); +// Production defaults (no setForwardWindow): the widened 60s/30s cushion. The byte cap is the +// UNCHANGED hard OOM bound; these defaults only govern the time window. buf(1) carries no byte +// fields, so getDecodedByteEstimate is NaN and the byte guard never fires — the time window alone +// governs, which is exactly what we want to pin here. +test('default forward window throttles at 60s and resumes at 30s (no setForwardWindow)', () => { + const cm = new FakeContextManager(); + const s = makeScheduler(cm); + // Deliberately no setForwardWindow() — exercise the PRODUCTION defaults (high 60s / low 30s). + for (let i = 0; i < 70; i++) s.addBuffer(buf(1)); // 70s decoded, track [0,70) + cm.now = 0; + s.playFromPosition(0); + advanceCursorToEnd(s, cm); + + cm.now = 0; // forward lookahead = 70s ≥ 60s high-water + assertEqual(s.evaluateProductionPause(), true, 'pauses at the 60s default high-water'); + cm.now = 35; // lookahead 35s: inside the 30..60 band → stays paused (hysteresis) + assertEqual(s.evaluateProductionPause(), true, 'holds through the widened band'); + cm.now = 45; // lookahead 25s ≤ 30s low-water → resume + assertEqual(s.evaluateProductionPause(), false, 'resumes at the 30s default low-water'); +}); + +// Lookahead correctness in the underrun state + the prime block hypothesis directly refuted: when the +// playhead has drained the queue mid-stream, forward lookahead must read ~0 (not a stale-high value) +// so production is NOT throttled while decoded audio is genuinely low. +test('forward lookahead is exact during an underrun park and never trips a false pause', () => { + const cm = new FakeContextManager(); + const s = makeScheduler(cm); + // 5s decoded, playback started, stream NOT complete. + for (let i = 0; i < 5; i++) s.addBuffer(buf(1)); + cm.now = 0; + s.playFromPosition(0); + + // Playhead advances past the decoded tail and the queue drains → mid-stream underrun park. + cm.now = 6; + drainAllSources(s, cm); + assertEqual(s.isActive(), false, 'parked in underrun'); + + // At the park the playhead sits at the decoded tail: forward lookahead is 0, so production must + // NOT be throttled (the "paused while decoded audio is low" hypothesis must not hold here). + assertClose(s.getForwardLookaheadSeconds(), 0, 'lookahead is 0 at the underrun tail'); + assertEqual(s.evaluateProductionPause(), false, 'low decoded audio does not pause production'); + + // Refill arriving during the park grows the lead monotonically; lookahead reflects exactly it, + // measured against the FROZEN playhead — not a stale pre-underrun position. + s.addBuffer(buf(1)); + s.addBuffer(buf(1)); + assertClose(s.getForwardLookaheadSeconds(), 2, 'lookahead equals the freshly-accumulated lead'); + assertEqual(s.evaluateProductionPause(), false, 'still unthrottled well below the 60s high-water'); +}); + // === False end-of-playback guard (Opus-startup misfire) ====================================== // // The scheduler must distinguish a GENUINE end-of-track (stream complete AND queue drained) from a diff --git a/DeepDrftPublic/Interop/audio/PlaybackScheduler.ts b/DeepDrftPublic/Interop/audio/PlaybackScheduler.ts index c3a25c4..244b018 100644 --- a/DeepDrftPublic/Interop/audio/PlaybackScheduler.ts +++ b/DeepDrftPublic/Interop/audio/PlaybackScheduler.ts @@ -44,23 +44,33 @@ const DEFAULT_BACK_RETAIN_SECONDS = 10; * for Opus, the demux/decode feed) pauses above the high-water mark and resumes below the * low-water mark — classic hysteresis so the two producers do not chatter on/off per chunk. * - * Provisional time-based defaults (OQ1 — 21.4 tunes them): - * - HIGH (30 s): the most decoded lookahead we hold ahead of the playhead before throttling. + * Time-based defaults — the cushion, NOT the memory bound: + * - HIGH (60 s): the most decoded lookahead we hold ahead of the playhead before throttling. * Comfortably above the playback-start minimum (`AudioPlayer.minBuffersForPlayback = 6` - * buffers, each typically 0.06 – 1 s depending on format/chunk size; at most a few seconds - * even at the high end), so C2 holds — first audio never waits on a throttle (the high-water - * is reached only well after playback is already running). - * - LOW (15 s): resume producing here. Kept generous so the forward fill never drains to the - * ~500 ms scheduler lookahead under normal network jitter (AC3 — no starvation). + * buffers, each typically 0.06 – 1 s depending on format/chunk size), so C2 holds — first + * audio never waits on a throttle (the high-water is reached only well after playback runs). + * - LOW (30 s): resume producing here. Kept generous so the forward fill never drains to the + * ~500 ms scheduler lookahead under network/decode jitter (AC3 — no starvation). + * + * Why 60/30 and not the old 30/15: the time window is a CUSHION knob, not the memory guarantee — + * the OQ3 byte ceiling below is the hard OOM bound. The old 30 s was sized for WAV's byte density + * and needlessly starved the cushion for the async WebCodecs Opus path, whose decoded float + * footprint is tiny (48 kHz stereo ≈ 0.37 MB/s, so 60 s ≈ 23 MB — a fraction of the 96 MB cap) + * yet whose per-packet decode jitter (HW-accel-off software decode, main-thread AudioData copies) + * needs a deeper buffer to stay ahead of the playhead. Doubling the window lets Opus use the memory + * headroom the byte cap already permits. The byte cap is UNCHANGED, so high-density formats + * (lossless) still pause at exactly the same footprint as before — the OOM fix does not regress. * * OQ3 hard memory ceiling: an absolute byte cap on total decoded float held, independent of the * time window. This is the guard-rail that makes "1 GB never OOMs" a guarantee rather than a - * tuning hope — if a pathological stream packs an unusual amount of decoded audio into the time - * window, the byte cap still pauses production. Estimated as channels × frames × 4 bytes (f32). + * tuning hope — production pauses on `lookahead >= high OR bytes > cap`, whichever fires first, so + * the footprint can never exceed the cap regardless of the time window. For dense lossless the + * byte cap fires before 60 s (bounding memory exactly as the old 30 s window's byte estimate did); + * for sparse Opus the time window fires first, at ~23 MB. Estimated as channels × frames × 4 (f32). */ -const DEFAULT_FORWARD_HIGH_WATER_SECONDS = 30; -const DEFAULT_FORWARD_LOW_WATER_SECONDS = 15; -const DEFAULT_MAX_DECODED_BYTES = 96 * 1024 * 1024; // ~96 MB of decoded float PCM +const DEFAULT_FORWARD_HIGH_WATER_SECONDS = 60; +const DEFAULT_FORWARD_LOW_WATER_SECONDS = 30; +const DEFAULT_MAX_DECODED_BYTES = 96 * 1024 * 1024; // ~96 MB of decoded float PCM — the HARD OOM bound const BYTES_PER_FLOAT_SAMPLE = 4; /** @@ -295,6 +305,7 @@ export class PlaybackScheduler { evaluateProductionPause(): boolean { const lookahead = this.getForwardLookaheadSeconds(); const overByteCeiling = this.maxDecodedBytes > 0 && this.getDecodedByteEstimate() > this.maxDecodedBytes; + const wasPaused = this.productionPaused_; if (this.productionPaused_) { // Stay paused until BOTH the time window has drained below low-water AND the byte @@ -306,6 +317,19 @@ export class PlaybackScheduler { this.productionPaused_ = true; } + // [BP-DIAG] Log only the latch TRANSITIONS (not per-call) so a browser run shows exactly when + // production was throttled and the live numbers at that instant — the test for "production + // paused while decoded audio is actually low" (the prime block hypothesis). If a PAUSED line + // ever shows a small lookahead, the lookahead computation is the culprit; if it always shows + // ~high-water, back-pressure is innocent and the symptom is decode throughput. Trivially removable. + if (wasPaused !== this.productionPaused_) { + console.log( + `[BP-DIAG] production ${this.productionPaused_ ? 'PAUSED' : 'RESUMED'} ` + + `lookahead=${lookahead.toFixed(2)}s bytes=${(this.getDecodedByteEstimate() / 1048576).toFixed(1)}MB ` + + `buffers=${this.buffers.length} nextIdx=${this.nextBufferIndex} ` + + `pos=${this.getCurrentPosition().toFixed(2)}s overByteCeiling=${overByteCeiling}`); + } + return this.productionPaused_; } @@ -476,6 +500,12 @@ export class PlaybackScheduler { if (!this.streamComplete && !this.hasMinimumPlaybackLead()) { return; // still re-accumulating the rebuffer lead — remain parked } + // [BP-DIAG] Underrun resume — the playhead drained mid-stream and we have now rebuilt the + // lead. Frequent RESUME lines (paired with the PARK lines below) are the "repeatedly hits end + // of buffer" thrash: decode is not staying ahead. Trivially removable. + console.log( + `[BP-DIAG] underrun RESUME lead=${this.getForwardLookaheadSeconds().toFixed(2)}s ` + + `buffers=${this.buffers.length} nextIdx=${this.nextBufferIndex} streamComplete=${this.streamComplete}`); this.underrun_ = false; this.isActive_ = true; this.playbackAnchorTime = this.contextManager.currentTime; @@ -580,6 +610,12 @@ export class PlaybackScheduler { this.finishPlayback(); } else { this.underrun_ = true; + // [BP-DIAG] Mid-stream underrun: the scheduled queue drained and decode has not caught up. + // This is the symptom Daniel reports. The paired RESUME line above shows how long the gap + // lasted and what lead it rebuilt to. Trivially removable. + console.log( + `[BP-DIAG] underrun PARK pos=${this.getCurrentPosition().toFixed(2)}s ` + + `buffers=${this.buffers.length} nextIdx=${this.nextBufferIndex}`); // Hold the playhead at the decoded tail so getCurrentPosition stays exact during // the gap. isActive_ goes false so no stale-anchor scheduling occurs; resume // re-anchors at currentTime when buffers arrive.