fix(seo): escape inline JSON-LD, per-release byArtist, soft-404 + env-gated noindex

Escape </>& in JSON-LD body to kill script-breakout; byArtist now uses the release artist; detail-page not-found branches emit noindex; default robots gated to Production via a PersistentState SeoEnvironment bridge.
This commit is contained in:
daniel-c-harvey
2026-06-23 06:10:03 -04:00
parent f3b89ca9d7
commit f976af0f7c
11 changed files with 157 additions and 9 deletions
@@ -0,0 +1,33 @@
using Microsoft.AspNetCore.Components;
namespace DeepDrftPublic.Client.Common;
/// <summary>
/// Environment-gated robots bridge (Phase 22 remediation §4). The beta/staging site is web-hosted and must
/// not be crawled, so the <i>default</i> robots directive is environment-gated: <c>index,follow</c> only in
/// Production, <c>noindex,nofollow</c> everywhere else. A per-page <see cref="SeoModel.Robots"/> override
/// still wins — this only sets the default.
///
/// <para>
/// Crawlers read the server-prerendered HTML, so correctness lives in the server prerender pass — but the
/// value must be identical across the InteractiveAuto double render (AC6), so the WASM pass has to resolve
/// the same flag. The WASM assembly has no <c>IWebHostEnvironment</c> (config comes from the server). This
/// mirrors the DarkMode bridge exactly: a scoped service the server seeds during prerender (from
/// <c>IWebHostEnvironment.IsProduction()</c>) and <c>[PersistentState]</c> rounds to the client, so both
/// passes resolve the identical value. <c>SeoHead</c> injects this rather than an environment dependency,
/// honouring the no-environment-in-the-component constraint.
/// </para>
/// </summary>
public class SeoEnvironment
{
/// <summary>
/// True only in Production. Seeded server-side and persisted across the WASM boot. Defaults to
/// <c>false</c> so the fail-safe is "do not index" — a missing bridge never accidentally opens a
/// non-production site to crawlers.
/// </summary>
[PersistentState]
public bool IsProduction { get; set; }
/// <summary>The environment-gated default robots directive. Explicit page values override this.</summary>
public string DefaultRobots => IsProduction ? "index,follow" : "noindex,nofollow";
}
+19 -2
View File
@@ -22,13 +22,30 @@ public static class SeoJsonLd
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
// schema.org keys are PascalCase ("@type", "byArtist", "datePublished"); JsonPropertyName drives
// each. Encoder relaxed so the JSON sits inline in HTML without over-escaping apostrophes etc.
// Note: the relaxed encoder leaves <, >, & raw — InlineSafe re-escapes exactly those before the
// body is injected into the <script> element. See Serialize.
Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
WriteIndented = false,
};
/// <summary>Renders a node to its compact JSON-LD script body. The host component wraps it in the script tag.</summary>
/// <summary>
/// Renders a node to its compact JSON-LD script body. The host component wraps it in the script tag.
/// The body is run through <see cref="InlineSafe"/> so CMS-authored values containing
/// <c>&lt;/script&gt;</c> or <c>&lt;</c> cannot break out of the inline script element (XSS).
/// </summary>
public static string Serialize<TNode>(TNode node) where TNode : JsonLdNode =>
JsonSerializer.Serialize(node, node.GetType(), Options);
InlineSafe(JsonSerializer.Serialize(node, node.GetType(), Options));
/// <summary>
/// Escapes the three characters that can break out of an inline <c>&lt;script type="application/ld+json"&gt;</c>
/// element. Replacing <c>&lt;</c>/<c>&gt;</c>/<c>&amp;</c> with their <c>\uXXXX</c> JSON escapes keeps the
/// JSON byte-for-byte equivalent on parse (a JSON string treats <c><</c> and <c>&lt;</c> identically)
/// while making <c>&lt;/script&gt;</c> impossible to emit raw — the documented safe pattern for inline JSON-LD.
/// </summary>
internal static string InlineSafe(string json) => json
.Replace("<", "\\u003C")
.Replace(">", "\\u003E")
.Replace("&", "\\u0026");
}
/// <summary>Base for every schema.org node: emits <c>@context</c> and <c>@type</c> first.</summary>
+3 -1
View File
@@ -119,7 +119,9 @@ public sealed record SeoModel
{
var canonicalPath = ReleaseRoutes.DetailHref(release.EntryKey, release.Medium);
var image = SeoUrls.CoverOrDefault(options, release.ImagePath);
var artist = new ArtistRef { Name = options.SiteName };
// byArtist reflects the release's own artist, consistent with the music:musician OG tag (Daniel's
// call) — not the collective name. Album sub-recordings share it: the tracks are by this artist.
var artist = new ArtistRef { Name = release.Artist };
var description = string.IsNullOrWhiteSpace(release.Description) ? options.DefaultDescription : release.Description;
// A mix is a single recording; its duration comes from the (single) track when present.
+3 -2
View File
@@ -40,8 +40,9 @@ public sealed record SeoOptions
/// <summary>The collective's primary genre, used in the MusicGroup JSON-LD node.</summary>
public string Genre { get; init; } = "Electronic";
/// <summary>Default robots directive; a page may override (e.g. the 404's <c>noindex,follow</c>).</summary>
public string DefaultRobots { get; init; } = "index,follow";
// The default robots directive is NOT a static option — it is environment-gated (Production →
// index,follow; non-production → noindex,nofollow) via SeoEnvironment so the beta/staging site is
// never crawled. A page's explicit SeoModel.Robots still overrides that default.
/// <summary>
/// Public social profile URLs for the MusicGroup <c>sameAs</c> array (OQ3). Instagram only —
+4 -1
View File
@@ -1,5 +1,6 @@
@using DeepDrftPublic.Client.Common
@inject SeoOptions Seo
@inject SeoEnvironment SeoEnv
@inject NavigationManager Nav
@*
@@ -83,7 +84,9 @@
{
_fullTitle = $"{Model.Title} · {Seo.TitleSuffix}";
_description = string.IsNullOrWhiteSpace(Model.Description) ? Seo.DefaultDescription : Model.Description;
_robots = string.IsNullOrWhiteSpace(Model.Robots) ? Seo.DefaultRobots : Model.Robots;
// Default robots is environment-gated (non-production → noindex,nofollow) so beta/staging is never
// crawled; an explicit per-page Robots still wins (e.g. the 404's / soft-404's noindex,follow).
_robots = string.IsNullOrWhiteSpace(Model.Robots) ? SeoEnv.DefaultRobots : Model.Robots;
_ogType = OgTypeString(Model.OgType);
// Canonical: BaseUrl + the model's path, defaulting to the current relative path. The origin is
@@ -16,6 +16,9 @@
}
else if (ViewModel.NotFound || ViewModel.Release is null)
{
@* Soft-404: a bad key renders a 200 "not found" view, so it must carry noindex so it is not indexed
(mirrors the dedicated /404 NotFound page). *@
<SeoHead Model="@SeoModel.ForNotFound(Seo)" />
<div class="deepdrft-track-detail-container">
<div class="deepdrft-track-detail-masthead">
<MudText Typo="Typo.h4" Align="Align.Center">Cut not found.</MudText>
@@ -15,6 +15,9 @@
}
else if (ViewModel.NotFound || ViewModel.Release is null)
{
@* Soft-404: a bad key renders a 200 "not found" view, so it must carry noindex so it is not indexed
(mirrors the dedicated /404 NotFound page). *@
<SeoHead Model="@SeoModel.ForNotFound(Seo)" />
<div class="deepdrft-track-detail-container">
<div class="deepdrft-track-detail-masthead">
<MudText Typo="Typo.h4" Align="Align.Center">Mix not found.</MudText>
@@ -19,6 +19,9 @@
}
else if (ViewModel.NotFound || ViewModel.Release is null)
{
@* Soft-404: a bad key renders a 200 "not found" view, so it must carry noindex so it is not indexed
(mirrors the dedicated /404 NotFound page). *@
<SeoHead Model="@SeoModel.ForNotFound(Seo)" />
<div class="deepdrft-track-detail-container">
<div class="deepdrft-track-detail-masthead">
<MudText Typo="Typo.h4" Align="Align.Center">Session not found.</MudText>
+5
View File
@@ -50,6 +50,11 @@ public static class Startup
// image, social links). Singleton: stateless config, identical in the server-prerender and WASM
// passes (this method runs in both), which is what makes SeoHead's double-render output identical.
services.AddSingleton(new SeoOptions());
// Environment-gated robots bridge. Scoped + [PersistentState] like DarkModeSettings: the server
// seeds IsProduction during prerender and it rounds to the WASM pass, so SeoHead resolves the same
// default robots in both render passes (non-production → noindex,nofollow, keeping beta uncrawled).
services.AddScoped<SeoEnvironment>();
}
public static void ConfigureApiHttpClient(IServiceCollection services, string baseAddress)