feature: Phase 23 Track A — env-gated /robots.txt + /sitemap.xml public crawl endpoints
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
namespace DeepDrftPublic.Seo;
|
||||
|
||||
/// <summary>
/// Pure composition of the <c>robots.txt</c> body (Phase 23 wave 23.1). The environment gate is the
/// caller's: the endpoint evaluates <c>IsProduction()</c> on its
/// <see cref="Microsoft.AspNetCore.Hosting.IWebHostEnvironment"/> server-side and passes the boolean
/// here, so the production-vs-beta branch lives in one testable place.
/// Fail-safe is closed — anything that is not Production yields <c>Disallow: /</c> (Invariant E1).
/// </summary>
public static class RobotsTxt
{
    // Every directive line ends in an explicit "\n", independent of the host platform's newline.

    /// <summary>Body served outside Production: everything is disallowed and no sitemap is advertised.</summary>
    private const string ClosedBody =
        "User-agent: *\n" +
        "Disallow: /\n";

    /// <summary>Production directives that do not depend on the canonical origin.</summary>
    private const string OpenBody =
        "User-agent: *\n" +
        "Allow: /\n" +
        "Disallow: /FramePlayer\n" +
        "Disallow: /api/\n";

    /// <summary>
    /// Builds the directive body. In Production: allow everything except the embed shell and the proxy API
    /// paths, plus a <c>Sitemap:</c> pointer (OQ-R2). In any non-production environment: a closed door
    /// (<c>Disallow: /</c>) with no sitemap pointer, so a crawl of beta sees nothing and the sitemap is
    /// never advertised.
    /// </summary>
    /// <param name="isProduction">The server-side <c>IsProduction()</c> result — the single gate.</param>
    /// <param name="baseUrl">
    /// Canonical origin for the <c>Sitemap:</c> line; Production only. Trailing slashes are trimmed.
    /// When it is null, blank, or consists only of slashes, the <c>Sitemap:</c> line is omitted:
    /// the directive needs an absolute URL, and a relative one (or an exception) would be worse than none.
    /// </param>
    /// <returns>The complete <c>robots.txt</c> text, newline-terminated.</returns>
    public static string Build(bool isProduction, string baseUrl)
    {
        if (!isProduction)
        {
            // baseUrl is deliberately not inspected here: the closed body never advertises a sitemap.
            return ClosedBody;
        }

        var origin = baseUrl?.TrimEnd('/');
        if (string.IsNullOrWhiteSpace(origin))
        {
            // No usable origin: still serve the allow/disallow rules rather than failing the request.
            return OpenBody;
        }

        return $"{OpenBody}Sitemap: {origin}/sitemap.xml\n";
    }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
using System.Xml.Linq;
|
||||
using DeepDrftModels.DTOs;
|
||||
using DeepDrftPublic.Client.Common;
|
||||
|
||||
namespace DeepDrftPublic.Seo;
|
||||
|
||||
/// <summary>
/// Pure composition of the sitemaps.org <c>urlset</c> document (Phase 23 wave 23.2). Enumerates the fixed
/// indexable roots plus one entry per release, every <c>&lt;loc&gt;</c> absolutized against
/// <see cref="SeoOptions.BaseUrl"/> and per-release paths resolved through
/// <see cref="ReleaseRoutes.DetailHref(string, DeepDrftModels.Enums.ReleaseMedium)"/> — so each sitemap URL
/// equals the page's <c>SeoHead</c> canonical by construction. No fetch, no env logic: the endpoint owns the
/// gate and the release walk; this turns the gathered DTOs into XML and tolerates a missing or partial
/// release set.
/// </summary>
public static class SitemapXml
{
    private static readonly XNamespace Ns = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /// <summary>
    /// The indexable static roots (OQ-S3). An explicit list, deliberately NOT derived from the nav index:
    /// the indexable set is not the nav set (e.g. <c>/FramePlayer</c> is nav-absent and must stay out, and a
    /// new nav entry is not automatically sitemap-worthy). Revisit here if the indexable-roots set grows.
    /// </summary>
    public static readonly IReadOnlyList<string> StaticRoots = ["/", "/about", "/cuts", "/sessions", "/mixes", "/archive"];

    /// <summary>
    /// Builds the full <c>urlset</c>: the static roots (no <c>lastmod</c>) followed by one <c>&lt;url&gt;</c>
    /// per release. A release carries a <c>&lt;lastmod&gt;</c> sourced from <see cref="ReleaseDto.ReleaseDate"/>
    /// in W3C <c>YYYY-MM-DD</c> form when present (OQ-S2 — the release date, accepted as a plausible crawl hint).
    /// A null/empty release set yields a well-formed roots-only document.
    /// </summary>
    /// <param name="baseUrl">Canonical origin every <c>&lt;loc&gt;</c> is built from; trailing slashes are trimmed.</param>
    /// <param name="releases">The gathered releases; may be null, empty, or partial after an upstream failure.</param>
    /// <returns>The serialized document as a string whose XML declaration names UTF-8.</returns>
    public static string Build(string baseUrl, IEnumerable<ReleaseDto>? releases)
    {
        var origin = baseUrl.TrimEnd('/');

        var roots = StaticRoots.Select(path => UrlElement(origin + path, lastmod: null));

        // A null set is treated as "no releases" so the documented roots-only fallback actually holds.
        var releaseUrls = (releases ?? Enumerable.Empty<ReleaseDto>()).Select(release => UrlElement(
            origin + ReleaseRoutes.DetailHref(release.EntryKey, release.Medium),
            // Invariant culture: the machine-readable date must not follow the server's culture/calendar.
            release.ReleaseDate?.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)));

        var urlset = new XElement(Ns + "urlset", roots.Concat(releaseUrls));
        var document = new XDocument(new XDeclaration("1.0", "UTF-8", null), urlset);

        // Save through a byte-based UTF-8 stream so the XML declaration reads encoding="utf-8". An
        // XmlWriter over a StringBuilder/StringWriter is character-based (UTF-16) and would stamp the
        // declaration utf-16, which is wrong for a body served as application/xml.
        using var stream = new MemoryStream();
        var settings = new XmlWriterSettings
        {
            // No BOM: the bytes are decoded straight back into a string below.
            Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
            Indent = true,
        };

        // Scoped using: the writer must be disposed (flushed) before the stream is read.
        using (var xmlWriter = XmlWriter.Create(stream, settings))
        {
            document.Save(xmlWriter);
        }

        return Encoding.UTF8.GetString(stream.ToArray());
    }

    /// <summary>
    /// Creates one <c>&lt;url&gt;</c> element holding <c>&lt;loc&gt;</c> and, when
    /// <paramref name="lastmod"/> is not null, a <c>&lt;lastmod&gt;</c> child.
    /// </summary>
    /// <param name="loc">Absolute URL written as the <c>&lt;loc&gt;</c> value.</param>
    /// <param name="lastmod">Pre-formatted date string, or null to omit the element.</param>
    private static XElement UrlElement(string loc, string? lastmod)
    {
        var element = new XElement(Ns + "url", new XElement(Ns + "loc", loc));
        if (lastmod is not null)
        {
            element.Add(new XElement(Ns + "lastmod", lastmod));
        }

        return element;
    }
}
|
||||
Reference in New Issue
Block a user