using System.Globalization;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using DeepDrftModels.DTOs;
using DeepDrftPublic.Client.Common;

namespace DeepDrftPublic.Seo;

/// <summary>
/// Pure composition of the sitemaps.org urlset document (Phase 23 wave 23.2). Enumerates the fixed
/// indexable roots plus one entry per release, every &lt;loc&gt; absolutized against the supplied
/// canonical origin and per-release paths resolved through <c>ReleaseRoutes.DetailHref</c> — so each
/// sitemap URL equals the page's SeoHead canonical by construction. No fetch, no env logic: the
/// endpoint owns the gate and the release walk; this turns the gathered DTOs into XML and never
/// throws on partial input.
/// </summary>
public static class SitemapXml
{
    private static readonly XNamespace Ns = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /// <summary>
    /// The indexable static roots (OQ-S3). An explicit list, deliberately NOT derived from the nav index:
    /// the indexable set is not the nav set (e.g. /FramePlayer is nav-absent and must stay out, and a
    /// new nav entry is not automatically sitemap-worthy). Revisit here if the indexable-roots set grows.
    /// </summary>
    public static readonly IReadOnlyList<string> StaticRoots =
        ["/", "/about", "/cuts", "/sessions", "/mixes", "/archive"];

    // NOTE(review): the element type of `releases` is written as ReleaseDto (DeepDrftModels.DTOs);
    // confirm it matches the DTO the sitemap endpoint actually passes in.

    /// <summary>
    /// Builds the full urlset: the static roots (no lastmod) followed by one &lt;url&gt; per release.
    /// A release carries a &lt;lastmod&gt; sourced from its <c>ReleaseDate</c> in W3C YYYY-MM-DD form
    /// when present (OQ-S2 — the release date, accepted as a plausible crawl hint).
    /// A null/empty release set yields a well-formed roots-only document.
    /// </summary>
    /// <param name="baseUrl">Canonical origin every &lt;loc&gt; is built from; trailing slashes are trimmed.</param>
    /// <param name="releases">The gathered releases; may be null, empty, or partial after an upstream failure.</param>
    /// <returns>The serialized sitemap XML, indented, with a UTF-8 XML declaration and no byte-order mark.</returns>
    public static string Build(string baseUrl, IEnumerable<ReleaseDto>? releases)
    {
        var origin = baseUrl.TrimEnd('/');

        var roots = StaticRoots.Select(path => UrlElement(origin + path, lastmod: null));

        // A null set is treated exactly like an empty one so the documented roots-only contract holds
        // instead of surfacing an exception from Select.
        var releaseUrls = (releases ?? Enumerable.Empty<ReleaseDto>()).Select(release => UrlElement(
            origin + ReleaseRoutes.DetailHref(release.EntryKey, release.Medium),
            // Invariant culture pins the Gregorian calendar; the host's current culture could
            // otherwise substitute another calendar's year and break the W3C date form.
            release.ReleaseDate?.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)));

        var urlset = new XElement(Ns + "urlset", roots.Concat(releaseUrls));
        var document = new XDocument(new XDeclaration("1.0", "UTF-8", null), urlset);

        // Save through a byte-based UTF-8 stream so the XML declaration reads encoding="utf-8". An
        // XmlWriter over a StringBuilder/StringWriter is character-based (UTF-16) and would stamp the
        // declaration utf-16, which is wrong for a body served as application/xml.
        using var stream = new MemoryStream();
        var settings = new XmlWriterSettings
        {
            Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false),
            Indent = true
        };

        // Scoped using block: the writer must be disposed (flushed) before the stream is read back.
        using (var xmlWriter = XmlWriter.Create(stream, settings))
        {
            document.Save(xmlWriter);
        }

        return Encoding.UTF8.GetString(stream.ToArray());
    }

    /// <summary>
    /// Creates one namespaced &lt;url&gt; element holding a &lt;loc&gt; child and, only when
    /// <paramref name="lastmod"/> is non-null, a &lt;lastmod&gt; child.
    /// </summary>
    /// <param name="loc">The absolute URL written into &lt;loc&gt;.</param>
    /// <param name="lastmod">The pre-formatted date text for &lt;lastmod&gt;, or null to omit the element.</param>
    private static XElement UrlElement(string loc, string? lastmod)
    {
        var element = new XElement(Ns + "url", new XElement(Ns + "loc", loc));

        if (lastmod is not null)
        {
            element.Add(new XElement(Ns + "lastmod", lastmod));
        }

        return element;
    }
}