feature: Phase 23 Track A — env-gated /robots.txt + /sitemap.xml public crawl endpoints

This commit is contained in:
daniel-c-harvey
2026-06-23 07:23:42 -04:00
parent 9a4b79d377
commit 5f4807cc4a
6 changed files with 432 additions and 0 deletions
+3
View File
@@ -40,6 +40,9 @@
The queue is pure domain logic, unit-testable against a fake IStreamingPlayerService
with no browser/JS. -->
<ProjectReference Include="..\DeepDrftPublic.Client\DeepDrftPublic.Client.csproj" />
<!-- Referenced for the Phase 23 crawl-directive builders (RobotsTxt / SitemapXml) — pure
string/XML composition over the env flag and release DTOs, unit-testable without HTTP. -->
<ProjectReference Include="..\DeepDrftPublic\DeepDrftPublic.csproj" />
</ItemGroup>
</Project>
+62
View File
@@ -0,0 +1,62 @@
using DeepDrftPublic.Seo;
namespace DeepDrftTests;
/// <summary>
/// Unit tests for <see cref="RobotsTxt"/> — the pure environment-branch composition of the robots.txt body
/// (Phase 23 wave 23.1). The gate (Production vs. anything-else) is the load-bearing branch: Production
/// allows + points at the sitemap and disallows the non-page routes; every non-production environment is a
/// closed door with no sitemap pointer (Invariant E1).
/// </summary>
[TestFixture]
public class RobotsTxtTests
{
    private const string BaseUrl = "https://deepdrft.com";

    /// <summary>Production output carries the wildcard agent, a root allow and an absolute sitemap pointer.</summary>
    [Test]
    public void Build_Production_AllowsAndPointsAtSitemap()
    {
        var body = RobotsTxt.Build(isProduction: true, BaseUrl);

        Assert.Multiple(() =>
        {
            Assert.That(body, Does.Contain("User-agent: *"));
            Assert.That(body, Does.Contain("Allow: /"));
            Assert.That(body, Does.Contain("Sitemap: https://deepdrft.com/sitemap.xml"));
        });
    }

    /// <summary>Production output keeps crawlers off the FramePlayer route and the API prefix.</summary>
    [Test]
    public void Build_Production_DisallowsFramePlayerAndApi()
    {
        var body = RobotsTxt.Build(isProduction: true, BaseUrl);

        Assert.Multiple(() =>
        {
            Assert.That(body, Does.Contain("Disallow: /FramePlayer"));
            Assert.That(body, Does.Contain("Disallow: /api/"));
        });
    }

    /// <summary>
    /// Non-production output must be a blanket disallow with no allow rule and no sitemap pointer.
    /// </summary>
    [Test]
    public void Build_NonProduction_DisallowsEverythingWithNoSitemapPointer()
    {
        var body = RobotsTxt.Build(isProduction: false, BaseUrl);

        Assert.Multiple(() =>
        {
            Assert.That(body, Does.Contain("User-agent: *"));
            Assert.That(body, Does.Contain("Disallow: /"));

            // Whole-line match: a substring check alone would also be satisfied by a path-scoped rule
            // such as "Disallow: /api/", which is not a closed door. The character class tolerates
            // trailing blanks and a CR before the LF, because '$' in multiline mode anchors at '\n' only.
            Assert.That(body, Does.Match(@"(?m)^Disallow: /[ \t\r]*$"));

            Assert.That(body, Does.Not.Contain("Allow:"));

            // robots.txt directive names are matched case-insensitively by crawlers, so reject an allow
            // rule in any casing. Anchoring at line start keeps "Disallow:" from matching.
            Assert.That(body, Does.Not.Match(@"(?mi)^[ \t]*Allow:"));

            Assert.That(body, Does.Not.Contain("Sitemap:"));
        });
    }

    /// <summary>A base URL supplied with a trailing slash must not produce a doubled slash in the pointer.</summary>
    [Test]
    public void Build_Production_TrimsTrailingSlashOnBaseUrl()
    {
        var body = RobotsTxt.Build(isProduction: true, "https://deepdrft.com/");

        Assert.That(body, Does.Contain("Sitemap: https://deepdrft.com/sitemap.xml"));
    }
}
+154
View File
@@ -0,0 +1,154 @@
using System.Xml.Linq;
using DeepDrftModels.DTOs;
using DeepDrftModels.Enums;
using DeepDrftPublic.Client.Common;
using DeepDrftPublic.Seo;
namespace DeepDrftTests;
/// <summary>
/// Unit tests for <see cref="SitemapXml"/> — the pure sitemaps.org urlset composition (Phase 23 wave 23.2).
/// The document is parsed back to an <see cref="XDocument"/> so each assertion checks real structure, not a
/// substring: that every <c>&lt;loc&gt;</c> is absolute and built through <see cref="ReleaseRoutes"/> (so it
/// equals the page canonical), that <c>&lt;lastmod&gt;</c> tracks the release date, that the static roots are
/// present and FramePlayer is absent, and that empty input still yields a well-formed roots-only document.
/// </summary>
[TestFixture]
public class SitemapXmlTests
{
    private const string BaseUrl = "https://deepdrft.com";

    private static readonly XNamespace Ns = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /// <summary>Creates a release DTO with placeholder title/artist and the given key, medium and date.</summary>
    private static ReleaseDto Release(string entryKey, ReleaseMedium medium, DateOnly? releaseDate = null) => new()
    {
        EntryKey = entryKey,
        Title = "Title",
        Artist = "Artist",
        Medium = medium,
        ReleaseDate = releaseDate,
    };

    /// <summary>Parses <paramref name="xml"/> and returns the root's <c>url</c> children in document order.</summary>
    private static List<XElement> Urls(string xml) =>
        XDocument.Parse(xml).Root!.Elements(Ns + "url").ToList();

    /// <summary>Returns the text of each <c>url/loc</c> element, in document order.</summary>
    private static List<string> Locs(string xml) =>
        Urls(xml).Select(url => url.Element(Ns + "loc")!.Value).ToList();

    /// <summary>Returns the single <c>url</c> element whose <c>loc</c> ends with <paramref name="suffix"/>.</summary>
    private static XElement UrlEndingWith(string xml, string suffix) =>
        // Ordinal: URLs are machine identifiers, so culture-sensitive comparison is inappropriate.
        Urls(xml).Single(url => url.Element(Ns + "loc")!.Value.EndsWith(suffix, StringComparison.Ordinal));

    [Test]
    public void Build_EmptyReleases_YieldsWellFormedRootsOnlyDocument()
    {
        var locs = Locs(SitemapXml.Build(BaseUrl, []));

        Assert.Multiple(() =>
        {
            Assert.That(locs, Has.Count.EqualTo(SitemapXml.StaticRoots.Count));
            Assert.That(locs, Does.Contain("https://deepdrft.com/"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/about"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/cuts"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/sessions"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/mixes"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/archive"));
        });
    }

    [Test]
    public void Build_IsWellFormedUrlsetWithSitemapsOrgNamespace()
    {
        var xml = SitemapXml.Build(BaseUrl, []);
        var root = XDocument.Parse(xml).Root!;

        Assert.Multiple(() =>
        {
            Assert.That(root.Name, Is.EqualTo(Ns + "urlset"));
            Assert.That(xml, Does.Contain("utf-8").IgnoreCase);
        });
    }

    [Test]
    public void Build_FramePlayerIsNeverAStaticRoot()
    {
        var locs = Locs(SitemapXml.Build(BaseUrl, []));

        // Case-insensitive so a lower-cased variant of the route cannot slip past the check.
        Assert.That(locs, Has.None.Contains("FramePlayer").IgnoreCase);
    }

    [TestCase(ReleaseMedium.Cut, "https://deepdrft.com/cuts/key-1")]
    [TestCase(ReleaseMedium.Session, "https://deepdrft.com/sessions/key-1")]
    [TestCase(ReleaseMedium.Mix, "https://deepdrft.com/mixes/key-1")]
    public void Build_ReleaseLoc_IsAbsoluteAndResolvedThroughReleaseRoutes(ReleaseMedium medium, string expectedLoc)
    {
        var locs = Locs(SitemapXml.Build(BaseUrl, [Release("key-1", medium)]));

        // The loc must equal BaseUrl + ReleaseRoutes.DetailHref — i.e. the page's SeoHead canonical, by construction.
        var viaRoutes = BaseUrl + ReleaseRoutes.DetailHref("key-1", medium);

        Assert.Multiple(() =>
        {
            Assert.That(viaRoutes, Is.EqualTo(expectedLoc));
            Assert.That(locs, Does.Contain(expectedLoc));
        });
    }

    [Test]
    public void Build_AllReleasesEnumerated_AppendedAfterStaticRoots()
    {
        (string Key, ReleaseMedium Medium)[] entries =
        [
            ("a", ReleaseMedium.Cut),
            ("b", ReleaseMedium.Mix),
            ("c", ReleaseMedium.Session),
        ];
        var releases = entries.Select(e => Release(e.Key, e.Medium)).ToArray();
        var expectedReleaseLocs = entries
            .Select(e => BaseUrl + ReleaseRoutes.DetailHref(e.Key, e.Medium))
            .ToList();

        var locs = Locs(SitemapXml.Build(BaseUrl, releases));

        Assert.Multiple(() =>
        {
            Assert.That(locs, Has.Count.EqualTo(SitemapXml.StaticRoots.Count + releases.Length));

            // Everything after the static roots must be the release URLs. The count alone would not
            // detect releases interleaved with, or placed ahead of, the roots. Relative order among
            // the releases themselves is deliberately not asserted.
            Assert.That(locs.Skip(SitemapXml.StaticRoots.Count), Is.EquivalentTo(expectedReleaseLocs));
        });
    }

    [Test]
    public void Build_ReleaseWithDate_EmitsW3CLastmod()
    {
        var xml = SitemapXml.Build(BaseUrl, [Release("key-1", ReleaseMedium.Cut, new DateOnly(2026, 5, 12))]);

        var releaseUrl = UrlEndingWith(xml, "/cuts/key-1");

        Assert.That(releaseUrl.Element(Ns + "lastmod")!.Value, Is.EqualTo("2026-05-12"));
    }

    [Test]
    public void Build_ReleaseWithoutDate_OmitsLastmod()
    {
        var xml = SitemapXml.Build(BaseUrl, [Release("key-1", ReleaseMedium.Cut)]);

        var releaseUrl = UrlEndingWith(xml, "/cuts/key-1");

        Assert.That(releaseUrl.Element(Ns + "lastmod"), Is.Null);
    }

    [Test]
    public void Build_StaticRoots_NeverCarryLastmod()
    {
        var urls = Urls(SitemapXml.Build(BaseUrl, []));

        Assert.Multiple(() =>
        {
            // Guard against a vacuous pass: All(...) is true for an empty sequence.
            Assert.That(urls, Is.Not.Empty);
            Assert.That(urls.All(url => url.Element(Ns + "lastmod") is null), Is.True);
        });
    }

    [Test]
    public void Build_TrimsTrailingSlashOnBaseUrl()
    {
        var locs = Locs(SitemapXml.Build("https://deepdrft.com/", [Release("key-1", ReleaseMedium.Cut)]));

        Assert.Multiple(() =>
        {
            // No doubled slash on the root or the release URL.
            Assert.That(locs, Does.Contain("https://deepdrft.com/"));
            Assert.That(locs, Does.Contain("https://deepdrft.com/cuts/key-1"));
        });
    }
}