112 lines
4.5 KiB
C#
112 lines
4.5 KiB
C#
using System.Net.Http.Json;
|
|
using System.Text.Json;
|
|
using DeepDrftModels.DTOs;
|
|
using Models.Common;
|
|
using DeepDrftPublic.Client.Common;
|
|
using DeepDrftPublic.Seo;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
|
|
namespace DeepDrftPublic.Controllers;
|
|
|
|
/// <summary>
/// Serves the public crawl-directive surfaces (Phase 23): <c>GET /robots.txt</c> and
/// <c>GET /sitemap.xml</c>. Both are environment-gated server-side via
/// <c>IWebHostEnvironment.IsProduction()</c> read directly here — not the WASM-only
/// <c>SeoEnvironment</c> bridge — and fail safe closed (non-production is uncrawlable, Invariant E1).
///
/// <para>
/// This is a thin host boundary: it owns the gate and the release walk, and delegates all body composition
/// to the pure <see cref="RobotsTxt"/> / <see cref="SitemapXml"/> builders. The sitemap walk reuses the
/// existing <c>"DeepDrft.API"</c> named client server-to-server (the same client SSR prerender uses) — it
/// <b>enumerates and transforms</b> releases into XML rather than relaying verbatim like the proxy controllers.
/// No new API endpoint, no schema change (Phase 22 C5 holds).
/// </para>
/// </summary>
[ApiController]
public class CrawlDirectiveController : ControllerBase
{
    // 100 is the server-side PageSize cap, so this is the largest page the walk can actually get.
    private const int WalkPageSize = 100;

    // The release walk deserializes a bare PagedResult<ReleaseDto> (no ApiResultDto envelope), matching TrackClient.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IWebHostEnvironment _environment;
    private readonly SeoOptions _seoOptions;
    private readonly HttpClient _upstream;
    private readonly ILogger<CrawlDirectiveController> _logger;

    /// <summary>
    /// Wires the controller to its host environment, SEO options and logger, and resolves the
    /// <c>"DeepDrft.API"</c> named client from the factory for the sitemap release walk.
    /// </summary>
    /// <param name="environment">Host environment; its production flag gates both endpoints.</param>
    /// <param name="seoOptions">SEO options; <c>BaseUrl</c> is passed to both body builders.</param>
    /// <param name="httpClientFactory">Factory used to create the <c>"DeepDrft.API"</c> named client.</param>
    /// <param name="logger">Logger used to report a failed or cancelled release walk.</param>
    public CrawlDirectiveController(
        IWebHostEnvironment environment,
        SeoOptions seoOptions,
        IHttpClientFactory httpClientFactory,
        ILogger<CrawlDirectiveController> logger)
    {
        _environment = environment;
        _seoOptions = seoOptions;
        _upstream = httpClientFactory.CreateClient("DeepDrft.API");
        _logger = logger;
    }

    /// <summary>
    /// <c>GET /robots.txt</c>. Production: allow + FramePlayer/api disallows + sitemap pointer. Any
    /// non-production environment: <c>Disallow: /</c> with no sitemap pointer (E1). Always <c>text/plain</c>.
    /// </summary>
    /// <returns>The body produced by <see cref="RobotsTxt"/>, served as <c>text/plain</c>.</returns>
    [HttpGet("/robots.txt")]
    public ContentResult GetRobots()
    {
        var body = RobotsTxt.Build(_environment.IsProduction(), _seoOptions.BaseUrl);
        return Content(body, "text/plain");
    }

    /// <summary>
    /// <c>GET /sitemap.xml</c>. Non-production: 404 (the non-prod robots carries no sitemap pointer, so
    /// nothing references it). Production: the static roots plus one entry per release. Resilient — a
    /// partial/empty/failed release read yields a well-formed (possibly roots-only) document, never a 500.
    /// </summary>
    /// <param name="ct">Cancellation token forwarded to the upstream release walk.</param>
    /// <returns>
    /// <c>404 Not Found</c> outside production; otherwise the document produced by
    /// <see cref="SitemapXml"/>, served as <c>application/xml</c>.
    /// </returns>
    [HttpGet("/sitemap.xml")]
    public async Task<ActionResult> GetSitemap(CancellationToken ct = default)
    {
        if (!_environment.IsProduction())
        {
            return NotFound();
        }

        var releases = await GatherReleasesAsync(ct);
        var xml = SitemapXml.Build(_seoOptions.BaseUrl, releases);
        return Content(xml, "application/xml");
    }

    // Walks GET api/release page by page until every release is read. On any upstream failure, returns the
    // releases gathered so far (possibly none) so the sitemap degrades to a well-formed roots-only document
    // rather than 500ing — a sitemap that errors trains crawlers to stop fetching it (AC-S5).
    //
    // Cancellation of `ct` is treated separately from a failure: the walk stops and the partial list is
    // still returned (this method never throws), but it is logged at Debug rather than Error so abandoned
    // requests do not show up as upstream faults.
    private async Task<IReadOnlyList<ReleaseDto>> GatherReleasesAsync(CancellationToken ct)
    {
        var gathered = new List<ReleaseDto>();
        var page = 1;

        try
        {
            while (true)
            {
                var result = await _upstream.GetFromJsonAsync<PagedResult<ReleaseDto>>(
                    $"api/release?page={page}&pageSize={WalkPageSize}", JsonOptions, ct);

                // A null payload or a payload without an item list ends the walk with what we have.
                if (result?.Items is null)
                {
                    break;
                }

                var countBeforePage = gathered.Count;
                gathered.AddRange(result.Items);

                // Stop when the page contributed nothing (ran past the last page) or when the reported
                // total has been reached; comparing counts avoids enumerating the page a second time.
                var pageWasEmpty = gathered.Count == countBeforePage;
                if (pageWasEmpty || gathered.Count >= result.TotalCount)
                {
                    break;
                }

                page++;
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Our own token was cancelled — not an upstream fault. An upstream timeout also surfaces as
            // an OperationCanceledException, but leaves `ct` uncancelled and falls through to the Error path.
            _logger.LogDebug("Sitemap release walk cancelled after gathering {Count} release(s)", gathered.Count);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Sitemap release walk failed after gathering {Count} release(s); serving a partial sitemap", gathered.Count);
        }

        return gathered;
    }
}
|