mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 15:41:41 +00:00
3fe511189f
scripts/ci-guards/doc-rot-detector.sh — walks every *.md under docs/,
parses the '> Last reviewed: YYYY-MM-DD' blockquote convention
established by the 2026-05-04 docs overhaul, emits:
- ::warning:: GitHub annotation when a doc is >= 90 days old
(heads-up; non-blocking).
- ::error:: + exit 1 when >= 120 days (build-blocking).
Uses HEAD commit timestamp (git log -1 --format=%cs) as 'now' rather
than wall clock — keeps the guard reproducible on a release that's
been on a shelf.
Verified in sandbox:
- Clean run: 90 docs scanned, 88 dated (2 in docs/archive/
allowlisted in bulk), 0 missing field, 0 warns, 0 fails.
- Negative test (backdated docs/README.md to 2025-12-01, 162d):
fires with '::error::Docs older than 120 days (build-blocking)'
+ three remediation paths listed.
Allowlist at scripts/ci-guards/doc-rot-detector-exceptions.yaml:
- 'docs/archive/' bulk-allowlisted (intentionally frozen content)
- Per-doc entries require name + justification + expiration date;
expired entries fail the guard.
Bootstrap sweep NOT required — baseline survey at branch creation
shows oldest doc is 7 days old (2026-05-05); zero docs over either
threshold today. Forward-looking insurance only.
Audit-Closes: post-v2.1.0-anti-rot/item-5
183 lines
6.3 KiB
Bash
Executable File
183 lines
6.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# scripts/ci-guards/doc-rot-detector.sh
|
|
#
|
|
# Per post-v2.1.0 anti-rot item 5 (Auditable Codebase Bundle).
|
|
#
|
|
# Walks every *.md under docs/ and parses the "> Last reviewed:
|
|
# YYYY-MM-DD" blockquote line (the convention established by the
|
|
# 2026-05-04 docs overhaul — every doc carries one). Emits:
|
|
#
|
|
# - ::warning:: GitHub annotation (yellow, non-blocking) when a doc
|
|
# is older than 90 days vs HEAD's commit timestamp.
|
|
# - ::error:: GitHub annotation + exit 1 when a doc is older than
|
|
# 120 days.
|
|
#
|
|
# Uses HEAD's commit timestamp (git log -1 --format=%ai HEAD) as "now"
|
|
# rather than wall-clock — keeps the guard reproducible on a release
|
|
# that's been on a shelf. A 2-year-old commit verified today should
|
|
# fail the same docs it failed back then, not new ones.
|
|
#
|
|
# Allowlist: scripts/ci-guards/doc-rot-detector-exceptions.yaml
|
|
# (every entry carries a one-line justification + an expiration date).
|
|
# docs/archive/** is allowlisted in bulk by directory; it's
|
|
# intentionally frozen historical content and shouldn't keep getting
|
|
# reviewed.
|
|
|
|
set -e
|
|
|
|
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
|
EXCEPTIONS_FILE="${REPO_ROOT}/scripts/ci-guards/doc-rot-detector-exceptions.yaml"
|
|
|
|
WARN_DAYS="${CERTCTL_DOC_ROT_WARN_DAYS:-90}"
|
|
FAIL_DAYS="${CERTCTL_DOC_ROT_FAIL_DAYS:-120}"
|
|
|
|
cd "$REPO_ROOT"
|
|
|
|
# "Now" = the commit timestamp of HEAD, in YYYY-MM-DD form. Falls back
|
|
# to the wall clock if git fails (e.g., guard run outside a repo).
|
|
NOW_DATE="$(git -C "$REPO_ROOT" log -1 --format=%cs HEAD 2>/dev/null || date -u +%Y-%m-%d)"
|
|
|
|
python3 - "$REPO_ROOT" "$EXCEPTIONS_FILE" "$NOW_DATE" "$WARN_DAYS" "$FAIL_DAYS" <<'PY'
|
|
import os, sys, datetime, pathlib, re
|
|
|
|
repo_root = pathlib.Path(sys.argv[1])
|
|
exceptions_path = pathlib.Path(sys.argv[2])
|
|
now_str = sys.argv[3]
|
|
warn_days = int(sys.argv[4])
|
|
fail_days = int(sys.argv[5])
|
|
|
|
try:
|
|
now = datetime.date.fromisoformat(now_str)
|
|
except Exception:
|
|
sys.stderr.write(f"could not parse now={now_str!r}\n")
|
|
sys.exit(2)
|
|
|
|
# Load allowlist. Same tiny YAML reader the other guards use.
|
|
allowlist_paths = set()
|
|
per_doc = {}
|
|
if exceptions_path.exists():
|
|
txt = exceptions_path.read_text()
|
|
cur = None
|
|
for raw in txt.splitlines():
|
|
line = raw.rstrip()
|
|
if not line.strip() or line.lstrip().startswith("#"):
|
|
continue
|
|
if line.lstrip().startswith("- path:"):
|
|
cur = {"path": line.split(":", 1)[1].strip().strip('"').strip("'")}
|
|
# entries can be a directory (path ends with /) or a single file
|
|
if cur["path"].endswith("/"):
|
|
allowlist_paths.add(cur["path"])
|
|
else:
|
|
per_doc[cur["path"]] = cur
|
|
continue
|
|
if cur is not None and line.startswith(" "):
|
|
if ":" not in line:
|
|
continue
|
|
k, v = line.split(":", 1)
|
|
cur[k.strip()] = v.strip().strip('"').strip("'")
|
|
|
|
LAST_REVIEWED_RE = re.compile(r"^>\s*Last reviewed:\s*(\d{4}-\d{2}-\d{2})\s*$", re.MULTILINE)
|
|
|
|
docs_root = repo_root / "docs"
|
|
if not docs_root.exists():
|
|
sys.stderr.write("docs/ not found — nothing to check\n")
|
|
sys.exit(0)
|
|
|
|
# Collect every doc file.
|
|
docs = []
|
|
for fp in docs_root.rglob("*.md"):
|
|
rel = fp.relative_to(repo_root).as_posix()
|
|
docs.append((rel, fp))
|
|
|
|
def is_in_allowlisted_dir(rel: str) -> bool:
|
|
for prefix in allowlist_paths:
|
|
if rel.startswith(prefix):
|
|
return True
|
|
return False
|
|
|
|
def per_doc_active(rel: str) -> (bool, str):
|
|
if rel not in per_doc:
|
|
return False, ""
|
|
e = per_doc[rel]
|
|
exp = e.get("expires")
|
|
just = e.get("justification", "")
|
|
if not exp:
|
|
return False, "allowlist entry missing 'expires:'"
|
|
try:
|
|
ed = datetime.date.fromisoformat(exp)
|
|
except Exception:
|
|
return False, f"allowlist entry has malformed expires: {exp!r}"
|
|
if ed < now:
|
|
return False, f"allowlist entry expired on {exp}"
|
|
if not just:
|
|
return False, "allowlist entry has no justification"
|
|
return True, f"allowlisted until {exp}: {just}"
|
|
|
|
warn_rows = []
|
|
fail_rows = []
|
|
missing_field_rows = []
|
|
skipped = 0
|
|
total_checked = 0
|
|
|
|
for rel, fp in sorted(docs):
|
|
if is_in_allowlisted_dir(rel):
|
|
skipped += 1
|
|
continue
|
|
ok, msg = per_doc_active(rel)
|
|
if ok:
|
|
skipped += 1
|
|
continue
|
|
body = fp.read_text(errors="ignore")
|
|
m = LAST_REVIEWED_RE.search(body)
|
|
if not m:
|
|
missing_field_rows.append(rel)
|
|
continue
|
|
try:
|
|
reviewed = datetime.date.fromisoformat(m.group(1))
|
|
except Exception:
|
|
missing_field_rows.append(rel + f" (unparseable date {m.group(1)!r})")
|
|
continue
|
|
total_checked += 1
|
|
age = (now - reviewed).days
|
|
if age >= fail_days:
|
|
fail_rows.append((rel, reviewed.isoformat(), age))
|
|
elif age >= warn_days:
|
|
warn_rows.append((rel, reviewed.isoformat(), age))
|
|
|
|
print(f"doc-rot-detector — now={now.isoformat()} warn≥{warn_days}d fail≥{fail_days}d")
|
|
print(f" total docs scanned: {len(docs)}, allowlisted: {skipped}, dated: {total_checked}, missing date field: {len(missing_field_rows)}")
|
|
print()
|
|
|
|
if missing_field_rows:
|
|
print("::warning::Docs missing or unparseable '> Last reviewed: YYYY-MM-DD' line:")
|
|
for r in missing_field_rows:
|
|
print(f" - {r}")
|
|
print()
|
|
print(" Add the convention line near the top of each doc, e.g.:")
|
|
print(' > Last reviewed: 2026-MM-DD')
|
|
|
|
if warn_rows:
|
|
print(f"::warning::Docs older than {warn_days} days (heads-up, non-blocking):")
|
|
for rel, d, age in warn_rows:
|
|
print(f" - {rel}: reviewed {d} ({age}d ago)")
|
|
print()
|
|
|
|
if fail_rows:
|
|
print(f"::error::Docs older than {fail_days} days (build-blocking):")
|
|
for rel, d, age in fail_rows:
|
|
print(f" - {rel}: reviewed {d} ({age}d ago)")
|
|
print()
|
|
print(" Fix options:")
|
|
print(" 1. Re-read the doc against the repo, fix any drift, bump '> Last reviewed:' to today.")
|
|
print(" 2. If the doc is intentionally frozen, move it under docs/archive/ (allowlisted in bulk).")
|
|
print(f" 3. Add a per-doc allowlist row to {exceptions_path.relative_to(repo_root)} with a justification + expiration.")
|
|
sys.exit(1)
|
|
|
|
# Missing-date-field counts as a hard fail too — the convention is
|
|
# load-bearing.
|
|
if missing_field_rows:
|
|
sys.exit(1)
|
|
|
|
print("OK — every doc under docs/ has a recent '> Last reviewed:' date.")
|
|
PY
|