ajhahn.de
← FlashOS
Shell 117 lines
#!/usr/bin/env bash
# Deterministic doc-drift gate for CI.
#
# This is the MECHANICAL subset of the full doc-drift review: the checks that
# can be decided with zero judgement, so they are safe to fail a build on.
# The fuzzy prose checks (scenario/checkpoint wording, intra-doc numeric
# self-consistency, "is this version tag historical or a current-state claim")
# stay with the human-in-the-loop reviewer — they false-positive too easily to
# gate a push.
#
# Scope: the active public English docs only. README.md, DOCUMENTATION.md,
# SETUP.md. CHANGELOG.md is FROZEN provenance — a path named
# there (e.g. a since-deleted script) is honored lineage, not drift, and is
# never scanned here.
#
#   FATAL (exit 1): a repo-relative path referenced in an active doc that does
#                   not exist on disk. A dead reference is an unambiguous bug.
#   WARN  (exit 0): version badge vs the newest tag, documented `zig build`
#                   targets vs build.zig, and the "N EL0 scenarios" count vs
#                   the boot contract. Printed for visibility; never fails CI.
#
# Usage: scripts/check_doc_drift.sh   (run from the repo root)
set -uo pipefail

DOCS="README.md DOCUMENTATION.md SETUP.md"
fatal=0

note()  { printf '%s\n' "$*"; }
warn()  { printf 'WARN  %s\n' "$*"; }
block() { printf 'BLOCK %s\n' "$*"; fatal=1; }

# --- FATAL: dead repo-relative paths ----------------------------------------
note "== dead-path check (FATAL) =="
# Extract MAXIMAL slashed tokens (so `armstub/src/x.S` is not truncated to
# `src/x.S`), then keep only those whose first segment is a real top-level
# directory — that both filters out non-paths (`and/or`) and anchors the path
# at its true root. Skip <placeholder> tokens and build artifacts (*.elf etc.
# are generated, legitimately absent from a clean checkout — not doc drift).
raw=$(grep -rhoE '[A-Za-z0-9_][A-Za-z0-9_./-]*/[A-Za-z0-9_.-]+' $DOCS 2>/dev/null \
      | sed -E 's/[.,:;)]+$//' \
      | sort -u)
dead=0
for p in $raw; do
  case "$p" in *'<'*|*'>'*|*/) continue ;; esac      # placeholder / bare dir
  case "$p" in *.elf|*.img|*.o|*.bin|*.a) continue ;; esac  # build artifacts
  first=${p%%/*}
  [ -d "$first" ] || continue                        # first segment not a repo dir
  [ -e "$p" ] && continue
  # Exists somewhere as a path SUFFIX? Then it is a relatively-shown reference
  # (e.g. a path drawn nested inside an ASCII tree diagram), not a dead one.
  # Only a segment sequence that appears NOWHERE in the repo is a true
  # rename/deletion worth failing on.
  find . -path "*/$p" -not -path './.zig-cache/*' -not -path './zig-out/*' 2>/dev/null | grep -q . && continue
  hits=$(grep -rnF "$p" $DOCS 2>/dev/null | head -3 | sed 's/^/    /')
  block "dead path: $p"
  printf '%s\n' "$hits"
  dead=$((dead+1))
done
[ "$dead" -eq 0 ] && note "ok: every referenced repo path exists"

# --- WARN: version badge vs newest tag --------------------------------------
note ""
note "== version badge vs newest tag (WARN) =="
tag=$(git tag --sort=-v:refname 2>/dev/null | head -1)
badge=$(grep -oE 'badge/version-v[0-9]+\.[0-9]+\.[0-9]+' README.md 2>/dev/null | head -1 | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+')
if [ -z "$tag" ]; then
  warn "no git tags reachable (shallow clone?) — skipped. For full coverage the CI checkout needs fetch-depth: 0."
elif [ -z "$badge" ]; then
  warn "no version-vX.Y.Z badge found in README.md — skipped."
elif [ "$badge" = "$tag" ]; then
  note "ok: README badge $badge == newest tag $tag"
else
  # numeric compare: badge behind tag is the real smell
  lower=$(printf '%s\n%s\n' "${badge#v}" "${tag#v}" | sort -V | head -1)
  if [ "$lower" = "${badge#v}" ]; then
    warn "README badge $badge is BEHIND newest tag $tag — bump the badge (or tag is ahead of a doc update)."
  else
    warn "README badge $badge is AHEAD of newest tag $tag — fine mid-development, stale if the tag was expected."
  fi
fi

# --- WARN: documented build targets resolve to build.zig steps --------------
note ""
note "== documented zig build targets (WARN) =="
targets=$(grep -rhoE 'zig build [a-z][a-z0-9-]+' $DOCS 2>/dev/null | awk '{print $3}' | sort -u)
miss=0
for t in $targets; do
  grep -qE "b\.step\(\"$t\"" build.zig 2>/dev/null && continue
  warn "doc names \`zig build $t\` but build.zig has no b.step(\"$t\") — board-gated/conditional, or stale. Confirm."
  miss=$((miss+1))
done
[ "$miss" -eq 0 ] && note "ok: every documented zig build target resolves to a build.zig step"

# --- WARN: "N EL0 scenarios" count vs the boot contract ---------------------
note ""
note "== EL0 scenario count (WARN) =="
contract=$(grep -oE '[0-9]+ EL0 scenarios' scripts/run_qemu_test.sh 2>/dev/null | head -1 | grep -oE '^[0-9]+')
if [ -n "$contract" ]; then
  bad=$(grep -rnoE '[0-9]+ EL0 scenarios' $DOCS 2>/dev/null | grep -vE ":$contract EL0 scenarios$" || true)
  if [ -n "$bad" ]; then
    warn "a doc states a different EL0-scenario count than the contract ($contract):"
    printf '%s\n' "$bad" | sed 's/^/    /'
  else
    note "ok: no doc contradicts the contract's $contract EL0 scenarios"
  fi
else
  warn "could not read the EL0-scenario count from scripts/run_qemu_test.sh — skipped."
fi

note ""
if [ "$fatal" -ne 0 ]; then
  note "RESULT: FAIL — dead doc references above. Fix the path (or remove the reference)."
  exit 1
fi
note "RESULT: pass (FATAL checks clean; warnings above are advisory — run /doc-drift for the deep pass)."
exit 0