name: CI

# Runs on pushes to master and v2-dev, and on pull requests targeting master.
on:
  push:
    branches:
      - master
      - v2-dev
  pull_request:
    branches:
      - master

# Least-privilege default for GITHUB_TOKEN: the steps visible in this workflow
# only check out the repository, run local tooling, and upload an artifact.
# NOTE(review): confirm that scripts/ci-guards/*.sh and
# scripts/check-coverage-thresholds.sh make no authenticated GitHub API writes;
# a job that needs more must declare its own `permissions:` block.
permissions:
  contents: read

jobs:
  # Go job: build, vet, lint, vulnerability scan, static analysis, race tests,
  # coverage gate, and the documentation/regression guards — all sequential
  # steps on a single ubuntu-latest runner.
  go-build-and-test:
    name: Go Build & Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # The Go toolchain is pinned to an exact patch release; the e2e jobs in
      # this workflow request the same version string.
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.9'

      # Compile check: one `go build` invocation per binary tree under cmd/
      # (server, agent, mcp-server, cli).
      - name: Go Build
        run: |
          go build ./cmd/server/...
          go build ./cmd/agent/...
          go build ./cmd/mcp-server/...
          go build ./cmd/cli/...

      # `go vet` over every package in the module.
      - name: Go Vet
        run: go vet ./...

      # Downloads the upstream install script and installs golangci-lint
      # v2.11.4 into $(go env GOPATH)/bin.
      # NOTE(review): the release is pinned, but the install script itself is
      # fetched from the `master` branch, i.e. whatever is served at run time.
      - name: Install golangci-lint
        run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.11.4

      # Lints every package in the module with a 5-minute timeout.
      - name: Run golangci-lint
        run: golangci-lint run ./... --timeout 5m

      # Installs govulncheck at `@latest`, so the scanner version is not pinned
      # and can differ between runs.
      - name: Install govulncheck
        run: go install golang.org/x/vuln/cmd/govulncheck@latest

      - name: Run govulncheck (M-024 hard gate)
        # Bundle-7 / D-001 partial: govulncheck distinguishes called-vs-uncalled
        # advisories. Default exit code is non-zero only when YOUR code calls
        # the vulnerable function — deferred-call advisories show up in the
        # output but don't fail the gate.
        #
        # Bundle F / Audit M-024 (NIST SSDF PW.7.2): the govulncheck step
        # is now a hard CI gate (no `continue-on-error`). Bundle E's
        # transitive bumps (x/net 0.42→0.47, x/crypto 0.41→0.45) cleared
        # the 5 deferred-call advisories that were previously on the
        # exception list, so the carve-out the original Bundle F prompt
        # designed is unnecessary — a clean `govulncheck ./...` is the
        # right gate. If a future advisory lands in a function our code
        # does call, this step fails the build until either upstream
        # ships a fix OR we cut the dep. Deferred-call advisories that
        # legitimately can't be remediated yet should be added to the
        # NIST SSDF deviation log in docs/security.md, not silenced here.
        run: govulncheck ./...

      # Installs staticcheck at `@latest`.
      # NOTE(review): the Run step below is a hard gate, so an upstream release
      # that adds or changes checks can fail CI without any change in this
      # repository — consider pinning a version.
      - name: Install staticcheck (Bundle-7 / D-001)
        run: go install honnef.co/go/tools/cmd/staticcheck@latest

      - name: Run staticcheck
        # Bundle-7 / D-001: Go static analysis additive to vet. Suppressed
        # rules live in staticcheck.conf with documented justifications;
        # adding a new entry requires an explicit security review.
        #
        # ci-pipeline-cleanup Phase 3 / frozen decision 0.7: HARD gate.
        # M-028 SA1019 sites verified closed at HEAD 1de61e91:
        #   - middleware.NewAuth: zero callers (all migrated to
        #     NewAuthWithNamedKeys in cmd/server/{main,main_test}.go)
        #   - csr.Attributes (internal/api/handler/scep.go × 2): inline
        #     //lint:ignore SA1019 with load-bearing rationale (RFC 2985
        #     challengePassword has no non-deprecated stdlib API)
        #   - elliptic.Marshal: only in bundle9_coverage_test.go × 1 as
        #     deliberate byte-equivalence regression oracle, suppressed
        #     with //lint:ignore SA1019
        run: staticcheck ./...

      # Race-detector pass over the listed internal package trees. `-count=1`
      # disables test-result caching; `-timeout 300s` caps the run.
      - name: Race Detection
        run: |
          go test -race \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/scheduler/... \
            ./internal/connector/... \
            ./internal/crypto/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/... \
            -count=1 -timeout 300s

      # Coverage pass over the listed package trees; the profile is written to
      # coverage.out, which a later step in this job uploads as an artifact.
      - name: Go Test with Coverage
        run: |
          go test \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/integration/... \
            ./internal/connector/issuer/... \
            ./internal/connector/target/... \
            ./internal/connector/notifier/... \
            ./internal/connector/discovery/... \
            ./internal/crypto/... \
            ./internal/mcp/... \
            ./internal/cli/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/... \
            -count=1 -cover -coverprofile=coverage.out

      - name: Check Coverage Thresholds
        # ci-pipeline-cleanup Phase 2: per-package floors moved to
        # .github/coverage-thresholds.yml. Each entry has `floor:` +
        # `why:` (load-bearing context). Logic in
        # scripts/check-coverage-thresholds.sh — operator runs the same
        # script locally via `make verify`-equivalent loop.
        run: bash scripts/check-coverage-thresholds.sh

      - name: Upload Coverage Report
        # Upload even when an earlier step failed — in particular the
        # threshold gate directly above, which is exactly when the profile is
        # needed for debugging. The hashFiles() guard skips the upload when no
        # coverage.out was produced at all (e.g. the build failed before the
        # tests ran), and !cancelled() keeps a cancelled run from uploading.
        if: ${{ !cancelled() && hashFiles('coverage.out') != '' }}
        uses: actions/upload-artifact@v4
        with:
          name: go-coverage
          path: coverage.out
          retention-days: 30

      # Bundle P / Strengthening #6 — QA-doc drift guards. Forces every PR
      # that adds a Part to docs/testing-guide.md OR a seed row to
      # migrations/seed_demo.sql to keep docs/qa-test-guide.md in sync. This
      # eliminates the doc-drift class structurally — the symptom Bundle I
      # had to clean up by hand becomes a CI-time error going forward.
      - name: QA-doc Part-count drift guard
        # Compares the Part total claimed in the docs/qa-test-guide.md headline
        # ("49 of <N> Parts" — the last <N> found wins) with the number of
        # `## Part <n>:` headings in docs/testing-guide.md. Any mismatch, or a
        # headline that cannot be parsed, fails the step with an ::error::.
        run: |
          set -e
          DOC_PARTS=$(grep -oE '49 of [0-9]+ Parts' docs/qa-test-guide.md | grep -oE '[0-9]+' | tail -1)
          # `grep -c` exits 1 when it counts zero matching lines; without the
          # `|| true`, `set -e` would abort right here and the step would fail
          # without printing any of the ::error:: explanations below.
          GUIDE_PARTS=$(grep -cE '^## Part [0-9]+:' docs/testing-guide.md || true)
          # grep prints nothing if the file is unreadable; treat that as 0 so
          # the drift message below stays well-formed.
          GUIDE_PARTS=${GUIDE_PARTS:-0}
          if [ -z "$DOC_PARTS" ]; then
            echo "::error::Could not extract Part count from docs/qa-test-guide.md headline."
            echo "  Expected pattern: '49 of <N> Parts'"
            exit 1
          fi
          if [ "$DOC_PARTS" != "$GUIDE_PARTS" ]; then
            echo "::error::DRIFT — qa-test-guide.md headline claims $DOC_PARTS Parts; testing-guide.md has $GUIDE_PARTS Parts."
            echo "  Update docs/qa-test-guide.md to match. Bundle I patched this once;"
            echo "  Bundle P added this guard so the drift cannot recur silently."
            exit 1
          fi
          echo "QA-doc Part-count drift guard: clean ($DOC_PARTS == $GUIDE_PARTS)."

      - name: QA-doc seed-count drift guard
        # Cross-checks counts documented in docs/qa-test-guide.md against IDs
        # found in migrations/seed_demo.sql:
        #   - certificates: the documented count must equal the number of
        #     unique mc-* IDs exactly;
        #   - issuers: the documented count must not exceed the number of
        #     unique iss-* IDs, and may trail it by at most 5.
        # A documented count that cannot be extracted only emits a ::warning::
        # and skips that comparison.
        run: |
          set -e
          # Seed-cert count: agnostic to documented header format. The current
          # documented count lives in `### Certificates (32 total in ...` —
          # extract the first integer in that header.
          DOC_CERTS=$(grep -oE '### Certificates \([0-9]+' docs/qa-test-guide.md | grep -oE '[0-9]+' | head -1)
          # Authoritative count: unique mc-* IDs in seed_demo.sql.
          SEED_CERTS=$(grep -oE 'mc-[a-z0-9_-]+' migrations/seed_demo.sql | sort -u | wc -l | tr -d ' ')
          if [ -z "$DOC_CERTS" ]; then
            echo "::warning::Could not extract documented cert count from docs/qa-test-guide.md."
            echo "  Skipping cert-count drift check (header format may have changed)."
          elif [ "$DOC_CERTS" != "$SEED_CERTS" ]; then
            echo "::error::DRIFT — qa-test-guide.md says $DOC_CERTS certs; seed_demo.sql has $SEED_CERTS unique mc-* IDs."
            echo "  Update docs/qa-test-guide.md::Seed Data Reference to match."
            exit 1
          fi
          # Issuers: seed-table count vs doc claim.
          DOC_ISS=$(grep -oE '### Issuers \([0-9]+' docs/qa-test-guide.md | grep -oE '[0-9]+' | head -1)
          # Authoritative: unique iss-* IDs (close enough proxy; the issuers
          # table count IS the unique-ID count for this prefix).
          SEED_ISS=$(grep -oE 'iss-[a-z0-9_-]+' migrations/seed_demo.sql | sort -u | wc -l | tr -d ' ')
          if [ -z "$DOC_ISS" ]; then
            echo "::warning::Could not extract documented issuer count."
          elif [ "$DOC_ISS" -gt "$SEED_ISS" ]; then
            # The unique iss-* ID count is an upper bound on issuer rows, so a
            # documented count above it can only be stale documentation. A
            # spread test alone (SEED - DOC > 5) would let this direction pass.
            echo "::error::DRIFT — qa-test-guide.md says $DOC_ISS issuers; seed_demo.sql has only $SEED_ISS unique iss-* IDs."
            exit 1
          elif [ "$((SEED_ISS - DOC_ISS))" -gt 5 ]; then
            # Allow up to 5 IDs of slack (an absolute count, not a
            # percentage) — iss-* IDs appear in audit_events and other
            # reference tables that aren't issuer-table rows. Drift only
            # flags when the spread grows large.
            echo "::error::DRIFT — qa-test-guide.md says $DOC_ISS issuers; seed_demo.sql has $SEED_ISS unique iss-* IDs (spread > 5)."
            exit 1
          fi
          echo "QA-doc seed-count drift guard: clean."

      # Bundle Q / I-001 closure — test-naming convention guard. Originally
      # informational: the convention was `Test<Func>_<Scenario>_<ExpectedResult>`,
      # and the step printed non-conformant tests without failing the build.
      # Bundle I-001-extended (2026-04-27) — promoted from informational
      # to hard-fail. The convention is now: every `func TestXxx(...)` MUST
      # match Go's standard test-runner pattern (`^func Test[A-Z]`). Tests
      # whose name starts with `func Test<lowercase>` are silently SKIPPED
      # by `go test` (Go only runs `Test[A-Z]...`) — those are the real
      # bugs this guard catches.
      #
      # The original audit's `Test<Func>_<Scenario>_<ExpectedResult>` triple-
      # token prescription has been relaxed: single-function pin tests like
      # `TestNewAgent` or `TestSplitPEMChain` are valid Go convention, with
      # internal scenarios expressed via `t.Run` subtests. Requiring the
      # underscore-Scenario-Result triple repo-wide would mean renaming
      # 167 legitimate tests for no observable behavior change. The
      # Test<Func>_<Scenario>_<ExpectedResult> form remains documented as
      # the recommended pattern for parameterized scenarios in
      # docs/qa-test-guide.md, but is not gated.
      - name: Regression guards (extracted to scripts/ci-guards/)
        # Runs every scripts/ci-guards/<id>.sh (ci-pipeline-cleanup bundle
        # Phase 1), each inside its own collapsible log group. A failing guard
        # does not stop the loop: all guards run, and the step fails at the
        # end if any of them did.
        #   Run one locally:  bash scripts/ci-guards/G-3-env-docs-drift.sh
        #   Add a guard:      drop a new <id>.sh — the glob picks it up.
        #   Contract:         exit 0 on a clean repo; non-zero with an
        #                     ::error:: prefix on regression.
        # See scripts/ci-guards/README.md.
        run: |
          set -e
          status=0
          for guard in scripts/ci-guards/*.sh; do
            echo "::group::$(basename "$guard")"
            bash "$guard" || status=1
            echo "::endgroup::"
          done
          exit "$status"

  # Frontend job: install dependencies, type-check, run unit tests, and build
  # the bundle. Every npm/npx step runs with `working-directory: web`.
  frontend-build:
    name: Frontend Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Dependencies
        working-directory: web
        run: npm ci

      # Type-check only; `--noEmit` produces no output files.
      - name: TypeScript Check
        working-directory: web
        run: npx tsc --noEmit

      # `vitest run` executes the suite once (no watch mode).
      - name: Run Frontend Tests
        working-directory: web
        run: npx vitest run

      - name: Build Frontend
        working-directory: web
        run: npx vite build

      - name: Regression guards (extracted to scripts/ci-guards/)
        # Runs every scripts/ci-guards/<id>.sh (ci-pipeline-cleanup bundle
        # Phase 1), each inside its own collapsible log group. A failing guard
        # does not stop the loop: all guards run, and the step fails at the
        # end if any of them did.
        #   Run one locally:  bash scripts/ci-guards/G-3-env-docs-drift.sh
        #   Add a guard:      drop a new <id>.sh — the glob picks it up.
        #   Contract:         exit 0 on a clean repo; non-zero with an
        #                     ::error:: prefix on regression.
        # See scripts/ci-guards/README.md.
        #
        # NOTE(review): the go-build-and-test job runs this same loop, so
        # every guard executes twice per workflow run. Confirm some guard
        # needs this job's state (installed node_modules / built bundle);
        # otherwise one copy is redundant.
        run: |
          set -e
          status=0
          for guard in scripts/ci-guards/*.sh; do
            echo "::group::$(basename "$guard")"
            bash "$guard" || status=1
            echo "::endgroup::"
          done
          exit "$status"

  # Helm job: lint the chart under deploy/helm/certctl/ and render it once per
  # TLS provisioning mode. Rendered output is discarded — only the exit status
  # of each helm invocation matters.
  helm-lint:
    name: Helm Chart Validation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Helm
        uses: azure/setup-helm@v4
        with:
          version: '3.13.0'

      # HTTPS-Everywhere (v2.0.47): the chart fails render when no TLS source is
      # configured. Every lint/template invocation below must pick exactly one
      # provisioning mode — see deploy/helm/certctl/templates/_helpers.tpl
      # (certctl.tls.required) and docs/tls.md.
      - name: Lint Helm Chart
        run: |
          helm lint deploy/helm/certctl/ \
            --set server.tls.existingSecret=certctl-tls-ci

      # Mode 1: TLS material supplied through a pre-existing secret name.
      - name: Template Helm Chart (existingSecret mode)
        run: |
          helm template certctl deploy/helm/certctl/ \
            --set server.tls.existingSecret=certctl-tls-ci \
            > /dev/null

      # Mode 2: cert-manager enabled, with an issuerRef name supplied.
      - name: Template Helm Chart (cert-manager mode)
        run: |
          helm template certctl deploy/helm/certctl/ \
            --set server.tls.certManager.enabled=true \
            --set server.tls.certManager.issuerRef.name=letsencrypt-prod \
            > /dev/null

      - name: Template Helm Chart (guard fails without TLS)
        # Inverse test: rendering with no TLS source configured MUST be
        # refused by the chart. A successful render here means the fail-loud
        # guard in certctl.tls.required has regressed.
        run: |
          if ! helm template certctl deploy/helm/certctl/ > /dev/null 2>&1; then
            # Render was refused — the expected outcome.
            exit 0
          fi
          echo "::error::Helm chart rendered without a TLS source — fail-loud guard regressed"
          exit 1

  # =============================================================================
  # Deploy-Hardening II Phase 15 — per-vendor e2e matrix
  # =============================================================================
  # Per frozen decision 0.9: each vendor's e2e tests run in their own
  # matrix job so vendor failures surface independently in the CI status
  # check (operator sees "K8s 1.31 vendor-edge fail" as a discrete check,
  # not a generic "integration tests failed").
  # One job instance per vendor. `fail-fast: false` lets the remaining
  # vendors keep running after one fails, so each vendor reports its own
  # status. Starts only after go-build-and-test succeeds.
  deploy-vendor-e2e:
    name: deploy-vendor-e2e (${{ matrix.vendor }})
    runs-on: ubuntu-latest
    needs:
      - go-build-and-test
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        vendor:
          - nginx
          - apache
          - haproxy
          - traefik
          - caddy
          - envoy
          - postfix
          - dovecot
          - ssh
          - javakeystore
          - k8s
          - f5-mock
    steps:
      - uses: actions/checkout@v5

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.9'
          cache: true

      - name: Bring up vendor sidecar
        # Map matrix.vendor → docker-compose service name. The naming is
        # NOT 1:1 because (a) the legacy NGINX vendor-edge tests reuse the
        # apache-test sidecar via requireSidecar(t,"apache") — see the
        # comment in deploy/test/nginx_vendor_e2e_test.go; (b) the openssh
        # service is named openssh-test (not ssh-test); (c) the kind
        # cluster service is named k8s-kind-test; (d) the F5 mock service
        # is named f5-mock-icontrol and must be built first because it
        # has no published image; (e) the JavaKeystore tests are pure-Go
        # placeholder stubs that exercise no sidecar.
        #
        # An unknown vendor fails the step; a vendor mapped to an empty
        # service name exits 0 without starting anything.
        #
        # NOTE(review): readiness is a fixed `sleep 5` after `up -d`, not a
        # health check — a slow-starting sidecar may not be ready when the
        # tests begin.
        run: |
          set -e
          case "${{ matrix.vendor }}" in
            nginx)        SVC=apache-test ;;       # nginx tests reuse apache sidecar
            apache)       SVC=apache-test ;;
            haproxy)      SVC=haproxy-test ;;
            traefik)      SVC=traefik-test ;;
            caddy)        SVC=caddy-test ;;
            envoy)        SVC=envoy-test ;;
            postfix)      SVC=postfix-test ;;
            dovecot)      SVC=dovecot-test ;;
            ssh)          SVC=openssh-test ;;
            k8s)          SVC=k8s-kind-test ;;
            f5-mock)      SVC=f5-mock-icontrol ;;
            javakeystore) SVC="" ;;                # pure-Go placeholder stubs; no sidecar needed
            *)            echo "::error::unknown matrix vendor '${{ matrix.vendor }}'"; exit 1 ;;
          esac
          if [ -z "$SVC" ]; then
            echo "vendor=${{ matrix.vendor }} runs without a sidecar (pure-Go placeholder tests)"
            exit 0
          fi
          if [ "${{ matrix.vendor }}" = "f5-mock" ]; then
            docker compose --profile deploy-e2e -f deploy/docker-compose.test.yml build "$SVC"
          fi
          docker compose --profile deploy-e2e -f deploy/docker-compose.test.yml up -d "$SVC"
          sleep 5

      - name: Run vendor-edge e2e
        # Maps matrix.vendor to the `go test -run` pattern for that vendor's
        # tests and runs them, with the race detector on, from ./deploy/test/...
        # INTEGRATION=1 is exported to the test process.
        env:
          INTEGRATION: "1"
        run: |
          # Per frozen decision 0.6: discoverable via
          # `go test -run 'VendorEdge_<vendor>'`. Match the matrix
          # vendor (test names are CamelCase: TestVendorEdge_NGINX_*,
          # TestVendorEdge_HAProxy_*, etc.).
          case "${{ matrix.vendor }}" in
            nginx)        PATTERN='VendorEdge_NGINX' ;;
            apache)       PATTERN='VendorEdge_Apache' ;;
            haproxy)      PATTERN='VendorEdge_HAProxy' ;;
            traefik)      PATTERN='VendorEdge_Traefik' ;;
            caddy)        PATTERN='VendorEdge_Caddy' ;;
            envoy)        PATTERN='VendorEdge_Envoy' ;;
            postfix)      PATTERN='VendorEdge_Postfix' ;;
            dovecot)      PATTERN='VendorEdge_Dovecot' ;;
            ssh)          PATTERN='VendorEdge_SSH' ;;
            javakeystore) PATTERN='VendorEdge_JavaKeystore' ;;
            k8s)          PATTERN='VendorEdge_K8s' ;;
            f5-mock)      PATTERN='VendorEdge_F5' ;;
            # Without a default, an unmapped vendor would leave PATTERN empty
            # and `go test -run ""` would run every test in ./deploy/test/...
            # under this vendor's name instead of failing.
            *)            echo "::error::no test pattern mapped for matrix vendor '${{ matrix.vendor }}'"; exit 1 ;;
          esac
          go test -tags integration -race -count=1 -run "$PATTERN" ./deploy/test/...

      # `if: always()` runs the teardown even when an earlier step failed.
      # `down -v` also removes the compose volumes. This runs for the
      # javakeystore entry as well, where no sidecar was started.
      - name: Tear down sidecar
        if: always()
        run: docker compose --profile deploy-e2e -f deploy/docker-compose.test.yml down -v

  # =============================================================================
  # Deploy-Hardening II Phase 15 — Windows-host vendor e2e matrix
  # =============================================================================
  # IIS + WinCertStore tests run on windows-latest runners per frozen
  # decision 0.4 (Windows containers run only on Windows hosts).
  # Linux-only operators skip via //go:build integration && !no_iis.
  # Windows counterpart of the vendor matrix: one job instance per vendor on a
  # windows-latest runner, independent of each other (`fail-fast: false`).
  # Starts only after go-build-and-test succeeds.
  deploy-vendor-e2e-windows:
    name: deploy-vendor-e2e-windows (${{ matrix.vendor }})
    runs-on: windows-latest
    needs:
      - go-build-and-test
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        vendor:
          - iis
          - wincertstore
    steps:
      - uses: actions/checkout@v5

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.9'
          cache: true

      # Starts the windows-iis-test compose service for every matrix entry
      # (both iis and wincertstore), then waits a fixed 10 seconds.
      # NOTE(review): readiness is a fixed sleep, not a health check.
      - name: Bring up Windows IIS sidecar
        shell: powershell
        run: |
          docker compose --profile deploy-e2e-windows -f deploy/docker-compose.test.yml up -d windows-iis-test
          Start-Sleep -Seconds 10

      - name: Run vendor-edge e2e (Windows)
        # Selects the `go test -run` pattern for the matrix vendor: "iis" maps
        # to VendorEdge_IIS; any other value maps to VendorEdge_WinCertStore.
        env:
          INTEGRATION: "1"
        shell: powershell
        run: |
          switch ("${{ matrix.vendor }}") {
            "iis"   { $pattern = "VendorEdge_IIS" }
            default { $pattern = "VendorEdge_WinCertStore" }
          }
          go test -tags integration -race -count=1 -run $pattern ./deploy/test/...

      # `if: always()` runs the teardown even when an earlier step failed;
      # `down -v` also removes the compose volumes.
      - name: Tear down sidecar
        if: always()
        shell: powershell
        run: docker compose --profile deploy-e2e-windows -f deploy/docker-compose.test.yml down -v