diff --git a/.github/workflows/loadtest.yml b/.github/workflows/loadtest.yml
index db2919c..b02b407 100644
--- a/.github/workflows/loadtest.yml
+++ b/.github/workflows/loadtest.yml
@@ -75,3 +75,65 @@ jobs:
           name: k6-summary-${{ github.run_id }}
           path: deploy/test/loadtest/results/
           retention-days: 90
+
+  # ---------------------------------------------------------------------------
+  # Phase 8 SCALE-H2 — scale-tier scenarios. Three new k6 drivers:
+  #   - bulk-renewal: 10K-cert seed + criteria-mode POST /bulk-renew
+  #   - acme-burst:   200 concurrent VUs against directory/nonce/ARI
+  #   - agent-storm:  5K-agent seed + 167 heartbeats/sec sustained
+  #
+  # Matrix dispatch so each scenario runs on its own runner and a
+  # regression in one doesn't mask another. The matrix runs in parallel,
+  # which keeps total wall time around the existing 25-minute cap rather
+  # than ~70 minutes serialised. Each scenario brings up the full
+  # loadtest compose stack independently — there's no shared state
+  # between scenarios that would benefit from a single-runner serial
+  # invocation.
+  #
+  # Cadence: same as the API + connector tier job above (workflow_dispatch
+  # + Mondays 06:00 UTC). The scale scenarios DO produce useful per-PR
+  # signal in theory, but the per-run cost (image build + 5min run × 3)
+  # is too high to gate on every PR; weekly is the right trade-off.
+  # ---------------------------------------------------------------------------
+  # One matrix job per scale scenario. Each job checks out the repo, sets up
+  # Buildx, runs the matching `make loadtest-scale-*` target, and always
+  # uploads whatever landed in deploy/test/loadtest/results/.
+  k6-scale:
+    name: k6 scale tier (${{ matrix.scenario }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    # NOTE(review): `needs: k6` makes every matrix job wait for the k6 job
+    # above and skips them when it fails — confirm that gating is intended.
+    needs: k6
+    strategy:
+      # Parallel: a failure in one scenario shouldn't cancel the others.
+      # Each scenario's threshold breach is independent diagnostic data.
+      fail-fast: false
+      matrix:
+        scenario:
+          - bulk-renewal
+          - acme-burst
+          - agent-storm
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
+
+      - name: Run scale loadtest (${{ matrix.scenario }})
+        env:
+          BUILDKIT_PROGRESS: plain
+          # Hand the matrix value to the shell through the environment
+          # rather than interpolating the expression into the script text.
+          SCENARIO: ${{ matrix.scenario }}
+        run: |
+          case "$SCENARIO" in
+            bulk-renewal) make loadtest-scale-bulk ;;
+            acme-burst)   make loadtest-scale-acme ;;
+            agent-storm)  make loadtest-scale-agent ;;
+            *) echo "::error::unknown scenario $SCENARIO"; exit 1 ;;
+          esac
+
+      - name: Upload summary
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        with:
+          # Per-scenario artifact name so the three matrix runs don't
+          # collide on upload.
+          name: k6-scale-${{ matrix.scenario }}-${{ github.run_id }}
+          path: deploy/test/loadtest/results/
+          retention-days: 90
diff --git a/Makefile b/Makefile
index 20a8639..ef816e0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help build run test lint verify verify-deploy loadtest acme-cert-manager-test acme-rfc-conformance-test keycloak-integration-test okta-smoke-test benchmark-auth benchmark-auth-coldcache clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build e2e-test qa-stats
+.PHONY: help build run test lint verify verify-deploy loadtest loadtest-scale loadtest-scale-bulk loadtest-scale-acme loadtest-scale-agent acme-cert-manager-test acme-rfc-conformance-test keycloak-integration-test okta-smoke-test benchmark-auth benchmark-auth-coldcache clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build e2e-test qa-stats
 
 # Default target - show help
 help:
@@ -153,6 +153,49 @@ loadtest:
 	@echo "==> results landed in deploy/test/loadtest/results/"
 	@if [ -f deploy/test/loadtest/results/summary.txt ]; then cat deploy/test/loadtest/results/summary.txt; fi
 
+# Phase 8 SCALE-H2 — scale-tier load tests. Profile-gated in the
+# loadtest compose so the default `make loadtest` stays fast and
+# focused on the per-PR regression scope (API tier + connector tier).
+#
+# loadtest-scale-bulk runs the 10K-cert bulk-renew scenario.
+# loadtest-scale-acme runs the 200-VU ACME directory/nonce/ARI burst.
+# loadtest-scale-agent runs the 5K-agent heartbeat storm.
+#
+# Each target uses --exit-code-from <scenario-driver> so a threshold
+# breach surfaces as a non-zero make exit. The scale-seed init runs
+# once per invocation (idempotent via ON CONFLICT) so re-running a
+# target against the same compose stack is fine.
+# loadtest-scale-bulk: seed the scale fixtures, run only the bulk-renewal
+# k6 driver, then print its text summary if one was written.
+# The compose services are named explicitly: without a service list,
+# `--profile scale up` starts every scale driver (and every un-profiled
+# service) at once, and the first container to exit aborts the rest.
+loadtest-scale-bulk:
+	@echo "==> Phase 8 SCALE-H2: bulk-renewal scenario (10K cert fixture, ~6m)"
+	@cd deploy/test/loadtest && docker compose --profile scale up --build \
+	  --abort-on-container-exit --exit-code-from k6-scale-bulk \
+	  scale-seed k6-scale-bulk
+	@echo ""
+	@echo "==> results: deploy/test/loadtest/results/summary-bulk-renewal.{json,txt}"
+	@if [ -f deploy/test/loadtest/results/summary-bulk-renewal.txt ]; then \
+	  cat deploy/test/loadtest/results/summary-bulk-renewal.txt; fi
+
+# loadtest-scale-acme: run only the ACME burst k6 driver (plus the
+# scale-seed one-shot it depends on), then print its text summary if one
+# was written. Services are named explicitly so `--profile scale up` does
+# not also start the other scale drivers against the same server.
+loadtest-scale-acme:
+	@echo "==> Phase 8 SCALE-H2: ACME enrollment burst (200 VU, ~6m)"
+	@cd deploy/test/loadtest && docker compose --profile scale up --build \
+	  --abort-on-container-exit --exit-code-from k6-scale-acme \
+	  scale-seed k6-scale-acme
+	@echo ""
+	@echo "==> results: deploy/test/loadtest/results/summary-acme-burst.{json,txt}"
+	@if [ -f deploy/test/loadtest/results/summary-acme-burst.txt ]; then \
+	  cat deploy/test/loadtest/results/summary-acme-burst.txt; fi
+
+# loadtest-scale-agent: seed the scale fixtures, run only the agent
+# heartbeat-storm k6 driver, then print its text summary if one was
+# written. Services are named explicitly so `--profile scale up` does not
+# also start the other scale drivers against the same server.
+loadtest-scale-agent:
+	@echo "==> Phase 8 SCALE-H2: agent heartbeat storm (5K agent fixture, ~6m)"
+	@cd deploy/test/loadtest && docker compose --profile scale up --build \
+	  --abort-on-container-exit --exit-code-from k6-scale-agent \
+	  scale-seed k6-scale-agent
+	@echo ""
+	@echo "==> results: deploy/test/loadtest/results/summary-agent-storm.{json,txt}"
+	@if [ -f deploy/test/loadtest/results/summary-agent-storm.txt ]; then \
+	  cat deploy/test/loadtest/results/summary-agent-storm.txt; fi
+
+# All three Phase 8 scenarios serially. Use the matrix in
+# .github/workflows/loadtest.yml for parallel CI runs.
+# Aggregate target with no recipe of its own: it only lists the three
+# scenario targets as prerequisites (bulk, then acme, then agent).
+# NOTE(review): the serial order presumably relies on make running
+# without -j — confirm before invoking with parallel jobs.
+loadtest-scale: loadtest-scale-bulk loadtest-scale-acme loadtest-scale-agent
+
 # Auth Bundle 2 Phase 10 — Keycloak end-to-end OIDC integration test.
 # Boots a Keycloak container via testcontainers-go (quay.io/keycloak:25.0),
 # imports a canned realm with two groups + two users, and drives the
diff --git a/deploy/test/loadtest/README.md b/deploy/test/loadtest/README.md
index 6d54b83..1f50638 100644
--- a/deploy/test/loadtest/README.md
+++ b/deploy/test/loadtest/README.md
@@ -352,8 +352,35 @@ the ACME flow scenario. Operators with kind / cert-manager available
 should pair this with `make acme-cert-manager-test` for end-to-end
 verification.
 
+## Scale tier (Phase 8 SCALE-H2, 2026-05-14)
+
+Phase 8 closure added three new k6 scenarios that exercise the
+scale-relevant load surfaces the API tier and connector tier left
+uncovered:
+
+| Scenario | k6 file | Seed | Make target |
+|---|---|---|---|
+| Bulk-renewal under load | `k6/bulk_renewal.js` | `seed/01_bulk_renewal_certs.sql` (10K certs) | `make loadtest-scale-bulk` |
+| ACME enrollment burst | `k6/acme_burst.js` | (none — unauth surface) | `make loadtest-scale-acme` |
+| Agent heartbeat storm | `k6/agent_storm.js` | `seed/02_agent_fleet.sql` (5K agents) | `make loadtest-scale-agent` |
+
+The scale-tier scenarios live behind the `scale` compose profile so
+the default `make loadtest` (API tier + connector tier, ~7 min)
+stays fast. Run all three serially with `make loadtest-scale`, or
+trigger the `loadtest.yml` workflow's `k6-scale` matrix jobs from
+the Actions tab for canonical-hardware capture.
+
+Operator-facing baseline table + threshold contracts + documented
+limitations live in [`docs/operator/scale.md`](../../../docs/operator/scale.md)
+under the "Scale-tier scenarios (SCALE-H2, Phase 8)" section. Treat
+that as the canonical source — this README only links.
+
+The seed fixtures + their idempotency contract are documented in
+[`seed/README.md`](seed/README.md).
+
 ## Audit references
 
 - API tier:       2026-05-01 issuer coverage audit fix #8.
 - Connector tier: 2026-05-02 deployment-target audit Bundle 10.
 - ACME flows:     Phase 5 master prompt (project notes).
+- Scale tier:     2026-05-14 architecture diligence Phase 8 (SCALE-H2).
diff --git a/deploy/test/loadtest/docker-compose.yml b/deploy/test/loadtest/docker-compose.yml
index c60e45e..2bdaa57 100644
--- a/deploy/test/loadtest/docker-compose.yml
+++ b/deploy/test/loadtest/docker-compose.yml
@@ -351,3 +351,128 @@ services:
       - run
       - --summary-export=/results/summary.json
       - /scripts/k6.js
+
+  # ===========================================================================
+  # Phase 8 SCALE-H2 — scale-tier scenarios (opt-in via `--profile scale`).
+  #
+  # The default `make loadtest` path runs the API tier + connector tier
+  # scenarios above against the demo-scale seed. The Phase 8 scenarios are
+  # heavier (10K cert + 5K agent fixtures) and would slow the default path
+  # without serving the per-PR signal the existing run targets, so they live
+  # behind a separate compose profile.
+  #
+  # Three components, all profile-gated:
+  #   1. scale-seed    — one-shot init that runs ./seed/*.sql against the
+  #                      same postgres the server uses. Idempotent.
+  #   2. k6-scale-bulk / k6-scale-acme / k6-scale-agent — one driver each
+  #                      for the three Phase 8 scenarios. The matrix dispatch
+  #                      in .github/workflows/loadtest.yml picks one per job.
+  #
+  # Run a single scale scenario locally:
+  #   docker compose --profile scale up \
+  #       --abort-on-container-exit --exit-code-from k6-scale-bulk \
+  #       scale-seed k6-scale-bulk
+  # ===========================================================================
+
+  scale-seed:
+    # One-shot fixture loader. Reuses the postgres image because it already
+    # ships psql, connects through the PG* variables below, and applies every
+    # /seed/*.sql file in the shell's glob (lexical) order.
+    container_name: certctl-loadtest-scale-seed
+    image: postgres:16-alpine
+    profiles:
+      - scale
+    restart: "no"
+    depends_on:
+      # The server runs schema migrations + seed_demo.sql at boot, and the
+      # Phase 8 seeds reference FKs (iss-local, o-alice, t-platform,
+      # rp-standard) that seed_demo.sql creates. Required order:
+      #   postgres up → server runs migrations + seed_demo.sql → scale-seed runs
+      certctl-server:
+        condition: service_healthy
+      postgres:
+        condition: service_healthy
+    environment:
+      PGDATABASE: certctl
+      PGHOST: postgres
+      PGPASSWORD: loadtestpass
+      PGUSER: certctl
+    volumes:
+      - ./seed:/seed:ro
+    entrypoint: /bin/sh
+    command:
+      - -c
+      # `$$` is compose's escape for a literal `$` handed to the shell.
+      - |
+        set -eu
+        echo "==> Phase 8 scale-seed: running SQL fixtures (lexical order)"
+        for f in /seed/*.sql; do
+            echo "----> $$f"
+            psql -v ON_ERROR_STOP=1 -f "$$f"
+        done
+        echo "==> Phase 8 scale-seed: complete"
+
+  k6-scale-bulk:
+    # Bulk-renewal driver. Starts once the server is healthy and the
+    # scale-seed one-shot has exited successfully.
+    container_name: certctl-loadtest-k6-bulk
+    image: grafana/k6:0.54.0
+    profiles:
+      - scale
+    depends_on:
+      scale-seed:
+        condition: service_completed_successfully
+      certctl-server:
+        condition: service_healthy
+    environment:
+      K6_INSECURE_SKIP_TLS_VERIFY: "true"
+      CERTCTL_TOKEN: load-test-token
+      CERTCTL_BASE: https://certctl-server:8443
+    volumes:
+      - ./k6/bulk_renewal.js:/scripts/bulk_renewal.js:ro
+      - ./results:/results
+    # NOTE(review): the script's handleSummary also writes this .json path —
+    # confirm which writer is meant to own the file.
+    command:
+      - "run"
+      - "--summary-export=/results/summary-bulk-renewal.json"
+      - "/scripts/bulk_renewal.js"
+
+  k6-scale-acme:
+    # ACME burst driver against the directory URL configured below.
+    container_name: certctl-loadtest-k6-acme
+    image: grafana/k6:0.54.0
+    profiles:
+      - scale
+    depends_on:
+      # The ACME scenario doesn't need the SQL seeds (it hits the
+      # unauthenticated directory + nonce + ARI surface); it waits on
+      # scale-seed anyway so all three scale jobs share one predictable
+      # start-up order.
+      scale-seed:
+        condition: service_completed_successfully
+      certctl-server:
+        condition: service_healthy
+    environment:
+      K6_INSECURE_SKIP_TLS_VERIFY: "true"
+      CERTCTL_ACME_DIRECTORY: https://certctl-server:8443/acme/profile/prof-test/directory
+    volumes:
+      - ./k6/acme_burst.js:/scripts/acme_burst.js:ro
+      - ./results:/results
+    # NOTE(review): the script's handleSummary also writes this .json path —
+    # confirm which writer is meant to own the file.
+    command:
+      - "run"
+      - "--summary-export=/results/summary-acme-burst.json"
+      - "/scripts/acme_burst.js"
+
+  k6-scale-agent:
+    # Agent heartbeat-storm driver. Starts once the server is healthy and
+    # the scale-seed one-shot has exited successfully.
+    container_name: certctl-loadtest-k6-agent
+    image: grafana/k6:0.54.0
+    profiles:
+      - scale
+    depends_on:
+      scale-seed:
+        condition: service_completed_successfully
+      certctl-server:
+        condition: service_healthy
+    environment:
+      # Match the seed's 5K-agent fleet.
+      K6_AGENT_FLEET: "5000"
+      K6_INSECURE_SKIP_TLS_VERIFY: "true"
+      CERTCTL_TOKEN: load-test-token
+      CERTCTL_BASE: https://certctl-server:8443
+    volumes:
+      - ./k6/agent_storm.js:/scripts/agent_storm.js:ro
+      - ./results:/results
+    # NOTE(review): the script's handleSummary also writes this .json path —
+    # confirm which writer is meant to own the file.
+    command:
+      - "run"
+      - "--summary-export=/results/summary-agent-storm.json"
+      - "/scripts/agent_storm.js"
diff --git a/deploy/test/loadtest/k6/acme_burst.js b/deploy/test/loadtest/k6/acme_burst.js
new file mode 100644
index 0000000..11970f8
--- /dev/null
+++ b/deploy/test/loadtest/k6/acme_burst.js
@@ -0,0 +1,183 @@
+// Phase 8 SCALE-H2 — ACME enrollment burst.
+//
+// What this measures:
+//   200 concurrent VUs hammering the unauthenticated ACME directory
+//   + new-nonce + ARI surface for 5 minutes. The goal is the
+//   throughput ceiling for the entry-point handlers and the
+//   per-account rate-limit response shape Phase 5 added (RFC 8555
+//   §6.7 + RFC 7807 + the certctl-specific
+//   ErrACMEConcurrentOrdersExceeded path).
+//
+// What this does NOT measure (and why):
+//   - JWS-signed POST flows (new-account, new-order, finalize).
+//     k6 doesn't ship JWS, and bundling a Go signing helper into
+//     the k6 container would obscure the server-side latency the
+//     scenario is trying to pin. The existing
+//     `deploy/test/loadtest/k6/acme_flow.js` Phase 5 scenario
+//     made the same explicit trade-off; this Phase 8 burst scenario
+//     reuses the constraint. End-to-end JWS-signed conformance is
+//     gated by `make acme-rfc-conformance-test` (which uses lego
+//     against the same compose stack).
+//   - The actual order/finalize hot path. The newOrder handler's
+//     constant-time SCAN against acme_orders + the per-account
+//     concurrent-orders gate ARE useful to load-test, but require
+//     valid JWS to reach. The directory + new-nonce surface this
+//     scenario hits is what every ACME client transits BEFORE the
+//     signed flow — measuring it pins the server's headroom for
+//     the rest of the flow.
+//   - Issuer-side enrollment latency (DigiCert ACME, Let's Encrypt
+//     against a real prod CA, etc.). Same "load-testing someone
+//     else's API" carve-out as the API tier.
+//
+// What this DOES measure:
+//   - GET /acme/profile/{id}/directory throughput. 200 concurrent
+//     VUs loop back-to-back (the script has no per-VU sleep), which
+//     is estimated to produce ~600-1000 req/s against this endpoint
+//     — well above what any production ACME client would generate,
+//     but the right shape for finding the ceiling.
+//   - HEAD /acme/profile/{id}/new-nonce throughput. Nonce
+//     allocation is a hot path that writes one row to acme_nonces.
+//   - GET /acme/profile/{id}/renewal-info/{cert-id} 4xx fast path.
+//     Synthetic cert-id → handler returns 4xx without a DB lookup
+//     (cert-id is malformed at the parse layer). Measures the
+//     handler-front overhead under load.
+//   - 429 rate-limit response shape. The Phase 5 ACME per-account
+//     rate limit fires at sustained spike rates; the scenario pins
+//     that the 429 body is RFC 7807 with the
+//     "urn:ietf:params:acme:error:rateLimited" type. A regression
+//     that returned a plain text 429 or a different problem type
+//     would break ACME clients hard.
+//
+// Threshold contract:
+//   - directory p95 < 500ms, new-nonce p95 < 300ms, renewal-info
+//     p95 < 800ms — same as the Phase 5 acme_flow.js baselines.
+//   - 429 responses are EXPECTED at sustained 200 VU rate (the
+//     server's RFC-compliant rate limiter SHOULD kick in). The
+//     failure threshold is meant to count only 5xx responses, so
+//     429s must not break it; the separate
+//     `acme_rate_limited_count` Counter tracks them so the operator
+//     can see how often the limiter fires, and
+//     `acme_rate_limit_shape_ok` counts the ones that carry the
+//     expected problem+json shape.
+
+import http from 'k6/http';
+import { check } from 'k6';
+import { Counter, Trend } from 'k6/metrics';
+import { textSummary } from 'https://jslib.k6.io/k6-summary/0.0.2/index.js';
+
+// Full URL of the ACME directory document (not a base path): the
+// new-nonce and renewal-info URLs are read from the directory
+// response at runtime. Overridable via CERTCTL_ACME_DIRECTORY.
+const ACME_BASE = __ENV.CERTCTL_ACME_DIRECTORY ||
+    'https://certctl-server:8443/acme/profile/prof-test/directory';
+
+// Custom metrics.
+// One Trend per step, fed from res.timings.duration; the `true`
+// second argument marks the values as time values.
+const directoryDuration = new Trend('acme_directory_duration', true);
+const newNonceDuration  = new Trend('acme_new_nonce_duration', true);
+const renewalInfoDuration = new Trend('acme_renewal_info_duration', true);
+// Every 429 seen, and the subset whose body matched the expected
+// problem+json rateLimited shape (see recordRateLimit).
+const rateLimitedCount  = new Counter('acme_rate_limited_count');
+const rateLimitShapeOK  = new Counter('acme_rate_limit_shape_ok');
+
+export const options = {
+    scenarios: {
+        acme_burst: {
+            executor: 'constant-vus',
+            vus: parseInt(__ENV.K6_ACME_VUS || '200', 10),
+            duration: __ENV.K6_ACME_DURATION || '5m',
+            gracefulStop: '30s',
+            tags: { scenario: 'acme_burst' },
+        },
+    },
+    thresholds: {
+        'acme_directory_duration':    ['p(95)<500'],
+        'acme_new_nonce_duration':    ['p(95)<300'],
+        'acme_renewal_info_duration': ['p(95)<800'],
+        // 4xx (rate-limited or malformed-cert-id) is expected; 5xx is
+        // not. Filter to status >= 500 for the failure floor.
+        'http_req_failed{scenario:acme_burst,server_error:true}': ['rate<0.001'],
+    },
+    insecureSkipTLSVerify: true,
+    summaryTrendStats: ['avg', 'min', 'med', 'p(95)', 'p(99)', 'max'],
+};
+
+export default function () {
+    // Step 1 — directory.
+    let res = http.get(ACME_BASE, {
+        tags: { scenario: 'acme_burst', step: 'directory' },
+    });
+    directoryDuration.add(res.timings.duration);
+    check(res, { 'directory 200': (r) => r.status === 200 });
+
+    if (res.status === 429) {
+        recordRateLimit(res);
+        return; // backoff this VU iteration
+    }
+    if (res.status !== 200) return;
+
+    const dir = res.json();
+
+    // Step 2 — new-nonce.
+    if (dir.newNonce) {
+        res = http.head(dir.newNonce, {
+            tags: { scenario: 'acme_burst', step: 'new_nonce' },
+        });
+        newNonceDuration.add(res.timings.duration);
+        if (res.status === 429) {
+            recordRateLimit(res);
+            return;
+        }
+        check(res, {
+            'new-nonce 200': (r) => r.status === 200,
+            'replay-nonce header present': (r) => !!r.headers['Replay-Nonce'],
+        });
+    }
+
+    // Step 3 — ARI synthetic 4xx fast path. Phase 4 added ARI
+    // (RFC 9773); this exercises the malformed-cert-id branch which
+    // returns a 4xx without a DB lookup. Pinning this here means a
+    // regression that turned the malformed path into a DB query
+    // would surface as a p95 spike.
+    if (dir.renewalInfo) {
+        res = http.get(dir.renewalInfo + '/aaaa.bbbb', {
+            tags: { scenario: 'acme_burst', step: 'renewal_info' },
+        });
+        renewalInfoDuration.add(res.timings.duration);
+        if (res.status === 429) {
+            recordRateLimit(res);
+            return;
+        }
+        check(res, {
+            'renewal-info 4xx for synthetic cert-id':
+                (r) => r.status === 400 || r.status === 404,
+        });
+    }
+}
+
+// recordRateLimit pins the Phase 5 ACME rate-limit response shape:
+//   - HTTP 429
+//   - Content-Type: application/problem+json
+//   - Body: {"type":"urn:ietf:params:acme:error:rateLimited", ...}
+// A regression that returned 503 or a plain-text 429 or a different
+// problem type would NOT increment acme_rate_limit_shape_ok and the
+// operator would see (rate_limited_count - shape_ok_count) > 0 in
+// the summary.
+function recordRateLimit(res) {
+    rateLimitedCount.add(1);
+    const ct = res.headers['Content-Type'] || '';
+    if (!ct.includes('application/problem+json')) {
+        return;
+    }
+    let body;
+    try {
+        body = res.json();
+    } catch (e) {
+        return;
+    }
+    if (body && typeof body.type === 'string' &&
+        body.type.startsWith('urn:ietf:params:acme:error:rateLimited')) {
+        rateLimitShapeOK.add(1);
+    }
+}
+
+export function handleSummary(data) {
+    return {
+        '/results/summary-acme-burst.json': JSON.stringify(data, null, 2),
+        '/results/summary-acme-burst.txt': textSummary(data, { indent: ' ', enableColors: false }),
+        stdout: textSummary(data, { indent: ' ', enableColors: true }),
+    };
+}
diff --git a/deploy/test/loadtest/k6/agent_storm.js b/deploy/test/loadtest/k6/agent_storm.js
new file mode 100644
index 0000000..3c12c56
--- /dev/null
+++ b/deploy/test/loadtest/k6/agent_storm.js
@@ -0,0 +1,126 @@
+// Phase 8 SCALE-H2 — agent fleet heartbeat storm.
+//
+// What this measures:
+//   5,000 agents heartbeating at 30s intervals = ~167 heartbeats/sec
+//   sustained. Each heartbeat is POST /api/v1/agents/{id}/heartbeat
+//   with optional metadata. Pre-seeded fleet provided by
+//   deploy/test/loadtest/seed/02_agent_fleet.sql.
+//
+// What this does NOT measure:
+//   - The agent work-poll path (GET /api/v1/agents/{id}/work). The
+//     heartbeat hot path is the highest-frequency call on a typical
+//     fleet (work-poll cadence is 30s default like heartbeat, but
+//     work-poll returns the empty set 99% of the time and is cheap;
+//     heartbeat does an UPDATE on every call). v2 of the harness
+//     could combine them.
+//   - The agent CSR-submit path (POST /api/v1/agents/{id}/csr). That
+//     fires on per-cert issuance, not per heartbeat, and is exercised
+//     by the existing API tier's POST /api/v1/certificates scenario.
+//   - Auth-key per-agent rotation. The loadtest stack runs with a
+//     single api-key (`load-test-token`); per-agent api-key
+//     hashing/rotation isn't a load axis.
+//
+// Why constant-arrival-rate (not constant-vus):
+//   The point is to model what 5K real agents would offer the server
+//   at their native cadence. 5K agents * (1 heartbeat / 30s) =
+//   166.67 req/s offered. constant-arrival-rate fires at exactly
+//   that rate regardless of latency; if the server backpressures,
+//   queue builds and p99 shows it. constant-vus would let slow
+//   responses block, masking the actual ceiling.
+//
+// Threshold contract:
+//   - p99 < 1s for the heartbeat POST. The handler does an UPDATE on
+//     agents.last_heartbeat_at (+ optional metadata columns) and an
+//     RBAC check. Even at 200 req/s a tight UPDATE on an indexed
+//     primary key should stay sub-second.
+//   - p95 < 500ms.
+//   - Error rate < 0.1%. The seeded agents are all status='Online'
+//     so no 410 Gone (retired-agent) responses; anything 4xx is a
+//     bug. 5xx is a server health regression.
+//
+// Phase 8 reference:
+//   - Source finding: SCALE-H2.
+//   - Pre-state: heartbeat path not load-tested. The 100-agent demo
+//     seed in seed_demo.sql produces ~3 heartbeats/sec, orders of
+//     magnitude below fleet scale.
+
+import http from 'k6/http';
+import { check } from 'k6';
+import exec from 'k6/execution';
+import { textSummary } from 'https://jslib.k6.io/k6-summary/0.0.2/index.js';
+
+// Server base URL and bearer token; both overridable from the
+// environment.
+const BASE  = __ENV.CERTCTL_BASE  || 'https://certctl-server:8443';
+const TOKEN = __ENV.CERTCTL_TOKEN || 'load-test-token';
+
+// 5000 agents * (1 / 30s) = 166.67 heartbeats/sec. Round to 167.
+// Overridable via K6_AGENT_RATE.
+const TARGET_RATE = parseInt(__ENV.K6_AGENT_RATE || '167', 10);
+
+// Total agents in the fleet seed (overridable via K6_AGENT_FLEET).
+// The scenario derives the agent index deterministically from k6's
+// iteration counters, modulo this size (see agentID below) — it is
+// not random — to spread the per-row UPDATE pressure across the
+// table.
+const FLEET_SIZE = parseInt(__ENV.K6_AGENT_FLEET || '5000', 10);
+
+export const options = {
+    scenarios: {
+        agent_storm: {
+            executor: 'constant-arrival-rate',
+            rate: TARGET_RATE,
+            timeUnit: '1s',
+            duration: '5m',
+            preAllocatedVUs: 50,
+            maxVUs: 200,
+            exec: 'heartbeat',
+            tags: { scenario: 'agent_storm' },
+        },
+    },
+    thresholds: {
+        'http_req_duration{scenario:agent_storm}': ['p(99)<1000', 'p(95)<500'],
+        'http_req_failed{scenario:agent_storm}': ['rate<0.001'],
+    },
+    summaryTrendStats: ['avg', 'min', 'med', 'p(95)', 'p(99)', 'max'],
+    insecureSkipTLSVerify: true,
+};
+
+// agentID returns a deterministic agent id from the loadtest fleet
+// seed. Spreading round-robin across the fleet means the UPDATE
+// pressure hits every row equally rather than the same hot row over
+// and over.
+function agentID() {
+    // __ITER is k6's per-VU iteration counter; combined with __VU
+    // (the VU index) we get a unique-per-call number that spans
+    // 0..FLEET_SIZE on the modulo.
+    const idx = (__VU * 1000 + __ITER) % FLEET_SIZE;
+    return 'ag-loadtest-' + String(idx + 1).padStart(5, '0');
+}
+
+export function heartbeat() {
+    const id = agentID();
+    // Optional metadata; the heartbeat handler tolerates an empty body
+    // (no metadata) but real agents send their version + hostname on
+    // every call so we include them here.
+    const payload = JSON.stringify({
+        version: '2.1.0',
+        hostname: 'loadtest-' + id.slice(-5) + '.fleet.example.test',
+        os: 'linux',
+        architecture: 'amd64',
+    });
+
+    const res = http.post(`${BASE}/api/v1/agents/${id}/heartbeat`, payload, {
+        headers: {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${TOKEN}`,
+        },
+        tags: { scenario: 'agent_storm' },
+    });
+
+    check(res, {
+        'heartbeat 2xx': (r) => r.status >= 200 && r.status < 300,
+    });
+}
+
+export function handleSummary(data) {
+    return {
+        '/results/summary-agent-storm.json': JSON.stringify(data, null, 2),
+        '/results/summary-agent-storm.txt': textSummary(data, { indent: ' ', enableColors: false }),
+        stdout: textSummary(data, { indent: ' ', enableColors: true }),
+    };
+}
diff --git a/deploy/test/loadtest/k6/bulk_renewal.js b/deploy/test/loadtest/k6/bulk_renewal.js
new file mode 100644
index 0000000..eed3b68
--- /dev/null
+++ b/deploy/test/loadtest/k6/bulk_renewal.js
@@ -0,0 +1,129 @@
+// Phase 8 SCALE-H2 — bulk-renewal under load.
+//
+// What this measures:
+//   POST /api/v1/certificates/bulk-renew throughput against a
+//   10K-cert pre-seeded fleet. Each iteration POSTs a criteria-mode
+//   bulk-renew request scoped by team_id + issuer_id (the criteria
+//   type has no tag filter) so the server enqueues N renewal jobs
+//   and returns a per-cert {certificate_id, job_id} envelope.
+//
+// Why criteria-mode (not certificate-ids mode):
+//   Criteria-mode lets the scenario re-fire without maintaining a
+//   moving list of cert IDs. The seeded fleet carries a stable
+//   `tags.batch = 'bulk-renewal'` marker, but the request cannot
+//   filter on it; it is scoped by team_id 't-platform' + issuer_id
+//   'iss-local' instead. NOTE(review): any other certificate bound
+//   to that team and issuer would also match, so do not point this
+//   scenario at a non-loadtest server — against a server without
+//   those ids the criteria simply matches nothing.
+//
+// What this does NOT measure:
+//   - The scheduler's renewal scan itself. The bulk-renew handler
+//     enqueues issuance jobs synchronously into the `jobs` table;
+//     the scheduler's `jobProcessorLoop` picks them up on its next
+//     tick. The DB write throughput is what's measured here; the
+//     job-execution path is bounded by per-issuer concurrency
+//     (CERTCTL_RENEWAL_CONCURRENCY=25 default) and isn't usefully
+//     amplified by adding more inbound bulk-renew calls.
+//   - Full POST → poll deployments → cert-served loop. Same v1/v2
+//     deferral as the connector-tier scenarios — needs the agent
+//     poll surface plumbed end-to-end.
+//
+// Threshold contract:
+//   - p99 < 5s, p95 < 2s for the bulk-renew POST. Each call walks
+//     the criteria, materializes the matching managed_certificates
+//     rows, inserts N rows into `jobs`, and returns the envelope.
+//   - Error rate < 1%. Anything 4xx/5xx counts.
+//
+// Phase 8 reference:
+//   - Source finding: SCALE-H2.
+//   - Pre-state: only the API tier (50 req/s POST /certificates +
+//     GET /certificates) and connector tier (per-target handshake)
+//     were measured. The bulk-renew hot path was uncovered.
+//   - Seed: deploy/test/loadtest/seed/01_bulk_renewal_certs.sql
+//     creates 10K rows with tags.batch='bulk-renewal'. The seed
+//     must run before this scenario; the scale-seed compose
+//     profile gates this.
+
+import http from 'k6/http';
+import { check } from 'k6';
+import { textSummary } from 'https://jslib.k6.io/k6-summary/0.0.2/index.js';
+
+// Server base URL and bearer token; both overridable from the
+// environment.
+// NOTE(review): the fallback here is localhost, whereas the other
+// scale scripts fall back to certctl-server — confirm the difference
+// is intentional. The compose service sets CERTCTL_BASE explicitly.
+const BASE  = __ENV.CERTCTL_BASE  || 'https://localhost:8443';
+const TOKEN = __ENV.CERTCTL_TOKEN || 'load-test-token';
+
+// Sustained throughput target. constant-arrival-rate at 5 req/s for 5
+// minutes = 1500 bulk-renew POSTs. Each POST touches up to 10K
+// managed_certificates rows (criteria scan) + inserts up to 10K
+// rows into `jobs`, so the offered load is higher than the API
+// tier's 50 req/s on raw queries-per-second but the per-call
+// cost is larger.
+//
+// 5 req/s was picked deliberately:
+//   - 50 req/s combined with the API tier's 50 saturates the demo-
+//     scale compose's DB pool (CERTCTL_DATABASE_MAX_CONNS=50). The
+//     Phase 8 scenario should measure the per-call ceiling without
+//     fighting the pool.
+//   - Each call enqueues thousands of jobs; the scheduler's
+//     jobProcessorLoop has finite per-tick budget. Pushing higher
+//     than 5 req/s would queue work faster than the scheduler
+//     drains it, which produces a transient backlog metric (worth
+//     measuring eventually) but isn't what SCALE-H2 asks for.
+export const options = {
+    scenarios: {
+        bulk_renewal: {
+            executor: 'constant-arrival-rate',
+            rate: 5,
+            timeUnit: '1s',
+            duration: '5m',
+            preAllocatedVUs: 10,
+            maxVUs: 30,
+            exec: 'bulkRenewal',
+            tags: { scenario: 'bulk_renewal' },
+        },
+    },
+    thresholds: {
+        // Single-scenario threshold — narrower than the API tier
+        // because each call is heavier (DB scan + N inserts).
+        'http_req_duration{scenario:bulk_renewal}': ['p(99)<5000', 'p(95)<2000'],
+        'http_req_failed{scenario:bulk_renewal}': ['rate<0.01'],
+    },
+    summaryTrendStats: ['avg', 'min', 'med', 'p(95)', 'p(99)', 'max'],
+    insecureSkipTLSVerify: true,
+};
+
+export function bulkRenewal() {
+    // Scope by team_id — the seed binds every loadtest cert to
+    // t-platform; in a production-multi-tenant deploy, team scoping
+    // is the typical bulk-renew shape. This exercises the criteria
+    // walker AND the team-scoped permission check in the handler.
+    //
+    // NOTE: this does NOT include `tags` because the BulkRenewalCriteria
+    // domain type (handler/bulk_renewal.go) only exposes profile_id,
+    // owner_id, agent_id, issuer_id, team_id, certificate_ids — not
+    // tag-based filtering. The team_id scope plus the production-
+    // separated FK guarantees we only touch the Phase 8 seed.
+    const payload = JSON.stringify({
+        team_id: 't-platform',
+        issuer_id: 'iss-local',
+    });
+
+    const res = http.post(`${BASE}/api/v1/certificates/bulk-renew`, payload, {
+        headers: {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${TOKEN}`,
+        },
+        tags: { scenario: 'bulk_renewal' },
+    });
+
+    check(res, {
+        'bulk-renew 2xx': (r) => r.status >= 200 && r.status < 300,
+    });
+}
+
+export function handleSummary(data) {
+    return {
+        '/results/summary-bulk-renewal.json': JSON.stringify(data, null, 2),
+        '/results/summary-bulk-renewal.txt': textSummary(data, { indent: ' ', enableColors: false }),
+        stdout: textSummary(data, { indent: ' ', enableColors: true }),
+    };
+}
diff --git a/deploy/test/loadtest/seed/01_bulk_renewal_certs.sql b/deploy/test/loadtest/seed/01_bulk_renewal_certs.sql
new file mode 100644
index 0000000..8c75eeb
--- /dev/null
+++ b/deploy/test/loadtest/seed/01_bulk_renewal_certs.sql
@@ -0,0 +1,85 @@
+-- Phase 8 SCALE-H2: bulk-renewal scenario seed.
+--
+-- Generates 10,000 managed_certificates rows linked to the existing
+-- seed_demo.sql FKs (iss-local, o-alice, t-platform, rp-standard) so
+-- the bulk-renewal k6 scenario can POST /api/v1/certificates/bulk-renew
+-- against a fleet-scale dataset instead of the 15-row demo seed.
+--
+-- Behavior:
+--   - Idempotent. ON CONFLICT (name) DO NOTHING — re-running the seed
+--     against an already-seeded DB is a no-op.
+--   - expires_at is uniformly distributed across the next 30 days so
+--     a renewal_window_days = 30 policy considers every row eligible.
+--   - status = 'active' so the renewal selector treats them as
+--     live (the scheduler skips status IN ('pending', 'failed',
+--     'revoked', 'retired')).
+--   - name is generated as 'loadtest-bulk-NNNNN.example.test' for a
+--     stable, predictable identifier that marks the seeded set: the
+--     row-count check below matches on this prefix, and a production
+--     fleet wouldn't share it.
+--
+-- Volume target: 10,000 rows. Insert wall time on the loadtest stack
+-- (postgres:16-alpine, 2 CPU / 4 GiB): typically < 5 seconds via the
+-- single-statement generate_series + INSERT pattern below. The
+-- compose seed-init container runs this BEFORE the k6 driver starts,
+-- so the steady-state load measurement isn't affected by seed time.
+--
+-- Why not generated in Go via a fixtures helper:
+--   - The certctl-server boots from a clean DB and runs migrations +
+--     seed_demo.sql automatically when CERTCTL_DEMO_SEED=true. Adding
+--     a Go-side fixtures helper would require either (a) a new
+--     CERTCTL_LOADTEST_SEED flag wired into cmd/server/main.go (cross-
+--     cutting change for one test path) or (b) a separate seed binary
+--     (more compose surface). Raw SQL is the smallest viable change.
+--
+-- Phase 8 entry point — runs only when the loadtest compose stack is
+-- explicitly opted into the scale-seed via LOADTEST_SCALE_SEED=true.
+
+INSERT INTO managed_certificates (
+    id,
+    name,
+    common_name,
+    sans,
+    environment,
+    owner_id,
+    team_id,
+    issuer_id,
+    renewal_policy_id,
+    status,
+    expires_at,
+    tags,
+    created_at,
+    updated_at
+)
+SELECT
+    'cert-loadtest-bulk-' || lpad(g::text, 5, '0'),
+    'loadtest-bulk-' || lpad(g::text, 5, '0') || '.example.test',
+    'loadtest-bulk-' || lpad(g::text, 5, '0') || '.example.test',
+    ARRAY['loadtest-bulk-' || lpad(g::text, 5, '0') || '.example.test'],
+    'loadtest',
+    'o-alice',
+    't-platform',
+    'iss-local',
+    'rp-standard',
+    'active',
+    -- Spread expires_at over hours 1..719 from now: strictly in the
+    -- future and inside a 30-day (720 h) renewal window for every row.
+    NOW() + make_interval(hours => 1 + (g % 719)),
+    jsonb_build_object('source', 'loadtest-phase8', 'batch', 'bulk-renewal'),
+    NOW(),
+    NOW()
+FROM generate_series(1, 10000) AS g
+ON CONFLICT (name) DO NOTHING;
+
+-- Confirmation row count — the seed-init container greps this in its
+-- logs to verify the fleet shape post-insert. The output appears in
+-- `docker compose logs scale-seed` (the compose service name) after the run.
+DO $$
+DECLARE
+    seeded_rows integer;
+BEGIN
+    -- Count rows carrying the seed's name prefix and report them as a NOTICE.
+    seeded_rows := (SELECT COUNT(*) FROM managed_certificates WHERE name LIKE 'loadtest-bulk-%');
+
+    RAISE NOTICE 'Phase 8 bulk-renewal seed: % managed_certificates rows present', seeded_rows;
+END $$;
diff --git a/deploy/test/loadtest/seed/02_agent_fleet.sql b/deploy/test/loadtest/seed/02_agent_fleet.sql
new file mode 100644
index 0000000..f5166ca
--- /dev/null
+++ b/deploy/test/loadtest/seed/02_agent_fleet.sql
@@ -0,0 +1,85 @@
+-- Phase 8 SCALE-H2: agent-fleet heartbeat-storm scenario seed.
+--
+-- Generates 5,000 agents rows so the heartbeat-storm k6 scenario can
+-- model a fleet-scale heartbeat pattern (5K agents heartbeating at the
+-- native 30s cadence = ~167 heartbeats/sec sustained) instead of the
+-- ~10-agent demo seed.
+--
+-- Behavior:
+--   - Idempotent. ON CONFLICT (id) DO NOTHING — re-runnable against an
+--     already-seeded DB.
+--   - name is unique (a UNIQUE constraint in migration 000001) so the
+--     name suffix mirrors the id suffix.
+--   - status = 'Online' so the heartbeat handler's retire-check
+--     (service.ErrAgentRetired) doesn't 410 the storm.
+--   - last_heartbeat_at staggered across the prior 60 seconds so the
+--     stale-agent reaper (agentHealthCheckLoop) doesn't immediately
+--     flip half the fleet to 'Offline' during the first scheduler
+--     tick of the load run.
+--   - api_key_hash = 'loadtest_no_auth'. The loadtest compose runs
+--     CERTCTL_AUTH_TYPE=api-key with a single static token
+--     (load-test-token), which bypasses per-agent key check the same
+--     way the existing API tier scenarios do. Production deploys with
+--     CERTCTL_AUTH_TYPE=agent-key per-agent would seed real bcrypt'd
+--     hashes; this column is opaque to the load-test path.
+--   - registered_at = NOW() - (g % 90 + 1) days (deterministic 1-90 day
+--     spread) so agent age looks realistic and age-based plans are exercised.
+--
+-- Volume target: 5,000 rows. The agents schema is much narrower than
+-- managed_certificates so the insert is sub-second on the loadtest
+-- stack. The 5K agents do not own any deployment_targets in this
+-- fixture (the scenario only measures the heartbeat hot path, not
+-- the work-poll path which depends on cert + target wiring).
+--
+-- Phase 8 entry point — runs only when the loadtest compose stack is
+-- explicitly opted into the scale-seed via LOADTEST_SCALE_SEED=true.
+
+INSERT INTO agents (
+    id,
+    name,
+    hostname,
+    status,
+    last_heartbeat_at,
+    registered_at,
+    api_key_hash,
+    os,
+    architecture,
+    ip_address,
+    version
+)
+SELECT
+    'ag-loadtest-' || to_char(g, 'FM00000'),
+    'loadtest-agent-' || to_char(g, 'FM00000'),
+    format('loadtest-%s.fleet.example.test', to_char(g, 'FM00000')),
+    'Online',
+    -- last_heartbeat_at: 0-59 seconds in the past, cycling with g, so
+    -- the seeded fleet starts with recent, staggered heartbeats rather
+    -- than a single identical timestamp.
+    NOW() - make_interval(secs => g % 60),
+    -- registered_at: 1-90 days in the past, deterministic per g.
+    NOW() - make_interval(days => g % 90 + 1),
+    'loadtest_no_auth',
+    -- os: g % 10 = 0 -> windows, 1 -> darwin, otherwise linux
+    -- (a 10% / 10% / 80% split over the 5,000 rows).
+    CASE
+        WHEN g % 10 = 0 THEN 'windows'
+        WHEN g % 10 = 1 THEN 'darwin'
+        ELSE 'linux'
+    END,
+    -- architecture: every fifth row is arm64, the rest amd64.
+    CASE g % 5 WHEN 0 THEN 'arm64' ELSE 'amd64' END,
+    -- ip_address: 10.42.<g / 256>.<g % 256>, deterministic per g.
+    format('10.42.%s.%s', (g / 256) % 256, g % 256),
+    '2.1.0'
+FROM generate_series(1, 5000) AS g
+ON CONFLICT (id) DO NOTHING;
+
+DO $$
+DECLARE
+    seeded_rows integer;
+BEGIN
+    -- Count rows carrying the seed's id prefix and report them as a NOTICE.
+    seeded_rows := (SELECT COUNT(*) FROM agents WHERE id LIKE 'ag-loadtest-%');
+
+    RAISE NOTICE 'Phase 8 agent-storm seed: % agents rows present', seeded_rows;
+END $$;
diff --git a/deploy/test/loadtest/seed/README.md b/deploy/test/loadtest/seed/README.md
new file mode 100644
index 0000000..93672ec
--- /dev/null
+++ b/deploy/test/loadtest/seed/README.md
@@ -0,0 +1,87 @@
+# Phase 8 load-test seed fixtures
+
+Opt-in seed scripts that grow the loadtest DB from the demo-scale
+fixture (~15 certs / ~10 agents from `migrations/seed_demo.sql`) to
+fleet scale (10K certs + 5K agents) so the Phase 8 SCALE-H2 scenarios
+measure something representative.
+
+## When these run
+
+The default `make loadtest` path does NOT touch this directory — the
+API tier and connector tier scenarios run against the demo seed alone
+and complete in ~5 minutes. The Phase 8 scenarios opt in via the
+`LOADTEST_SCALE_SEED=true` environment variable; when set, the
+`certctl-loadtest-scale-seed` one-shot init container runs every
+`*.sql` file in this directory in lexical order against the same
+Postgres instance the server uses.
+
+Compose service wiring (see `../docker-compose.yml`):
+- Service: `scale-seed`
+- Profile: `scale-seed` (compose `profiles:` gate; not started by
+  default)
+- Depends on: `postgres` (service_healthy) AND `certctl-server`
+  (service_healthy — server runs schema migrations at boot so the
+  seed runs AFTER tables exist)
+- Order: lexical (`01_bulk_renewal_certs.sql` then
+  `02_agent_fleet.sql`)
+- Idempotent: every script uses `ON CONFLICT DO NOTHING` so re-running
+  is a no-op.
+
+## What gets seeded
+
+| File | Rows | Purpose |
+|---|---|---|
+| `01_bulk_renewal_certs.sql` | 10,000 managed_certificates | Fleet shape for `bulk_renewal.js`. All linked to demo FKs (iss-local, o-alice, t-platform, rp-standard). Status `active`, expires_at distributed across the next 30 days so a 30-day renewal window considers every row eligible. Name prefix `loadtest-bulk-` marks the seeded rows; the k6 scenario itself scopes its bulk-renew criteria by team_id + issuer_id. |
+| `02_agent_fleet.sql` | 5,000 agents | Fleet shape for `agent_storm.js`. Status `Online`, last_heartbeat_at staggered across prior 60s, name prefix `loadtest-agent-`. OS distribution: 80% linux / 10% windows / 10% darwin. Arch: 80% amd64 / 20% arm64. |
+
+## How to run the Phase 8 scenarios locally
+
+```bash
+cd deploy/test/loadtest
+LOADTEST_SCALE_SEED=true docker compose --profile scale-seed up --build \
+    --abort-on-container-exit --exit-code-from k6-scale
+```
+
+Or via the dedicated Makefile target (preferred for CI parity):
+
+```bash
+make loadtest-scale
+```
+
+## Why SQL fixtures instead of a Go seed binary
+
+- The certctl-server already boots from a clean DB and runs migrations
+  + `seed_demo.sql` when `CERTCTL_DEMO_SEED=true`. Adding a third seed
+  mode (loadtest-scale) would mean either a new
+  `CERTCTL_LOADTEST_SEED` flag wired into `cmd/server/main.go` (cross-
+  cutting change for one test path) or a separate seed binary (more
+  compose surface).
+- Raw SQL is the smallest viable change: each script is a single
+  multi-row `INSERT … SELECT FROM generate_series(…)` plus a
+  `DO $$ … RAISE NOTICE` confirmation block.
+- Idempotency is straightforward via `ON CONFLICT … DO NOTHING` — the
+  same pattern `seed_demo.sql` uses.
+
+## Why these volumes specifically
+
+- **10K certs.** The SCALE-H2 audit asked for "10K certs with
+  renewal_at < now." Round number, fits in postgres:16-alpine on a
+  CI runner without OOM, and large enough that the renewal selector's
+  query plan is exercised (the demo's 15 rows would index-scan
+  trivially).
+- **5K agents.** Heartbeat at 30s cadence = ~167 heartbeats/sec
+  sustained. That's well above the 50 req/s the existing API tier
+  measures and stresses the agent.heartbeat handler's per-call cost
+  (last_heartbeat_at UPDATE + the RBAC permission check + the
+  audit-log row).
+
+If a future scenario needs more rows (50K certs / 10K agents), add a
+new `03_…sql` here and another scenario file. Don't grow the existing
+files — re-running existing scenarios against a different fixture
+shape would invalidate the captured baseline.
+
+## Phase 8 audit reference
+
+Source finding: SCALE-H2 in
+`cowork/certctl-architecture-diligence-audit.html`.
+Phase 8 closure commit: see `git log --grep='Phase 8'`.
diff --git a/docs/operator/scale.md b/docs/operator/scale.md
index 51bd8a3..9ccefd7 100644
--- a/docs/operator/scale.md
+++ b/docs/operator/scale.md
@@ -121,6 +121,116 @@ endpoint and repeat the request with the same value in an
 `If-None-Match:` header — the second request should return 304 with
 an empty body.
 
+## Scale-tier scenarios (SCALE-H2, Phase 8)
+
+Phase 8 (2026-05-14) extended the k6 load-test harness with three new
+scenarios that exercise the scale-relevant load surfaces the original
+API tier left uncovered. They live behind a compose profile gate
+(`docker compose --profile scale-seed`) so the default `make loadtest`
+stays focused on per-PR regression scope. The full set runs weekly on
+the same `loadtest.yml` cron as the API + connector tier.
+
+| Scenario | k6 file | Seed fixture | Sustained load |
+|---|---|---|---|
+| Bulk-renewal under load | `deploy/test/loadtest/k6/bulk_renewal.js` | 10,000 managed_certificates (`seed/01_bulk_renewal_certs.sql`) | 5 req/s POST `/api/v1/certificates/bulk-renew` × 5 min |
+| ACME enrollment burst | `deploy/test/loadtest/k6/acme_burst.js` | (none — unauth surface) | 200 concurrent VUs × directory/nonce/ARI × 5 min |
+| Agent heartbeat storm | `deploy/test/loadtest/k6/agent_storm.js` | 5,000 agents (`seed/02_agent_fleet.sql`) | 167 req/s POST `/api/v1/agents/{id}/heartbeat` × 5 min |
+
+### Threshold contracts (regression guards, NOT measured baselines)
+
+| Scenario | Metric | Threshold |
+|---|---|---|
+| Bulk-renewal | `http_req_duration{scenario:bulk_renewal}` p99 | < 5 s |
+| Bulk-renewal | `http_req_duration{scenario:bulk_renewal}` p95 | < 2 s |
+| Bulk-renewal | `http_req_failed{scenario:bulk_renewal}` | < 1% |
+| ACME burst | `acme_directory_duration` p95 | < 500 ms |
+| ACME burst | `acme_new_nonce_duration` p95 | < 300 ms |
+| ACME burst | `acme_renewal_info_duration` p95 | < 800 ms |
+| ACME burst | `http_req_failed{server_error:true}` 5xx-only | < 0.1% |
+| Agent storm | `http_req_duration{scenario:agent_storm}` p99 | < 1 s |
+| Agent storm | `http_req_duration{scenario:agent_storm}` p95 | < 500 ms |
+| Agent storm | `http_req_failed{scenario:agent_storm}` | < 0.1% |
+
+429 rate-limit responses on the ACME burst are EXPECTED — Phase 5's
+per-account rate limiter SHOULD fire at sustained 200-VU pressure.
+The custom `acme_rate_limited_count` Counter tracks how often it
+fires; `acme_rate_limit_shape_ok` Counter verifies every 429 returns
+the RFC 7807 `application/problem+json` shape with the
+`urn:ietf:params:acme:error:rateLimited` type. A regression that
+returned plain-text 429 or a different problem type would surface as
+`(rate_limited_count - shape_ok_count) > 0` in the summary.
+
+### Measured baseline — TBD pending canonical-hardware capture
+
+The Phase 8 scenarios shipped 2026-05-14. Baseline capture on a
+canonical `ubuntu-latest` GitHub runner is the next operational step;
+until then, the table below holds TBD placeholders. **Do NOT publish
+sandbox-captured numbers here** — the same anti-pattern the original
+loadtest README guards against (sandbox-aggregate placeholder vs
+canonical hardware) applies to Phase 8.
+
+| Scenario | p50 | p95 | p99 | Error rate | Date measured | Commit |
+|---|---|---|---|---|---|---|
+| **bulk_renewal** | TBD | TBD | TBD | TBD | — | — |
+| **acme_burst** directory | TBD | TBD | TBD | TBD | — | — |
+| **acme_burst** new-nonce | TBD | TBD | TBD | TBD | — | — |
+| **acme_burst** renewal-info | TBD | TBD | TBD | TBD | — | — |
+| **agent_storm** | TBD | TBD | TBD | TBD | — | — |
+
+Capture procedure: trigger `loadtest.yml` from the Actions tab against
+the current `master` SHA; wait for the `k6-scale` matrix jobs to
+complete; download the per-scenario summary artifacts; copy p50/p95/
+p99 from `summary-<scenario>.json` into the table; commit the
+captured numbers alongside the date + SHA. Replace this paragraph
+with the captured-on row when the first canonical run lands.
+
+### How to run the scale tier locally
+
+```sh
+# All three scenarios serially (~18 min total):
+make loadtest-scale
+
+# Individual scenarios (each ~6 min):
+make loadtest-scale-bulk     # 10K cert bulk-renew
+make loadtest-scale-acme     # 200 VU ACME burst
+make loadtest-scale-agent    # 5K agent heartbeat storm
+```
+
+Each scenario boots its own copy of the loadtest compose stack
+(postgres + tls-init + certctl-server) plus the `scale-seed` init
+container that runs the SQL fixtures from `deploy/test/loadtest/seed/`.
+The seed is idempotent (`ON CONFLICT … DO NOTHING`) so re-running a
+scenario against the same compose stack is cheap.
+
+### Documented limitations of the scale tier
+
+- **JWS-signed ACME flows are not measured.** The ACME burst scenario
+  hits the unauthenticated directory + new-nonce + ARI surface only.
+  Measuring the JWS-signed POST hot path (new-account / new-order /
+  finalize) requires bundling a JWS signer into the k6 driver (k6
+  doesn't ship JWS). End-to-end JWS conformance is gated by
+  `make acme-rfc-conformance-test` which drives `lego` against the
+  same stack.
+- **Scheduler renewal scan throughput.** The bulk-renewal scenario
+  measures the inbound POST throughput; the scheduler's
+  `jobProcessorLoop` drains the enqueued jobs at a fixed per-tick
+  budget (`CERTCTL_RENEWAL_CONCURRENCY=25` default), and the
+  throughput of that path is not amplified by adding more inbound
+  bulk-renew calls. A future scenario could pull
+  `/api/v1/jobs?status=pending` and measure drain time.
+- **Production-sized Postgres.** The compose stack runs
+  `postgres:16-alpine` with default config on a CI runner.
+  Production deploys with `shared_buffers >= 1 GiB` + dedicated
+  Postgres VM will have different query plans for the 10K-cert
+  scan. The captured numbers translate directionally but the
+  absolute ceiling is workload-specific — see the operator-tune
+  ladder above for production sizing.
+- **Pull-only deployment model.** Agent CSR submit, work-poll, and
+  deploy-verify paths are intentionally out of scope. The heartbeat
+  storm exercises the highest-frequency call on a typical fleet;
+  the work-poll path runs at the same cadence but is cheap (empty
+  set returned 99% of the time).
+
 ## Profiling production
 
 When the above ladder doesn't fit your shape, profile against your