# certctl - production high-availability values
#
# This profile provides:
#   - 3 server replicas with pod anti-affinity
#   - large PostgreSQL storage
#   - production resource requests and limits
#   - Prometheus monitoring
#   - network policy enforcement
#
# NOTE(review): no network-policy value is set in this file; confirm the
# chart turns it on elsewhere.
# NOTE(review): the nesting of service / issuer / rateLimiting under
# `server` was reconstructed from flattened text; verify it against the
# chart's own values.yaml before use.

namespace: certctl

server:
  replicas: 3
  image:
    repository: ghcr.io/certctl-io/certctl
    tag: "2.1.0"
    pullPolicy: IfNotPresent
  port: 8443
  resources:
    requests:
      cpu: 250m
      memory: 256Mi
    limits:
      cpu: 1000m
      memory: 512Mi
  auth:
    type: api-key
    # Placeholder only: supply the real key via --set or sealed-secrets.
    apiKey: "CHANGE_ME_IN_PRODUCTION"
  logging:
    level: info
    format: json
  service:
    type: ClusterIP
    # DEPL-006 closure (Sprint 3, 2026-05-16). With replicas: 3, default
    # round-robin Service balancing breaks login/CSRF flows: the session
    # cookie and the CSRF token row can land on different pods from one
    # request to the next. ClientIP affinity routes every connection from
    # a given source IP to the same pod for the configured timeout window.
    # Documented in docs/operator/runbooks/ha.md; before this fix the
    # chart did not actually render the setting.
    sessionAffinity: ClientIP
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "8443"
      prometheus.io/path: "/api/v1/metrics/prometheus"
  issuer:
    local:
      enabled: true
    acme:
      enabled: true
      directoryURL: https://acme-v02.api.letsencrypt.org/directory
      email: admin@example.com
      challengeType: dns-01
  rateLimiting:
    rps: 500
    burst: 1000
    # DEPL-006 closure (Sprint 3, 2026-05-16). Running more than one
    # replica REQUIRES the postgres backend so that per-key buckets stay
    # consistent across replicas. With the default 'memory' backend each
    # pod keeps its own bucket map, so a 3-replica fleet effectively
    # triples the configured cap (a client churning across pods bypasses
    # the limit). The canonical comment lives in
    # deploy/helm/certctl/values.yaml (L217-226).
    backend: postgres

postgresql:
  enabled: true
  image:
    repository: postgres
    tag: "16-alpine"
    pullPolicy: IfNotPresent
  auth:
    database: certctl
    username: certctl
    # Placeholder only: supply the real password via --set or
    # sealed-secrets.
    password: "CHANGE_ME_IN_PRODUCTION"
  storage:
    size: 100Gi
    # Replace with your own high-performance storage class.
    storageClass: "fast-ssd"
  resources:
    requests:
      cpu: 500m
      memory: 512Mi
    limits:
      cpu: 2000m
      memory: 2Gi

agent:
  enabled: true
  kind: DaemonSet
  image:
    repository: ghcr.io/certctl-io/certctl-agent
    tag: "2.1.0"
    pullPolicy: IfNotPresent
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 256Mi
  discoveryDirs: "/etc/ssl/certs,/etc/pki/tls,/etc/ssl"

ingress:
  enabled: true
  className: nginx
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
  hosts:
    - host: certctl.example.com
      paths:
        - path: /
          pathType: Prefix
  tls:
    - secretName: certctl-tls
      hosts:
        - certctl.example.com

serviceAccount:
  create: true
  annotations:
    # For IRSA on AWS; ACCOUNT is a placeholder for the real account ID.
    eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/certctl-role

rbac:
  create: true

podDisruptionBudget:
  enabled: true
  minAvailable: 2

monitoring:
  enabled: true
  serviceMonitor:
    enabled: true
    interval: 30s
    scrapeTimeout: 10s

# Pod anti-affinity for HA: a hard scheduling rule keyed on node hostname
# that selects pods labelled name=certctl, component=server.
podAntiAffinity:
  requiredDuringSchedulingIgnoredDuringExecution:
    - labelSelector:
        matchExpressions:
          - key: app.kubernetes.io/name
            operator: In
            values:
              - certctl
          - key: app.kubernetes.io/component
            operator: In
            values:
              - server
      topologyKey: kubernetes.io/hostname

customLabels:
  environment: production
  team: platform
  cost-center: ops

customAnnotations:
  slack-alerts: "#ops"
  backup-policy: daily