diff --git a/cmd/agent/deploy.go b/cmd/agent/deploy.go new file mode 100644 index 0000000..9bc679c --- /dev/null +++ b/cmd/agent/deploy.go @@ -0,0 +1,443 @@ +// Copyright 2026 certctl LLC. All rights reserved. +// SPDX-License-Identifier: BUSL-1.1 + +package main + +import ( + "context" + "encoding/json" + "encoding/pem" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/certctl-io/certctl/internal/connector/target" + "github.com/certctl-io/certctl/internal/connector/target/apache" + "github.com/certctl-io/certctl/internal/connector/target/awsacm" + "github.com/certctl-io/certctl/internal/connector/target/azurekv" + "github.com/certctl-io/certctl/internal/connector/target/caddy" + "github.com/certctl-io/certctl/internal/connector/target/envoy" + "github.com/certctl-io/certctl/internal/connector/target/f5" + "github.com/certctl-io/certctl/internal/connector/target/haproxy" + "github.com/certctl-io/certctl/internal/connector/target/iis" + jks "github.com/certctl-io/certctl/internal/connector/target/javakeystore" + k8s "github.com/certctl-io/certctl/internal/connector/target/k8ssecret" + "github.com/certctl-io/certctl/internal/connector/target/nginx" + pf "github.com/certctl-io/certctl/internal/connector/target/postfix" + sshconn "github.com/certctl-io/certctl/internal/connector/target/ssh" + "github.com/certctl-io/certctl/internal/connector/target/traefik" + wcs "github.com/certctl-io/certctl/internal/connector/target/wincertstore" +) + +// Phase 9 ARCH-M2 closure Sprint 12 (2026-05-14): extracted from +// cmd/agent/main.go via the Option B sibling-file pattern. +// +// This file holds the DEPLOYMENT executor + the target connector +// factory + the deploy-only helpers: +// +// - executeDeploymentJob: handles Pending deployment jobs by +// fetching the cert PEM from the control plane, loading the +// locally-held private key (in agent keygen mode), instantiating +// the appropriate target connector via createTargetConnector, +// calling DeployCertificate on it, and reporting Completed or +// Failed back to the control plane. +// - createTargetConnector: the big switch over target_type that +// instantiates one of 14 target connectors (apache / awsacm / +// azurekv / caddy / envoy / f5 / haproxy / iis / javakeystore / +// k8ssecret / nginx / postfix / ssh / traefik / wincertstore). +// Context is threaded into SDK-driven connectors (AWSACM, +// AzureKeyVault) so credential resolution honors caller +// cancellation per the contextcheck linter — see CI commit +// 502823d. +// - splitPEMChain: split a PEM chain into (first cert, rest). +// - fetchCertificate: pull the PEM chain from +// GET /api/v1/certificates/{certID}/version. +// +// All 14 target-connector imports were used ONLY by +// createTargetConnector; moving the factory here also moved the +// 14 connector imports out of main.go, leaving the surviving +// cmd/agent/main.go with the minimal stdlib surface its lifecycle +// + HTTP infrastructure needs. + +// executeDeploymentJob executes a deployment job by fetching the certificate and deploying it +// to the target system using the appropriate connector (NGINX, F5 BIG-IP, or IIS). +// +// For agent keygen mode, the private key is read from the local key store (keyDir/certID.key) +// rather than fetched from the server. The deployment includes the locally-held key. +// +// Flow: +// 1. Report job as Running +// 2. Fetch the certificate PEM from the control plane +// 3. Load local private key if it exists (agent keygen mode) +// 4. Instantiate the target connector based on target_type from the work response +// 5. Call DeployCertificate on the connector +// 6. Report job as Completed (or Failed) +func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) { + a.logger.Info("executing deployment job", + "job_id", job.ID, + "certificate_id", job.CertificateID, + "target_type", job.TargetType) + + // Report job as running + if err := a.reportJobStatus(ctx, job.ID, "Running", ""); err != nil { + a.logger.Error("failed to report job running", "error", err) + } + + // Fetch the certificate from the control plane + certPEM, err := a.fetchCertificate(ctx, job.CertificateID) + if err != nil { + a.logger.Error("failed to fetch certificate", + "job_id", job.ID, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("cert fetch failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + a.logger.Info("certificate fetched for deployment", + "job_id", job.ID, + "cert_length", len(certPEM)) + + // Split PEM into cert and chain (separated by double newline between PEM blocks) + certOnly, chainPEM := splitPEMChain(certPEM) + + // Check for locally-stored private key (agent keygen mode) + keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key") + var keyPEM string + keyData, err := os.ReadFile(keyPath) + if err != nil { + a.logger.Error("failed to read local private key for deployment", + "job_id", job.ID, + "key_path", keyPath, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key read failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "error", reportErr) + } + return + } + keyPEM = string(keyData) + a.logger.Info("loaded local private key for deployment", + "job_id", job.ID, + "key_path", keyPath) + + // Deploy to the target using the appropriate connector + if job.TargetType != "" { + connector, err := a.createTargetConnector(ctx, job.TargetType, job.TargetConfig) + if err != nil { + a.logger.Error("failed to create target connector", + "job_id", job.ID, + "target_type", job.TargetType, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("connector init failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + // Bundle 1 / RT-C1 closure (2026-05-12): defense in depth. The server + // runs internal/connector/target/configcheck.Validate on the way IN + // (Create/Update), and rejects shell metacharacters in command-bearing + // fields. Re-run the connector's full ValidateConfig here on the way + // OUT, before any DeployCertificate call. This catches (a) configs + // that pre-date the server-side guard, (b) corruption/tampering of + // the encrypted config blob, and (c) per-connector filesystem + // invariants (cert dir exists, paths writable) that the server can't + // check because the filesystem is on the agent host. + if err := connector.ValidateConfig(ctx, job.TargetConfig); err != nil { + a.logger.Error("connector config validation failed", + "job_id", job.ID, + "target_type", job.TargetType, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("%s config validation failed: %v", job.TargetType, err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + deployReq := target.DeploymentRequest{ + CertPEM: certOnly, + KeyPEM: keyPEM, + ChainPEM: chainPEM, + TargetConfig: job.TargetConfig, + Metadata: map[string]string{ + "certificate_id": job.CertificateID, + "job_id": job.ID, + }, + } + + // Phase 2 of the deploy-hardening I master bundle: + // per-target deploy mutex. Acquire BEFORE + // DeployCertificate so two concurrent renewals against + // the same target ID serialize. The lock is held for the + // full Deploy duration including PreCommit (validate), + // PostCommit (reload), and post-deploy verify (Phases + // 4-9). Released on every return path via defer. + var targetID string + if job.TargetID != nil { + targetID = *job.TargetID + } + if mu := a.targetDeployMutex(targetID); mu != nil { + mu.Lock() + defer mu.Unlock() + } + + result, err := connector.DeployCertificate(ctx, deployReq) + if err != nil { + a.logger.Error("deployment failed", + "job_id", job.ID, + "target_type", job.TargetType, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("deployment failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + a.logger.Info("target connector deployment completed", + "job_id", job.ID, + "target_type", job.TargetType, + "success", result.Success, + "message", result.Message) + + // If verification is enabled, verify the deployment by probing the live TLS endpoint + targetHost, targetPort, err := extractTargetHostAndPort(job.TargetConfig) + if err != nil { + a.logger.Warn("could not extract target host/port for verification", + "job_id", job.ID, + "error", err) + } else { + a.verifyAndReportDeployment(ctx, job, targetHost, targetPort, certOnly) + } + } else { + a.logger.Info("no target type specified, skipping connector invocation", + "job_id", job.ID) + } + + // Report job as completed + if err := a.reportJobStatus(ctx, job.ID, "Completed", ""); err != nil { + a.logger.Error("failed to report job completed", "error", err) + return + } + + a.logger.Info("deployment job completed", "job_id", job.ID) +} + +// createTargetConnector instantiates the appropriate target connector based on type. +// ctx is threaded into SDK-driven connectors (AWSACM, AzureKeyVault) so credential +// resolution honors caller cancellation / deadlines instead of using a fresh +// context.Background() (the contextcheck linter enforces this — the original Rank 5 +// implementation used Background() and tripped CI on commit 502823d). +func (a *Agent) createTargetConnector(ctx context.Context, targetType string, configJSON json.RawMessage) (target.Connector, error) { + switch targetType { + case "NGINX": + var cfg nginx.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid NGINX config: %w", err) + } + } + return nginx.New(&cfg, a.logger), nil + + case "Apache": + var cfg apache.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Apache config: %w", err) + } + } + return apache.New(&cfg, a.logger), nil + + case "HAProxy": + var cfg haproxy.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid HAProxy config: %w", err) + } + } + return haproxy.New(&cfg, a.logger), nil + + case "F5": + var cfg f5.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid F5 config: %w", err) + } + } + conn, err := f5.New(&cfg, a.logger) + if err != nil { + return nil, fmt.Errorf("failed to create F5 connector: %w", err) + } + return conn, nil + + case "IIS": + var cfg iis.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid IIS config: %w", err) + } + } + return iis.New(&cfg, a.logger) + + case "Traefik": + var cfg traefik.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Traefik config: %w", err) + } + } + return traefik.New(&cfg, a.logger), nil + + case "Caddy": + var cfg caddy.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Caddy config: %w", err) + } + } + return caddy.New(&cfg, a.logger), nil + + case "Envoy": + var cfg envoy.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Envoy config: %w", err) + } + } + return envoy.New(&cfg, a.logger), nil + + case "Postfix": + var cfg pf.Config + cfg.Mode = "postfix" + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Postfix config: %w", err) + } + } + return pf.New(&cfg, a.logger), nil + + case "Dovecot": + var cfg pf.Config + cfg.Mode = "dovecot" + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid Dovecot config: %w", err) + } + } + return pf.New(&cfg, a.logger), nil + + case "SSH": + var cfg sshconn.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid SSH config: %w", err) + } + } + return sshconn.New(&cfg, a.logger) + + case "WinCertStore": + var cfg wcs.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid WinCertStore config: %w", err) + } + } + return wcs.New(&cfg, a.logger) + + case "JavaKeystore": + var cfg jks.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid JavaKeystore config: %w", err) + } + } + return jks.New(&cfg, a.logger), nil + + case "KubernetesSecrets": + var cfg k8s.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid KubernetesSecrets config: %w", err) + } + } + return k8s.New(&cfg, a.logger) + + case "AWSACM": + // Rank 5 of the 2026-05-03 Infisical deep-research deliverable. + // AWS Certificate Manager target — SDK-driven (no file I/O). + // LoadDefaultConfig handles the standard AWS credential chain + // (IRSA / EC2 instance profile / SSO / env vars) without any + // long-lived creds in connector Config. + var cfg awsacm.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid AWSACM config: %w", err) + } + } + return awsacm.New(ctx, &cfg, a.logger) + + case "AzureKeyVault": + // Rank 5 of the 2026-05-03 Infisical deep-research deliverable. + // Azure Key Vault target — SDK-driven (no file I/O). + // DefaultAzureCredential handles the standard Azure credential + // chain (managed identity / workload identity / env vars / az + // CLI fallback). Long-lived service-principal secrets are + // supported but discouraged via the credential_mode config. + var cfg azurekv.Config + if len(configJSON) > 0 { + if err := json.Unmarshal(configJSON, &cfg); err != nil { + return nil, fmt.Errorf("invalid AzureKeyVault config: %w", err) + } + } + return azurekv.New(ctx, &cfg, a.logger) + + default: + return nil, fmt.Errorf("unsupported target type: %s", targetType) + } +} + +// splitPEMChain splits a PEM chain into the first certificate (cert) and the rest (chain). +// The control plane returns the full chain as a single string with PEM blocks concatenated. +func splitPEMChain(pemChain string) (string, string) { + data := []byte(pemChain) + block, rest := pem.Decode(data) + if block == nil { + return pemChain, "" + } + cert := string(pem.EncodeToMemory(block)) + + // Skip whitespace between cert and chain + chain := strings.TrimSpace(string(rest)) + if chain == "" { + return cert, "" + } + return cert, chain +} + +// fetchCertificate retrieves the certificate PEM chain from the control plane. +// GET /api/v1/agents/{agentID}/certificates/{certID} +func (a *Agent) fetchCertificate(ctx context.Context, certID string) (string, error) { + path := fmt.Sprintf("/api/v1/agents/%s/certificates/%s", a.config.AgentID, certID) + resp, err := a.makeRequest(ctx, http.MethodGet, path, nil) + if err != nil { + return "", fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("server returned %d: %s", resp.StatusCode, string(body)) + } + + var certResp struct { + CertificatePEM string `json:"certificate_pem"` + } + if err := json.NewDecoder(resp.Body).Decode(&certResp); err != nil { + return "", fmt.Errorf("failed to decode response: %w", err) + } + + return certResp.CertificatePEM, nil +} diff --git a/cmd/agent/discovery.go b/cmd/agent/discovery.go new file mode 100644 index 0000000..608486b --- /dev/null +++ b/cmd/agent/discovery.go @@ -0,0 +1,275 @@ +// Copyright 2026 certctl LLC. All rights reserved. +// SPDX-License-Identifier: BUSL-1.1 + +package main + +import ( + "context" + "crypto/ecdsa" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/pem" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +// Phase 9 ARCH-M2 closure Sprint 12 (2026-05-14): extracted from +// cmd/agent/main.go via the Option B sibling-file pattern. +// +// This file holds the filesystem DISCOVERY scan — the agent's +// outbound surface for reporting pre-existing certificates it +// finds on disk back to the control plane (POST /api/v1/agents/ +// {id}/discoveries, a machine-to-machine flow NOT exposed via the +// MCP surface per the comment in +// internal/mcp/tools.go::RegisterTools): +// +// - runDiscoveryScan: walks each configured discovery directory, +// dispatches each candidate file to parsePEMFile or parseDERFile +// depending on extension, batches the parsed entries, and POSTs +// them in one report. +// - parsePEMFile / parseDERFile: extract every X.509 certificate +// from a candidate file in either encoding. +// - certToEntry: project a parsed *x509.Certificate into the +// discoveredCertEntry shape the control plane expects. +// - discoveredCertEntry struct + sha256Sum + certKeyInfo helpers +// consumed only by the discovery path; co-locating them keeps +// this file self-contained. + +// runDiscoveryScan walks configured directories, parses certificate files, and reports +// discovered certificates to the control plane. +// Supports PEM and DER encoded X.509 certificates. +func (a *Agent) runDiscoveryScan(ctx context.Context) { + a.logger.Info("starting filesystem certificate discovery scan", + "directories", a.config.DiscoveryDirs) + + startTime := time.Now() + var certs []discoveredCertEntry + var scanErrors []string + + for _, dir := range a.config.DiscoveryDirs { + a.logger.Debug("scanning directory", "path", dir) + + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + scanErrors = append(scanErrors, fmt.Sprintf("walk error at %s: %v", path, err)) + return nil // continue walking + } + if info.IsDir() { + return nil + } + + // Skip files larger than 1MB (unlikely to be a certificate) + if info.Size() > 1*1024*1024 { + return nil + } + + // Check file extension + ext := strings.ToLower(filepath.Ext(path)) + switch ext { + case ".pem", ".crt", ".cer", ".cert": + found := a.parsePEMFile(path) + certs = append(certs, found...) + case ".der": + if entry, err := a.parseDERFile(path); err == nil { + certs = append(certs, entry) + } else { + a.logger.Debug("skipping non-cert DER file", "path", path, "error", err) + } + default: + // Try PEM parsing for extensionless files or unknown extensions + if ext == "" || ext == ".key" { + return nil // skip key files and extensionless + } + found := a.parsePEMFile(path) + if len(found) > 0 { + certs = append(certs, found...) + } + } + return nil + }) + if err != nil { + scanErrors = append(scanErrors, fmt.Sprintf("failed to walk %s: %v", dir, err)) + } + } + + scanDuration := time.Since(startTime) + a.logger.Info("discovery scan completed", + "certificates_found", len(certs), + "errors", len(scanErrors), + "duration_ms", scanDuration.Milliseconds()) + + if len(certs) == 0 && len(scanErrors) == 0 { + a.logger.Debug("no certificates found and no errors, skipping report") + return + } + + // Build report payload + entries := make([]map[string]interface{}, len(certs)) + for i, c := range certs { + entries[i] = map[string]interface{}{ + "fingerprint_sha256": c.FingerprintSHA256, + "common_name": c.CommonName, + "sans": c.SANs, + "serial_number": c.SerialNumber, + "issuer_dn": c.IssuerDN, + "subject_dn": c.SubjectDN, + "not_before": c.NotBefore, + "not_after": c.NotAfter, + "key_algorithm": c.KeyAlgorithm, + "key_size": c.KeySize, + "is_ca": c.IsCA, + "pem_data": c.PEMData, + "source_path": c.SourcePath, + "source_format": c.SourceFormat, + } + } + + report := map[string]interface{}{ + "agent_id": a.config.AgentID, + "directories": a.config.DiscoveryDirs, + "certificates": entries, + "errors": scanErrors, + "scan_duration_ms": int(scanDuration.Milliseconds()), + } + + // Submit to control plane + path := fmt.Sprintf("/api/v1/agents/%s/discoveries", a.config.AgentID) + resp, err := a.makeRequest(ctx, http.MethodPost, path, report) + if err != nil { + a.logger.Error("failed to submit discovery report", "error", err) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusAccepted { + body, _ := io.ReadAll(resp.Body) + a.logger.Error("discovery report rejected", + "status", resp.StatusCode, + "body", string(body)) + return + } + + a.logger.Info("discovery report submitted successfully", + "certificates", len(certs), + "errors", len(scanErrors)) +} + +// discoveredCertEntry holds parsed certificate metadata for reporting. +type discoveredCertEntry struct { + FingerprintSHA256 string `json:"fingerprint_sha256"` + CommonName string `json:"common_name"` + SANs []string `json:"sans"` + SerialNumber string `json:"serial_number"` + IssuerDN string `json:"issuer_dn"` + SubjectDN string `json:"subject_dn"` + NotBefore string `json:"not_before"` + NotAfter string `json:"not_after"` + KeyAlgorithm string `json:"key_algorithm"` + KeySize int `json:"key_size"` + IsCA bool `json:"is_ca"` + PEMData string `json:"pem_data"` + SourcePath string `json:"source_path"` + SourceFormat string `json:"source_format"` +} + +// parsePEMFile reads a file and extracts all X.509 certificates from PEM blocks. +func (a *Agent) parsePEMFile(path string) []discoveredCertEntry { + data, err := os.ReadFile(path) + if err != nil { + a.logger.Debug("failed to read file", "path", path, "error", err) + return nil + } + + var entries []discoveredCertEntry + rest := data + for { + var block *pem.Block + block, rest = pem.Decode(rest) + if block == nil { + break + } + if block.Type != "CERTIFICATE" { + continue + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + a.logger.Debug("failed to parse certificate in PEM", "path", path, "error", err) + continue + } + + pemStr := string(pem.EncodeToMemory(block)) + entries = append(entries, certToEntry(cert, path, "PEM", pemStr)) + } + return entries +} + +// parseDERFile reads a DER-encoded certificate file. +func (a *Agent) parseDERFile(path string) (discoveredCertEntry, error) { + data, err := os.ReadFile(path) + if err != nil { + return discoveredCertEntry{}, fmt.Errorf("read failed: %w", err) + } + + cert, err := x509.ParseCertificate(data) + if err != nil { + return discoveredCertEntry{}, fmt.Errorf("parse failed: %w", err) + } + + // Convert to PEM for storage + pemStr := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: data})) + return certToEntry(cert, path, "DER", pemStr), nil +} + +// certToEntry converts a parsed x509.Certificate into a discoveredCertEntry. +func certToEntry(cert *x509.Certificate, path, format, pemData string) discoveredCertEntry { + // Compute SHA-256 fingerprint + fingerprint := fmt.Sprintf("%x", sha256Sum(cert.Raw)) + + // Determine key algorithm and size + keyAlg, keySize := certKeyInfo(cert) + + return discoveredCertEntry{ + FingerprintSHA256: fingerprint, + CommonName: cert.Subject.CommonName, + SANs: cert.DNSNames, + SerialNumber: cert.SerialNumber.Text(16), + IssuerDN: cert.Issuer.String(), + SubjectDN: cert.Subject.String(), + NotBefore: cert.NotBefore.UTC().Format(time.RFC3339), + NotAfter: cert.NotAfter.UTC().Format(time.RFC3339), + KeyAlgorithm: keyAlg, + KeySize: keySize, + IsCA: cert.IsCA, + PEMData: pemData, + SourcePath: path, + SourceFormat: format, + } +} + +// sha256Sum returns the SHA-256 hash of data. +func sha256Sum(data []byte) [32]byte { + return sha256.Sum256(data) +} + +// certKeyInfo extracts key algorithm name and size from a certificate. +func certKeyInfo(cert *x509.Certificate) (string, int) { + switch pub := cert.PublicKey.(type) { + case *ecdsa.PublicKey: + return "ECDSA", pub.Curve.Params().BitSize + case *rsa.PublicKey: + return "RSA", pub.N.BitLen() + default: + switch cert.PublicKeyAlgorithm { + case x509.Ed25519: + return "Ed25519", 256 + default: + return cert.PublicKeyAlgorithm.String(), 0 + } + } +} diff --git a/cmd/agent/main.go b/cmd/agent/main.go index 68a09b1..efaa270 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -6,16 +6,9 @@ package main import ( "bytes" "context" - "crypto/ecdsa" - "crypto/elliptic" - "crypto/rand" - "crypto/rsa" - "crypto/sha256" "crypto/tls" "crypto/x509" - "crypto/x509/pkix" "encoding/json" - "encoding/pem" "errors" "flag" "fmt" @@ -26,29 +19,11 @@ import ( "net/url" "os" "os/signal" - "path/filepath" "runtime" "strings" "sync" "syscall" "time" - - "github.com/certctl-io/certctl/internal/connector/target" - "github.com/certctl-io/certctl/internal/connector/target/apache" - "github.com/certctl-io/certctl/internal/connector/target/awsacm" - "github.com/certctl-io/certctl/internal/connector/target/azurekv" - "github.com/certctl-io/certctl/internal/connector/target/caddy" - "github.com/certctl-io/certctl/internal/connector/target/envoy" - "github.com/certctl-io/certctl/internal/connector/target/f5" - "github.com/certctl-io/certctl/internal/connector/target/haproxy" - "github.com/certctl-io/certctl/internal/connector/target/iis" - jks "github.com/certctl-io/certctl/internal/connector/target/javakeystore" - k8s "github.com/certctl-io/certctl/internal/connector/target/k8ssecret" - "github.com/certctl-io/certctl/internal/connector/target/nginx" - pf "github.com/certctl-io/certctl/internal/connector/target/postfix" - sshconn "github.com/certctl-io/certctl/internal/connector/target/ssh" - "github.com/certctl-io/certctl/internal/connector/target/traefik" - wcs "github.com/certctl-io/certctl/internal/connector/target/wincertstore" ) // AgentConfig represents the agent-side configuration. @@ -394,618 +369,6 @@ func (a *Agent) sendHeartbeat(ctx context.Context) { a.logger.Debug("heartbeat acknowledged") } -// pollForWork queries the control plane for actionable jobs and processes them. -// Jobs may be deployment jobs (Pending) or CSR jobs (AwaitingCSR). -// GET /api/v1/agents/{agentID}/work -func (a *Agent) pollForWork(ctx context.Context) { - a.logger.Debug("polling for work", "agent_id", a.config.AgentID) - - path := fmt.Sprintf("/api/v1/agents/%s/work", a.config.AgentID) - resp, err := a.makeRequest(ctx, http.MethodGet, path, nil) - if err != nil { - a.logger.Error("work poll failed", "error", err) - a.consecutiveFailures++ - return - } - defer resp.Body.Close() - - // I-004: same terminal-retirement handling as sendHeartbeat. Work-poll is the - // other hot path that can observe an agent's soft-retirement; if the - // heartbeat tick happens to fire after a work-poll tick within the same - // retirement window, this branch catches it first. markRetired's sync.Once - // guards idempotency so racing both paths in the same tick only closes the - // signal channel once. No consecutiveFailures increment — retirement is - // not a transient failure. - if resp.StatusCode == http.StatusGone { - body, _ := io.ReadAll(resp.Body) - a.markRetired("work_poll", resp.StatusCode, string(body)) - return - } - - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - a.logger.Error("work poll rejected", - "status", resp.StatusCode, - "body", string(body)) - a.consecutiveFailures++ - return - } - - var workResp WorkResponse - if err := json.NewDecoder(resp.Body).Decode(&workResp); err != nil { - a.logger.Error("failed to decode work response", "error", err) - a.consecutiveFailures++ - return - } - - a.consecutiveFailures = 0 - - if workResp.Count == 0 { - a.logger.Debug("no pending work") - return - } - - a.logger.Info("received work", "job_count", workResp.Count) - - // Process each job based on type and status - for _, job := range workResp.Jobs { - switch { - case job.Status == "AwaitingCSR": - // Agent keygen mode: generate key locally, create CSR, submit to server - a.executeCSRJob(ctx, job) - case job.Type == "Deployment": - a.executeDeploymentJob(ctx, job) - } - } -} - -// executeCSRJob handles an AwaitingCSR job: generates a private key locally, creates a CSR, -// and submits it to the control plane for signing. The private key is stored on the local -// filesystem with 0600 permissions and NEVER sent to the server. -// -// Flow: -// 1. Generate ECDSA P-256 key pair -// 2. Store private key to disk (keyDir/certID.key) with 0600 permissions -// 3. Create CSR with common name and SANs from work response -// 4. Submit CSR to control plane via POST /agents/{id}/csr -// 5. Server signs the CSR and creates a cert version + deployment jobs -func (a *Agent) executeCSRJob(ctx context.Context, job JobItem) { - a.logger.Info("executing CSR job (agent-side key generation)", - "job_id", job.ID, - "certificate_id", job.CertificateID, - "common_name", job.CommonName) - - // Step 1: Generate ECDSA P-256 key pair - privKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) - if err != nil { - a.logger.Error("failed to generate private key", - "job_id", job.ID, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key generation failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - a.logger.Info("generated ECDSA P-256 key pair locally", - "job_id", job.ID, - "certificate_id", job.CertificateID) - - // Step 2: Store private key to disk with secure permissions. - // - // Bundle-9 / Audit L-002 + L-003: marshal+write through helpers that - // (a) zeroize the in-heap DER buffer immediately after the PEM block is - // constructed so the private scalar's exposure window is bounded by - // this function call, and (b) assert the key directory is mode 0700 - // before any write touches disk. Also defer-clear the PEM buffer for - // the same reason — the encoded key isn't sensitive in transit (it's - // going to disk) but lingers on the heap if we don't. - keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key") - if err := ensureAgentKeyDirSecure(filepath.Dir(keyPath)); err != nil { - a.logger.Error("agent key dir hardening failed", "job_id", job.ID, "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key dir hardening failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - var privKeyPEM []byte - if marshalErr := marshalAgentKeyAndZeroize(privKey, func(der []byte) error { - privKeyPEM = pem.EncodeToMemory(&pem.Block{ - Type: "EC PRIVATE KEY", - Bytes: der, - }) - return nil - }); marshalErr != nil { - a.logger.Error("failed to marshal private key", - "job_id", job.ID, - "error", marshalErr) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key marshal failed: %v", marshalErr)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - defer clear(privKeyPEM) - - if err := os.WriteFile(keyPath, privKeyPEM, 0600); err != nil { - a.logger.Error("failed to write private key to disk", - "job_id", job.ID, - "key_path", keyPath, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key storage failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - a.logger.Info("private key stored securely", - "job_id", job.ID, - "key_path", keyPath, - "permissions", "0600") - - // Validate common name is present - if job.CommonName == "" { - a.logger.Error("empty common name in CSR job", "job_id", job.ID) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", "empty common name"); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "error", reportErr) - } - return - } - - // Step 3: Create CSR with common name and SANs - // Split SANs into DNS names and email addresses for proper CSR encoding - var dnsNames []string - var emailAddresses []string - for _, san := range job.SANs { - if strings.Contains(san, "@") { - emailAddresses = append(emailAddresses, san) - } else { - dnsNames = append(dnsNames, san) - } - } - - csrTemplate := &x509.CertificateRequest{ - Subject: pkix.Name{ - CommonName: job.CommonName, - }, - DNSNames: dnsNames, - EmailAddresses: emailAddresses, - } - - csrDER, err := x509.CreateCertificateRequest(rand.Reader, csrTemplate, privKey) - if err != nil { - a.logger.Error("failed to create CSR", - "job_id", job.ID, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR creation failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - csrPEM := string(pem.EncodeToMemory(&pem.Block{ - Type: "CERTIFICATE REQUEST", - Bytes: csrDER, - })) - - // Step 4: Submit CSR to the control plane (only the public key leaves the agent) - a.logger.Info("submitting CSR to control plane", - "job_id", job.ID, - "certificate_id", job.CertificateID) - - submitPath := fmt.Sprintf("/api/v1/agents/%s/csr", a.config.AgentID) - resp, err := a.makeRequest(ctx, http.MethodPost, submitPath, map[string]string{ - "csr_pem": csrPEM, - "certificate_id": job.CertificateID, - }) - if err != nil { - a.logger.Error("failed to submit CSR", - "job_id", job.ID, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR submission failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusAccepted { - body, _ := io.ReadAll(resp.Body) - a.logger.Error("CSR submission rejected", - "job_id", job.ID, - "status", resp.StatusCode, - "body", string(body)) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR rejected: %s", string(body))); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - a.logger.Info("CSR submitted and signed successfully", - "job_id", job.ID, - "certificate_id", job.CertificateID, - "key_path", keyPath) -} - -// executeDeploymentJob executes a deployment job by fetching the certificate and deploying it -// to the target system using the appropriate connector (NGINX, F5 BIG-IP, or IIS). -// -// For agent keygen mode, the private key is read from the local key store (keyDir/certID.key) -// rather than fetched from the server. The deployment includes the locally-held key. -// -// Flow: -// 1. Report job as Running -// 2. Fetch the certificate PEM from the control plane -// 3. Load local private key if it exists (agent keygen mode) -// 4. Instantiate the target connector based on target_type from the work response -// 5. Call DeployCertificate on the connector -// 6. Report job as Completed (or Failed) -func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) { - a.logger.Info("executing deployment job", - "job_id", job.ID, - "certificate_id", job.CertificateID, - "target_type", job.TargetType) - - // Report job as running - if err := a.reportJobStatus(ctx, job.ID, "Running", ""); err != nil { - a.logger.Error("failed to report job running", "error", err) - } - - // Fetch the certificate from the control plane - certPEM, err := a.fetchCertificate(ctx, job.CertificateID) - if err != nil { - a.logger.Error("failed to fetch certificate", - "job_id", job.ID, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("cert fetch failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - a.logger.Info("certificate fetched for deployment", - "job_id", job.ID, - "cert_length", len(certPEM)) - - // Split PEM into cert and chain (separated by double newline between PEM blocks) - certOnly, chainPEM := splitPEMChain(certPEM) - - // Check for locally-stored private key (agent keygen mode) - keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key") - var keyPEM string - keyData, err := os.ReadFile(keyPath) - if err != nil { - a.logger.Error("failed to read local private key for deployment", - "job_id", job.ID, - "key_path", keyPath, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key read failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "error", reportErr) - } - return - } - keyPEM = string(keyData) - a.logger.Info("loaded local private key for deployment", - "job_id", job.ID, - "key_path", keyPath) - - // Deploy to the target using the appropriate connector - if job.TargetType != "" { - connector, err := a.createTargetConnector(ctx, job.TargetType, job.TargetConfig) - if err != nil { - a.logger.Error("failed to create target connector", - "job_id", job.ID, - "target_type", job.TargetType, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("connector init failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - // Bundle 1 / RT-C1 closure (2026-05-12): defense in depth. The server - // runs internal/connector/target/configcheck.Validate on the way IN - // (Create/Update), and rejects shell metacharacters in command-bearing - // fields. Re-run the connector's full ValidateConfig here on the way - // OUT, before any DeployCertificate call. This catches (a) configs - // that pre-date the server-side guard, (b) corruption/tampering of - // the encrypted config blob, and (c) per-connector filesystem - // invariants (cert dir exists, paths writable) that the server can't - // check because the filesystem is on the agent host. - if err := connector.ValidateConfig(ctx, job.TargetConfig); err != nil { - a.logger.Error("connector config validation failed", - "job_id", job.ID, - "target_type", job.TargetType, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("%s config validation failed: %v", job.TargetType, err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - deployReq := target.DeploymentRequest{ - CertPEM: certOnly, - KeyPEM: keyPEM, - ChainPEM: chainPEM, - TargetConfig: job.TargetConfig, - Metadata: map[string]string{ - "certificate_id": job.CertificateID, - "job_id": job.ID, - }, - } - - // Phase 2 of the deploy-hardening I master bundle: - // per-target deploy mutex. Acquire BEFORE - // DeployCertificate so two concurrent renewals against - // the same target ID serialize. The lock is held for the - // full Deploy duration including PreCommit (validate), - // PostCommit (reload), and post-deploy verify (Phases - // 4-9). Released on every return path via defer. - var targetID string - if job.TargetID != nil { - targetID = *job.TargetID - } - if mu := a.targetDeployMutex(targetID); mu != nil { - mu.Lock() - defer mu.Unlock() - } - - result, err := connector.DeployCertificate(ctx, deployReq) - if err != nil { - a.logger.Error("deployment failed", - "job_id", job.ID, - "target_type", job.TargetType, - "error", err) - if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("deployment failed: %v", err)); reportErr != nil { - a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) - } - return - } - - a.logger.Info("target connector deployment completed", - "job_id", job.ID, - "target_type", job.TargetType, - "success", result.Success, - "message", result.Message) - - // If verification is enabled, verify the deployment by probing the live TLS endpoint - targetHost, targetPort, err := extractTargetHostAndPort(job.TargetConfig) - if err != nil { - a.logger.Warn("could not extract target host/port for verification", - "job_id", job.ID, - "error", err) - } else { - a.verifyAndReportDeployment(ctx, job, targetHost, targetPort, certOnly) - } - } else { - a.logger.Info("no target type specified, skipping connector invocation", - "job_id", job.ID) - } - - // Report job as completed - if err := a.reportJobStatus(ctx, job.ID, "Completed", ""); err != nil { - a.logger.Error("failed to report job completed", "error", err) - return - } - - a.logger.Info("deployment job completed", "job_id", job.ID) -} - -// createTargetConnector instantiates the appropriate target connector based on type. -// ctx is threaded into SDK-driven connectors (AWSACM, AzureKeyVault) so credential -// resolution honors caller cancellation / deadlines instead of using a fresh -// context.Background() (the contextcheck linter enforces this — the original Rank 5 -// implementation used Background() and tripped CI on commit 502823d). -func (a *Agent) createTargetConnector(ctx context.Context, targetType string, configJSON json.RawMessage) (target.Connector, error) { - switch targetType { - case "NGINX": - var cfg nginx.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid NGINX config: %w", err) - } - } - return nginx.New(&cfg, a.logger), nil - - case "Apache": - var cfg apache.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Apache config: %w", err) - } - } - return apache.New(&cfg, a.logger), nil - - case "HAProxy": - var cfg haproxy.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid HAProxy config: %w", err) - } - } - return haproxy.New(&cfg, a.logger), nil - - case "F5": - var cfg f5.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid F5 config: %w", err) - } - } - conn, err := f5.New(&cfg, a.logger) - if err != nil { - return nil, fmt.Errorf("failed to create F5 connector: %w", err) - } - return conn, nil - - case "IIS": - var cfg iis.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid IIS config: %w", err) - } - } - return iis.New(&cfg, a.logger) - - case "Traefik": - var cfg traefik.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Traefik config: %w", err) - } - } - return traefik.New(&cfg, a.logger), nil - - case "Caddy": - var cfg caddy.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Caddy config: %w", err) - } - } - return caddy.New(&cfg, a.logger), nil - - case "Envoy": - var cfg envoy.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Envoy config: %w", err) - } - } - return envoy.New(&cfg, a.logger), nil - - case "Postfix": - var cfg pf.Config - cfg.Mode = "postfix" - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Postfix config: %w", err) - } - } - return pf.New(&cfg, a.logger), nil - - case "Dovecot": - var cfg pf.Config - cfg.Mode = "dovecot" - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid Dovecot config: %w", err) - } - } - return pf.New(&cfg, a.logger), nil - - case "SSH": - var cfg sshconn.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid SSH config: %w", err) - } - } - return sshconn.New(&cfg, a.logger) - - case "WinCertStore": - var cfg wcs.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid WinCertStore config: %w", err) - } - } - return wcs.New(&cfg, a.logger) - - case "JavaKeystore": - var cfg jks.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid JavaKeystore config: %w", err) - } - } - return jks.New(&cfg, a.logger), nil - - case "KubernetesSecrets": - var cfg k8s.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid KubernetesSecrets config: %w", err) - } - } - return k8s.New(&cfg, a.logger) - - case "AWSACM": - // Rank 5 of the 2026-05-03 Infisical deep-research deliverable. - // AWS Certificate Manager target — SDK-driven (no file I/O). - // LoadDefaultConfig handles the standard AWS credential chain - // (IRSA / EC2 instance profile / SSO / env vars) without any - // long-lived creds in connector Config. - var cfg awsacm.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid AWSACM config: %w", err) - } - } - return awsacm.New(ctx, &cfg, a.logger) - - case "AzureKeyVault": - // Rank 5 of the 2026-05-03 Infisical deep-research deliverable. - // Azure Key Vault target — SDK-driven (no file I/O). - // DefaultAzureCredential handles the standard Azure credential - // chain (managed identity / workload identity / env vars / az - // CLI fallback). Long-lived service-principal secrets are - // supported but discouraged via the credential_mode config. - var cfg azurekv.Config - if len(configJSON) > 0 { - if err := json.Unmarshal(configJSON, &cfg); err != nil { - return nil, fmt.Errorf("invalid AzureKeyVault config: %w", err) - } - } - return azurekv.New(ctx, &cfg, a.logger) - - default: - return nil, fmt.Errorf("unsupported target type: %s", targetType) - } -} - -// splitPEMChain splits a PEM chain into the first certificate (cert) and the rest (chain). -// The control plane returns the full chain as a single string with PEM blocks concatenated. -func splitPEMChain(pemChain string) (string, string) { - data := []byte(pemChain) - block, rest := pem.Decode(data) - if block == nil { - return pemChain, "" - } - cert := string(pem.EncodeToMemory(block)) - - // Skip whitespace between cert and chain - chain := strings.TrimSpace(string(rest)) - if chain == "" { - return cert, "" - } - return cert, chain -} - -// fetchCertificate retrieves the certificate PEM chain from the control plane. -// GET /api/v1/agents/{agentID}/certificates/{certID} -func (a *Agent) fetchCertificate(ctx context.Context, certID string) (string, error) { - path := fmt.Sprintf("/api/v1/agents/%s/certificates/%s", a.config.AgentID, certID) - resp, err := a.makeRequest(ctx, http.MethodGet, path, nil) - if err != nil { - return "", fmt.Errorf("request failed: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("server returned %d: %s", resp.StatusCode, string(body)) - } - - var certResp struct { - CertificatePEM string `json:"certificate_pem"` - } - if err := json.NewDecoder(resp.Body).Decode(&certResp); err != nil { - return "", fmt.Errorf("failed to decode response: %w", err) - } - - return certResp.CertificatePEM, nil -} - // reportJobStatus reports the result of a job back to the control plane. // POST /api/v1/agents/{agentID}/jobs/{jobID}/status func (a *Agent) reportJobStatus(ctx context.Context, jobID string, status string, errorMsg string) error { @@ -1067,239 +430,6 @@ func (a *Agent) makeRequest(ctx context.Context, method, path string, body inter return resp, nil } -// runDiscoveryScan walks configured directories, parses certificate files, and reports -// discovered certificates to the control plane. -// Supports PEM and DER encoded X.509 certificates. -func (a *Agent) runDiscoveryScan(ctx context.Context) { - a.logger.Info("starting filesystem certificate discovery scan", - "directories", a.config.DiscoveryDirs) - - startTime := time.Now() - var certs []discoveredCertEntry - var scanErrors []string - - for _, dir := range a.config.DiscoveryDirs { - a.logger.Debug("scanning directory", "path", dir) - - err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { - if err != nil { - scanErrors = append(scanErrors, fmt.Sprintf("walk error at %s: %v", path, err)) - return nil // continue walking - } - if info.IsDir() { - return nil - } - - // Skip files larger than 1MB (unlikely to be a certificate) - if info.Size() > 1*1024*1024 { - return nil - } - - // Check file extension - ext := strings.ToLower(filepath.Ext(path)) - switch ext { - case ".pem", ".crt", ".cer", ".cert": - found := a.parsePEMFile(path) - certs = append(certs, found...) - case ".der": - if entry, err := a.parseDERFile(path); err == nil { - certs = append(certs, entry) - } else { - a.logger.Debug("skipping non-cert DER file", "path", path, "error", err) - } - default: - // Try PEM parsing for extensionless files or unknown extensions - if ext == "" || ext == ".key" { - return nil // skip key files and extensionless - } - found := a.parsePEMFile(path) - if len(found) > 0 { - certs = append(certs, found...) - } - } - return nil - }) - if err != nil { - scanErrors = append(scanErrors, fmt.Sprintf("failed to walk %s: %v", dir, err)) - } - } - - scanDuration := time.Since(startTime) - a.logger.Info("discovery scan completed", - "certificates_found", len(certs), - "errors", len(scanErrors), - "duration_ms", scanDuration.Milliseconds()) - - if len(certs) == 0 && len(scanErrors) == 0 { - a.logger.Debug("no certificates found and no errors, skipping report") - return - } - - // Build report payload - entries := make([]map[string]interface{}, len(certs)) - for i, c := range certs { - entries[i] = map[string]interface{}{ - "fingerprint_sha256": c.FingerprintSHA256, - "common_name": c.CommonName, - "sans": c.SANs, - "serial_number": c.SerialNumber, - "issuer_dn": c.IssuerDN, - "subject_dn": c.SubjectDN, - "not_before": c.NotBefore, - "not_after": c.NotAfter, - "key_algorithm": c.KeyAlgorithm, - "key_size": c.KeySize, - "is_ca": c.IsCA, - "pem_data": c.PEMData, - "source_path": c.SourcePath, - "source_format": c.SourceFormat, - } - } - - report := map[string]interface{}{ - "agent_id": a.config.AgentID, - "directories": a.config.DiscoveryDirs, - "certificates": entries, - "errors": scanErrors, - "scan_duration_ms": int(scanDuration.Milliseconds()), - } - - // Submit to control plane - path := fmt.Sprintf("/api/v1/agents/%s/discoveries", a.config.AgentID) - resp, err := a.makeRequest(ctx, http.MethodPost, path, report) - if err != nil { - a.logger.Error("failed to submit discovery report", "error", err) - return - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusAccepted { - body, _ := io.ReadAll(resp.Body) - a.logger.Error("discovery report rejected", - "status", resp.StatusCode, - "body", string(body)) - return - } - - a.logger.Info("discovery report submitted successfully", - "certificates", len(certs), - "errors", len(scanErrors)) -} - -// discoveredCertEntry holds parsed certificate metadata for reporting. -type discoveredCertEntry struct { - FingerprintSHA256 string `json:"fingerprint_sha256"` - CommonName string `json:"common_name"` - SANs []string `json:"sans"` - SerialNumber string `json:"serial_number"` - IssuerDN string `json:"issuer_dn"` - SubjectDN string `json:"subject_dn"` - NotBefore string `json:"not_before"` - NotAfter string `json:"not_after"` - KeyAlgorithm string `json:"key_algorithm"` - KeySize int `json:"key_size"` - IsCA bool `json:"is_ca"` - PEMData string `json:"pem_data"` - SourcePath string `json:"source_path"` - SourceFormat string `json:"source_format"` -} - -// parsePEMFile reads a file and extracts all X.509 certificates from PEM blocks. -func (a *Agent) parsePEMFile(path string) []discoveredCertEntry { - data, err := os.ReadFile(path) - if err != nil { - a.logger.Debug("failed to read file", "path", path, "error", err) - return nil - } - - var entries []discoveredCertEntry - rest := data - for { - var block *pem.Block - block, rest = pem.Decode(rest) - if block == nil { - break - } - if block.Type != "CERTIFICATE" { - continue - } - cert, err := x509.ParseCertificate(block.Bytes) - if err != nil { - a.logger.Debug("failed to parse certificate in PEM", "path", path, "error", err) - continue - } - - pemStr := string(pem.EncodeToMemory(block)) - entries = append(entries, certToEntry(cert, path, "PEM", pemStr)) - } - return entries -} - -// parseDERFile reads a DER-encoded certificate file. -func (a *Agent) parseDERFile(path string) (discoveredCertEntry, error) { - data, err := os.ReadFile(path) - if err != nil { - return discoveredCertEntry{}, fmt.Errorf("read failed: %w", err) - } - - cert, err := x509.ParseCertificate(data) - if err != nil { - return discoveredCertEntry{}, fmt.Errorf("parse failed: %w", err) - } - - // Convert to PEM for storage - pemStr := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: data})) - return certToEntry(cert, path, "DER", pemStr), nil -} - -// certToEntry converts a parsed x509.Certificate into a discoveredCertEntry. -func certToEntry(cert *x509.Certificate, path, format, pemData string) discoveredCertEntry { - // Compute SHA-256 fingerprint - fingerprint := fmt.Sprintf("%x", sha256Sum(cert.Raw)) - - // Determine key algorithm and size - keyAlg, keySize := certKeyInfo(cert) - - return discoveredCertEntry{ - FingerprintSHA256: fingerprint, - CommonName: cert.Subject.CommonName, - SANs: cert.DNSNames, - SerialNumber: cert.SerialNumber.Text(16), - IssuerDN: cert.Issuer.String(), - SubjectDN: cert.Subject.String(), - NotBefore: cert.NotBefore.UTC().Format(time.RFC3339), - NotAfter: cert.NotAfter.UTC().Format(time.RFC3339), - KeyAlgorithm: keyAlg, - KeySize: keySize, - IsCA: cert.IsCA, - PEMData: pemData, - SourcePath: path, - SourceFormat: format, - } -} - -// sha256Sum returns the SHA-256 hash of data. -func sha256Sum(data []byte) [32]byte { - return sha256.Sum256(data) -} - -// certKeyInfo extracts key algorithm name and size from a certificate. -func certKeyInfo(cert *x509.Certificate) (string, int) { - switch pub := cert.PublicKey.(type) { - case *ecdsa.PublicKey: - return "ECDSA", pub.Curve.Params().BitSize - case *rsa.PublicKey: - return "RSA", pub.N.BitLen() - default: - switch cert.PublicKeyAlgorithm { - case x509.Ed25519: - return "Ed25519", 256 - default: - return cert.PublicKeyAlgorithm.String(), 0 - } - } -} - func main() { // Parse command-line flags (with env var fallbacks for Docker deployment) serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "https://localhost:8443"), "Control plane server URL (must be https://)") diff --git a/cmd/agent/poll.go b/cmd/agent/poll.go new file mode 100644 index 0000000..dcfdfab --- /dev/null +++ b/cmd/agent/poll.go @@ -0,0 +1,278 @@ +// Copyright 2026 certctl LLC. All rights reserved. +// SPDX-License-Identifier: BUSL-1.1 + +package main + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/json" + "encoding/pem" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" +) + +// Phase 9 ARCH-M2 closure Sprint 12 (2026-05-14): extracted from +// cmd/agent/main.go via the Option B sibling-file pattern (mirrors +// the Sprint 8 cmd/server cut). Package stays `main`; all methods +// are still defined on *Agent so every call site continues to +// resolve through Go's same-package method-set without any +// import-path change. +// +// This file holds the WORK-POLLING entry point + CSR-job execution +// — the inbound side of the agent's pull-only deployment model +// (per CLAUDE.md "Pull-only deployment model" architecture +// decision): +// +// - pollForWork: queries GET /api/v1/agents/{id}/work each tick; +// dispatches each returned JobItem to the appropriate +// executor (CSR vs deployment). +// - executeCSRJob: handles AwaitingCSR jobs by generating an +// ECDSA P-256 key locally, persisting it to keyDir/.key +// with 0600 permissions (key NEVER leaves the agent — see +// CLAUDE.md "Agent-based key management"), creating the CSR, +// and POSTing it to the control plane for signing. +// +// The deployment-job executor lives in deploy.go alongside the +// target connector factory + deploy-only helpers (splitPEMChain, +// fetchCertificate). The discovery scan lives in discovery.go. + +// pollForWork queries the control plane for actionable jobs and processes them. +// Jobs may be deployment jobs (Pending) or CSR jobs (AwaitingCSR). +// GET /api/v1/agents/{agentID}/work +func (a *Agent) pollForWork(ctx context.Context) { + a.logger.Debug("polling for work", "agent_id", a.config.AgentID) + + path := fmt.Sprintf("/api/v1/agents/%s/work", a.config.AgentID) + resp, err := a.makeRequest(ctx, http.MethodGet, path, nil) + if err != nil { + a.logger.Error("work poll failed", "error", err) + a.consecutiveFailures++ + return + } + defer resp.Body.Close() + + // I-004: same terminal-retirement handling as sendHeartbeat. Work-poll is the + // other hot path that can observe an agent's soft-retirement; if the + // heartbeat tick happens to fire after a work-poll tick within the same + // retirement window, this branch catches it first. markRetired's sync.Once + // guards idempotency so racing both paths in the same tick only closes the + // signal channel once. No consecutiveFailures increment — retirement is + // not a transient failure. + if resp.StatusCode == http.StatusGone { + body, _ := io.ReadAll(resp.Body) + a.markRetired("work_poll", resp.StatusCode, string(body)) + return + } + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + a.logger.Error("work poll rejected", + "status", resp.StatusCode, + "body", string(body)) + a.consecutiveFailures++ + return + } + + var workResp WorkResponse + if err := json.NewDecoder(resp.Body).Decode(&workResp); err != nil { + a.logger.Error("failed to decode work response", "error", err) + a.consecutiveFailures++ + return + } + + a.consecutiveFailures = 0 + + if workResp.Count == 0 { + a.logger.Debug("no pending work") + return + } + + a.logger.Info("received work", "job_count", workResp.Count) + + // Process each job based on type and status + for _, job := range workResp.Jobs { + switch { + case job.Status == "AwaitingCSR": + // Agent keygen mode: generate key locally, create CSR, submit to server + a.executeCSRJob(ctx, job) + case job.Type == "Deployment": + a.executeDeploymentJob(ctx, job) + } + } +} + +// executeCSRJob handles an AwaitingCSR job: generates a private key locally, creates a CSR, +// and submits it to the control plane for signing. The private key is stored on the local +// filesystem with 0600 permissions and NEVER sent to the server. +// +// Flow: +// 1. Generate ECDSA P-256 key pair +// 2. Store private key to disk (keyDir/certID.key) with 0600 permissions +// 3. Create CSR with common name and SANs from work response +// 4. Submit CSR to control plane via POST /agents/{id}/csr +// 5. Server signs the CSR and creates a cert version + deployment jobs +func (a *Agent) executeCSRJob(ctx context.Context, job JobItem) { + a.logger.Info("executing CSR job (agent-side key generation)", + "job_id", job.ID, + "certificate_id", job.CertificateID, + "common_name", job.CommonName) + + // Step 1: Generate ECDSA P-256 key pair + privKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + a.logger.Error("failed to generate private key", + "job_id", job.ID, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key generation failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + a.logger.Info("generated ECDSA P-256 key pair locally", + "job_id", job.ID, + "certificate_id", job.CertificateID) + + // Step 2: Store private key to disk with secure permissions. + // + // Bundle-9 / Audit L-002 + L-003: marshal+write through helpers that + // (a) zeroize the in-heap DER buffer immediately after the PEM block is + // constructed so the private scalar's exposure window is bounded by + // this function call, and (b) assert the key directory is mode 0700 + // before any write touches disk. Also defer-clear the PEM buffer for + // the same reason — the encoded key isn't sensitive in transit (it's + // going to disk) but lingers on the heap if we don't. + keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key") + if err := ensureAgentKeyDirSecure(filepath.Dir(keyPath)); err != nil { + a.logger.Error("agent key dir hardening failed", "job_id", job.ID, "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key dir hardening failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + var privKeyPEM []byte + if marshalErr := marshalAgentKeyAndZeroize(privKey, func(der []byte) error { + privKeyPEM = pem.EncodeToMemory(&pem.Block{ + Type: "EC PRIVATE KEY", + Bytes: der, + }) + return nil + }); marshalErr != nil { + a.logger.Error("failed to marshal private key", + "job_id", job.ID, + "error", marshalErr) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key marshal failed: %v", marshalErr)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + defer clear(privKeyPEM) + + if err := os.WriteFile(keyPath, privKeyPEM, 0600); err != nil { + a.logger.Error("failed to write private key to disk", + "job_id", job.ID, + "key_path", keyPath, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("key storage failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + a.logger.Info("private key stored securely", + "job_id", job.ID, + "key_path", keyPath, + "permissions", "0600") + + // Validate common name is present + if job.CommonName == "" { + a.logger.Error("empty common name in CSR job", "job_id", job.ID) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", "empty common name"); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "error", reportErr) + } + return + } + + // Step 3: Create CSR with common name and SANs + // Split SANs into DNS names and email addresses for proper CSR encoding + var dnsNames []string + var emailAddresses []string + for _, san := range job.SANs { + if strings.Contains(san, "@") { + emailAddresses = append(emailAddresses, san) + } else { + dnsNames = append(dnsNames, san) + } + } + + csrTemplate := &x509.CertificateRequest{ + Subject: pkix.Name{ + CommonName: job.CommonName, + }, + DNSNames: dnsNames, + EmailAddresses: emailAddresses, + } + + csrDER, err := x509.CreateCertificateRequest(rand.Reader, csrTemplate, privKey) + if err != nil { + a.logger.Error("failed to create CSR", + "job_id", job.ID, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR creation failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + csrPEM := string(pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE REQUEST", + Bytes: csrDER, + })) + + // Step 4: Submit CSR to the control plane (only the public key leaves the agent) + a.logger.Info("submitting CSR to control plane", + "job_id", job.ID, + "certificate_id", job.CertificateID) + + submitPath := fmt.Sprintf("/api/v1/agents/%s/csr", a.config.AgentID) + resp, err := a.makeRequest(ctx, http.MethodPost, submitPath, map[string]string{ + "csr_pem": csrPEM, + "certificate_id": job.CertificateID, + }) + if err != nil { + a.logger.Error("failed to submit CSR", + "job_id", job.ID, + "error", err) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR submission failed: %v", err)); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusAccepted { + body, _ := io.ReadAll(resp.Body) + a.logger.Error("CSR submission rejected", + "job_id", job.ID, + "status", resp.StatusCode, + "body", string(body)) + if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("CSR rejected: %s", string(body))); reportErr != nil { + a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr) + } + return + } + + a.logger.Info("CSR submitted and signed successfully", + "job_id", job.ID, + "certificate_id", job.CertificateID, + "key_path", keyPath) +}