diff --git a/README.md b/README.md index c14e7f8..a4a320b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A self-hosted certificate lifecycle platform. Track, renew, and deploy TLS certi ## What It Does -certctl gives you a single pane of glass for every TLS certificate in your organization. The **web dashboard** shows your full certificate inventory — what's healthy, what's expiring, what's already expired, and who owns each one. The **REST API** (50+ endpoints) lets you automate everything. **Agents** deployed on your infrastructure handle key generation and certificate deployment without exposing private keys to the control plane. +certctl gives you a single pane of glass for every TLS certificate in your organization. The **web dashboard** shows your full certificate inventory — what's healthy, what's expiring, what's already expired, and who owns each one. The **REST API** (40+ endpoints) lets you automate everything. **Agents** deployed on your infrastructure handle certificate deployment, and in V2+ will handle key generation locally so private keys never leave your servers. ```mermaid flowchart LR @@ -71,7 +71,8 @@ make migrate-up export CERTCTL_SERVER_URL=http://localhost:8443 export CERTCTL_API_KEY=change-me-in-production export CERTCTL_AGENT_NAME=local-agent -./bin/agent +export CERTCTL_AGENT_ID=agent-local-01 +./bin/agent --agent-id=agent-local-01 ``` ## Documentation @@ -114,7 +115,7 @@ flowchart TB ### Key Design Decisions -- **Private keys never touch the control plane.** Agents generate keys locally and submit CSRs (public key only). The control plane forwards CSRs to the CA and returns signed certificates. Even if the control plane database is compromised, no private keys are exposed. +- **Private keys isolated from the control plane (V2+ goal).** In V1, the Local CA issuer generates server-side keys for simplicity. V2+ moves key generation to agents — agents generate keys locally and submit CSRs (public key only). The architecture is designed for this separation; V1 takes a pragmatic shortcut for the built-in CA. - **TEXT primary keys, not UUIDs.** IDs are human-readable prefixed strings (`mc-api-prod`, `t-platform`, `o-alice`) so you can identify resource types at a glance in logs and queries. - **Handler → Service → Repository layering.** Handlers define their own service interfaces for clean dependency inversion. No global service singletons. - **Idempotent migrations.** All schema uses `IF NOT EXISTS` and seed data uses `ON CONFLICT (id) DO NOTHING`, safe for repeated execution. @@ -152,6 +153,8 @@ All server environment variables use the `CERTCTL_` prefix: | `CERTCTL_LOG_FORMAT` | `json` | Log format: `json` or `text` | | `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key`, `jwt`, or `none` | | `CERTCTL_AUTH_SECRET` | — | Required for `api-key` and `jwt` auth types | +| `CERTCTL_ACME_DIRECTORY_URL` | — | ACME directory URL (e.g., Let's Encrypt staging) | +| `CERTCTL_ACME_EMAIL` | — | Contact email for ACME account registration | Agent environment variables: @@ -160,6 +163,7 @@ Agent environment variables: | `CERTCTL_SERVER_URL` | `http://localhost:8080` | Control plane URL | | `CERTCTL_API_KEY` | — | Agent API key | | `CERTCTL_AGENT_NAME` | `certctl-agent` | Agent display name | +| `CERTCTL_AGENT_ID` | — | Registered agent ID (required) | Docker Compose overrides these for the demo stack (see `deploy/docker-compose.yml`): port `8443`, auth type `none`, database pointing to the postgres container. @@ -187,6 +191,8 @@ GET /api/v1/agents/{id} Get POST /api/v1/agents/{id}/heartbeat Record heartbeat POST /api/v1/agents/{id}/csr Submit CSR for issuance GET /api/v1/agents/{id}/certificates/{certId} Retrieve signed certificate +GET /api/v1/agents/{id}/work Poll for pending deployment jobs +POST /api/v1/agents/{id}/jobs/{jobId}/status Report job completion/failure ``` ### Infrastructure @@ -235,7 +241,7 @@ GET /ready Readiness check | Issuer | Status | Type | |--------|--------|------| | Local CA (self-signed) | Implemented | `GenericCA` | -| ACME v2 (Let's Encrypt, Sectigo) | In progress | `ACME` | +| ACME v2 (Let's Encrypt, Sectigo) | Implemented (HTTP-01) | `ACME` | | Vault PKI | Planned | — | | DigiCert | Planned | — | @@ -286,9 +292,8 @@ make docker-clean # Stop + remove volumes ## Security ### Private Key Management -- Private keys are generated exclusively on agents, never sent to the control plane -- Keys stored with file permissions 0600 -- Old keys deleted after successful certificate renewal +- **V1 (Local CA)**: The control plane generates ephemeral RSA-2048 keys server-side for certificate issuance. This simplifies the initial implementation but means private keys exist on the control plane temporarily. Keys are stored in certificate version records. +- **V2+**: Private keys will be generated exclusively on agents, never sent to the control plane. Keys stored with file permissions 0600 and rotated after successful renewal. ### Authentication - Agent-to-server: API key (registered at agent creation) @@ -304,7 +309,7 @@ make docker-clean # Stop + remove volumes Summary: -- **V1 (current)**: Dashboard, inventory, alerting, Local CA + ACME issuers, NGINX/F5/IIS targets, agents, REST API, policies, audit trail, Docker Compose +- **V1 (current)**: Dashboard, inventory, alerting, Local CA issuer (end-to-end lifecycle wired), NGINX/F5/IIS target connectors, agents with work polling, REST API (40+ endpoints), policies, audit trail, Docker Compose - **V2**: Charts/trends, bulk import, OIDC/SSO, deployment rollback, CLI, Slack/Teams - **V3**: Certificate discovery, network scanning, unknown cert detection - **V4+**: Kubernetes CRD, Terraform provider, multi-region, HA control plane, HSM support diff --git a/docs/architecture.md b/docs/architecture.md index d569f72..ab60f3d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -8,7 +8,7 @@ New to certificates? Read the [Concepts Guide](concepts.md) first. ### Design Principles -1. **Zero Private Key Exposure** — Private keys are generated and managed only on agents, never sent to the control plane +1. **Private Key Isolation (V2+ goal)** — In V1, the Local CA generates server-side keys for simplicity. V2+ moves key generation to agents so private keys never touch the control plane 2. **Decoupled Operations** — Agents operate autonomously; the control plane coordinates but doesn't block agent function 3. **Audit-First** — Complete traceability of all issuance, deployment, and rotation events 4. **Connector Architecture** — Pluggable issuers, targets, and notifiers for extensibility @@ -73,9 +73,9 @@ The server exposes a REST API under `/api/v1/` and optionally serves the web das ### Agents -Lightweight Go processes that run on or near your infrastructure. An agent generates private keys locally, creates CSRs, receives signed certificates from the control plane, deploys them to target systems, and reports status back. Agents communicate with the control plane via HTTP and authenticate with API keys. +Lightweight Go processes that run on or near your infrastructure. Agents poll the control plane for pending deployment jobs, fetch signed certificates, deploy them to target systems, and report job status back. In V2+, agents will also generate private keys locally and create CSRs. Agents communicate with the control plane via HTTP and authenticate with API keys. -The agent runs two background loops: a heartbeat (every 60 seconds) to signal it's alive, and a work poll (every 30 seconds) to check for pending jobs. +The agent runs two background loops: a heartbeat (every 60 seconds) to signal it's alive, and a work poll (every 30 seconds) to check for pending deployment jobs via `GET /api/v1/agents/{id}/work`. When a job is found, the agent fetches the certificate, executes the deployment, and reports status via `POST /api/v1/agents/{id}/jobs/{job_id}/status`. ### Web Dashboard @@ -223,7 +223,38 @@ sequenceDiagram API-->>U: 201 Created + JSON body ``` -### 2. Agent Requests Certificate (CSR → Issuance) +### 2. Certificate Issuance + +#### V1: Server-Side Key Generation (Local CA) + +In V1, the control plane generates keys and CSRs server-side for the Local CA. This simplifies the initial implementation — the full agent-side key generation flow is planned for V2+. + +```mermaid +sequenceDiagram + participant U as User / Scheduler + participant API as Control Plane API + participant SVC as RenewalService + participant ISS as IssuerConnector + participant DB as PostgreSQL + + U->>API: POST /api/v1/certificates/{id}/renew + API->>SVC: ProcessRenewalJob(job) + + SVC->>SVC: Generate RSA-2048 key pair (server-side) + SVC->>SVC: Create CSR with CN + SANs + SVC->>ISS: IssueCertificate(commonName, sans, csrPEM) + ISS-->>SVC: IssuanceResult{cert_pem, chain_pem, serial, not_after} + + SVC->>SVC: Compute SHA-256 fingerprint + SVC->>DB: INSERT INTO certificate_versions (PEM chain + CSR) + SVC->>DB: UPDATE managed_certificates SET status='Active', expires_at + SVC->>DB: INSERT INTO audit_events + SVC->>DB: CREATE deployment jobs for all mapped targets + + Note over SVC: Deployment jobs picked up by agents
via GET /api/v1/agents/{id}/work +``` + +#### V2+ (Planned): Agent-Side Key Generation ```mermaid sequenceDiagram @@ -232,22 +263,19 @@ sequenceDiagram participant ISS as Issuer Connector participant DB as PostgreSQL - A->>A: Generate RSA-2048 key pair + A->>A: Generate RSA-2048 key pair locally A->>A: Create CSR (CN + SANs, public key only) - A->>API: POST /api/v1/agents/{id}/csr
{csr_pem: "-----BEGIN..."} + A->>API: POST /api/v1/agents/{id}/csr
{csr_pem, certificate_id} - API->>API: Validate CSR format API->>ISS: IssueCertificate(IssuanceRequest{CSR}) ISS-->>API: IssuanceResult{cert_pem, chain_pem, serial, not_after} API->>DB: INSERT INTO certificate_versions API->>DB: UPDATE managed_certificates SET status='Active' - API->>DB: INSERT INTO audit_events API-->>A: {certificate_pem, chain_pem}
(NO private key in response) - A->>A: Store cert.pem + chain.pem locally - Note over A: key.pem stays on agent
Never transmitted anywhere + A->>A: Store cert + chain locally (key never leaves agent) A->>A: Deploy to target system ``` @@ -320,6 +348,26 @@ flowchart TB end ``` +### IssuerConnectorAdapter (Dependency Inversion) + +The service layer defines its own `IssuerConnector` interface (`internal/service/renewal.go`) while the connector layer has its own `issuer.Connector` interface (`internal/connector/issuer/interface.go`). The `IssuerConnectorAdapter` (`internal/service/issuer_adapter.go`) bridges the two, translating between their request/response types. This maintains clean dependency inversion — the service package never imports the connector package directly. + +```mermaid +flowchart LR + SVC["Service Layer
service.IssuerConnector"] --> ADAPT["IssuerConnectorAdapter
(bridges interfaces)"] + ADAPT --> CONN["Connector Layer
issuer.Connector"] + CONN --> LC["Local CA"] + CONN --> ACME["ACME v2"] +``` + +Registration happens in `cmd/server/main.go`: +```go +localCA := local.New(nil, logger) +issuerRegistry := map[string]service.IssuerConnector{ + "iss-local": service.NewIssuerConnectorAdapter(localCA), +} +``` + ### Issuer Connector Handles certificate issuance from CAs. @@ -394,14 +442,16 @@ flowchart LR style ROT fill:#efe,stroke:#3c3 ``` -Private keys follow a strict lifecycle: +**V1 (Current):** The Local CA issuer generates RSA-2048 keys and CSRs server-side within `RenewalService.ProcessRenewalJob`. Private key material is stored alongside the CSR in the `certificate_versions` table. This is a pragmatic V1 trade-off to get the end-to-end lifecycle working. + +**V2+ (Target Architecture):** Private keys follow a strict lifecycle on agents: 1. **Generated on the agent** — never sent to the control plane 2. **Stored on the agent** — file permissions 0600, owned by the agent process user 3. **Used by the agent** — for deployment to targets and CSR generation 4. **Rotated by the agent** — old keys deleted after successful renewal -The control plane only ever handles public material: certificates, chains, and CSRs. This is a deliberate architectural decision — even if the control plane database is compromised, no private keys are exposed. +The V2+ architecture ensures the control plane only handles public material: certificates, chains, and CSRs. ### Authentication diff --git a/docs/connectors.md b/docs/connectors.md index c48aef9..40bf0c6 100644 --- a/docs/connectors.md +++ b/docs/connectors.md @@ -95,6 +95,29 @@ Configuration: Location: `internal/connector/issuer/local/local.go` +### Built-in: ACME v2 (Let's Encrypt, Sectigo, ZeroSSL) + +The ACME connector implements the full ACME v2 protocol using Go's `golang.org/x/crypto/acme` package. It supports HTTP-01 challenge solving via a built-in temporary HTTP server that starts on demand during certificate issuance. + +Configuration: +```json +{ + "directory_url": "https://acme-staging-v02.api.letsencrypt.org/directory", + "email": "admin@example.com", + "http_port": 80 +} +``` + +For HTTP-01 to work, the domain being validated must resolve to the machine running the connector, and the configured HTTP port must be reachable from the internet. The connector automatically registers an ACME account, creates orders, solves challenges, finalizes with the CSR, and downloads the issued certificate chain. + +Environment variables for the default ACME connector: +- `CERTCTL_ACME_DIRECTORY_URL` — ACME directory URL +- `CERTCTL_ACME_EMAIL` — Contact email for account registration + +The connector is registered in the issuer registry under `iss-acme-staging` and `iss-acme-prod`. Use `iss-acme-staging` for Let's Encrypt staging (rate-limit-friendly testing) and `iss-acme-prod` for production certificates. + +Location: `internal/connector/issuer/acme/acme.go` + ### Building a Custom Issuer Here's the structure for a HashiCorp Vault PKI issuer: @@ -293,16 +316,36 @@ To add a new connector: 2. Implement the interface (all methods required) -3. Register it in the service layer during server initialization in `cmd/server/main.go`: +3. Register it in the service layer during server initialization in `cmd/server/main.go`. + +### IssuerConnectorAdapter + +Issuer connectors use an adapter pattern to bridge the connector-layer `issuer.Connector` interface with the service-layer `service.IssuerConnector` interface. This maintains dependency inversion — the service package never imports the connector package directly. + +The adapter (`internal/service/issuer_adapter.go`) translates between the two interface types: ```go -// For issuers -issuerRegistry := map[string]service.IssuerConnector{ - "local": localCAConnector, - "acme": acmeConnector, - "vault": vaultConnector, // your new issuer -} +// Wrap your connector implementation with the adapter +import "github.com/shankar0123/certctl/internal/service" +myIssuer := myissuer.New(config) +adapted := service.NewIssuerConnectorAdapter(myIssuer) +``` + +Register adapted connectors keyed by the issuer ID from the database: + +```go +// In cmd/server/main.go +localCA := local.New(nil, logger) +issuerRegistry := map[string]service.IssuerConnector{ + "iss-local": service.NewIssuerConnectorAdapter(localCA), + "iss-vault": service.NewIssuerConnectorAdapter(vaultIssuer), // your new issuer +} +``` + +### Notifier Registration + +```go // For notifiers notifierRegistry := map[string]service.Notifier{ "Email": emailNotifier, diff --git a/docs/demo-advanced.md b/docs/demo-advanced.md index ccc28c2..ceebc63 100644 --- a/docs/demo-advanced.md +++ b/docs/demo-advanced.md @@ -215,13 +215,13 @@ Expected response: The `202 Accepted` status code is deliberate. Certificate issuance can take seconds (Local CA) to minutes (ACME DNS challenges). The API doesn't block the caller — it creates a job and returns. The job processor loop (runs every 30 seconds) picks up pending jobs and executes them. -**What happens during a real renewal (production flow):** +**What happens during renewal (V1 flow with Local CA):** ```mermaid sequenceDiagram participant S as Scheduler participant DB as PostgreSQL - participant SVC as CertificateService + participant SVC as RenewalService participant ISS as IssuerConnector participant A as Agent @@ -233,23 +233,25 @@ sequenceDiagram S->>DB: SELECT pending jobs DB-->>S: [job-123: Renewal for mc-demo-api] - S->>A: Notify: generate CSR for demo-api.internal.example.com - A->>A: Generate RSA-2048 key pair locally - A->>A: Create CSR with CN + SANs - A->>SVC: POST /api/v1/agents/{id}/csr {csr_pem: "..."} - - SVC->>ISS: IssueCertificate(CSR) + SVC->>SVC: Generate RSA-2048 key + CSR (server-side in V1) + SVC->>ISS: IssueCertificate(commonName, sans, csrPEM) ISS-->>SVC: {cert_pem, chain_pem, serial, not_after} - SVC->>DB: INSERT certificate_version + SVC->>DB: INSERT certificate_version (PEM chain + fingerprint) SVC->>DB: UPDATE managed_certificates SET status='Active' SVC->>DB: INSERT audit_event (certificate_renewed) + SVC->>DB: CREATE deployment jobs for all targets - SVC-->>A: {certificate_pem, chain_pem} - A->>A: Store cert + chain locally (key never leaves) + Note over A: Agent polls GET /agents/{id}/work + A->>SVC: GET /api/v1/agents/{id}/work + SVC-->>A: [deployment job for mc-demo-api] + A->>SVC: GET /api/v1/agents/{id}/certificates/{certId} + SVC-->>A: {certificate PEM chain} + A->>A: Deploy to target system + A->>SVC: POST /api/v1/agents/{id}/jobs/{jobId}/status {Completed} ``` -The critical security property: the private key is generated by the agent in step 3 and never transmitted. The CSR contains only the public key. The control plane forwards the CSR to the issuer and returns the signed certificate — it never has access to the private key material. +**V1 note:** In V1 with the Local CA, key generation happens server-side in `RenewalService.ProcessRenewalJob`. In V2+, agents will generate keys locally and submit CSRs, ensuring private keys never touch the control plane. Check the jobs list: @@ -322,6 +324,29 @@ Check for deployment jobs: curl -s "$API/api/v1/jobs" | jq '.data[] | select(.certificate_id == "mc-demo-api")' ``` +### Agent Work Polling & Status Reporting + +In production, agents poll for work and report results. You can simulate this manually: + +```bash +# Poll for pending deployment work (as an agent) +curl -s "$API/api/v1/agents/agent-nginx-prod/work" | jq . +``` + +This returns pending deployment jobs assigned to the agent. The agent would then fetch the certificate, deploy it, and report back: + +```bash +# Report job completion (replace JOB_ID with an actual job ID from the work response) +curl -s -X POST "$API/api/v1/agents/agent-nginx-prod/jobs/JOB_ID/status" \ + -H "Content-Type: application/json" \ + -d '{ + "status": "Completed", + "error": "" + }' | jq . +``` + +**How it works:** The `GET /api/v1/agents/{id}/work` endpoint returns all pending deployment jobs. The agent processes each one, then calls `POST /api/v1/agents/{id}/jobs/{job_id}/status` with either `"Completed"` or `"Failed"` (with an error message). The control plane updates the job record and logs an audit event. + --- ## Part 6: View the Audit Trail diff --git a/docs/quickstart.md b/docs/quickstart.md index d9d71c2..f6e8ab9 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -111,6 +111,13 @@ curl -s http://localhost:8443/api/v1/certificates/mc-api-prod | jq . curl -s http://localhost:8443/api/v1/agents | jq . ``` +### Check agent pending work + +```bash +# Replace with an actual agent ID from the list above +curl -s http://localhost:8443/api/v1/agents/agent-nginx-prod/work | jq . +``` + ### View audit trail ```bash diff --git a/go.mod b/go.mod index 147f1e0..f67cd00 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,10 @@ module github.com/shankar0123/certctl -go 1.22.5 +go 1.25.0 require ( github.com/google/uuid v1.6.0 github.com/lib/pq v1.10.9 ) + +require golang.org/x/crypto v0.49.0 diff --git a/go.sum b/go.sum index ae20c4c..7595e9a 100644 --- a/go.sum +++ b/go.sum @@ -2,3 +2,5 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= diff --git a/internal/api/handler/agents.go b/internal/api/handler/agents.go index 8d5ba91..00c8170 100644 --- a/internal/api/handler/agents.go +++ b/internal/api/handler/agents.go @@ -17,7 +17,11 @@ type AgentService interface { RegisterAgent(agent domain.Agent) (*domain.Agent, error) Heartbeat(agentID string) error CSRSubmit(agentID string, csrPEM string) (string, error) + CSRSubmitForCert(agentID string, certID string, csrPEM string) (string, error) CertificatePickup(agentID, certID string) (string, error) + GetWork(agentID string) ([]domain.Job, error) + GetWorkWithTargets(agentID string) ([]domain.WorkItem, error) + UpdateJobStatus(agentID string, jobID string, status string, errMsg string) error } // AgentHandler handles HTTP requests for agent operations. @@ -155,6 +159,7 @@ func (h AgentHandler) Heartbeat(w http.ResponseWriter, r *http.Request) { // AgentCSRSubmit receives a Certificate Signing Request from an agent. // POST /api/v1/agents/{id}/csr +// Optionally accepts a certificate_id to sign the CSR for a specific certificate. func (h AgentHandler) AgentCSRSubmit(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { Error(w, http.StatusMethodNotAllowed, "Method not allowed") @@ -173,7 +178,8 @@ func (h AgentHandler) AgentCSRSubmit(w http.ResponseWriter, r *http.Request) { agentID := parts[0] var req struct { - CSRPEM string `json:"csr_pem"` + CSRPEM string `json:"csr_pem"` + CertificateID string `json:"certificate_id,omitempty"` } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { ErrorWithRequestID(w, http.StatusBadRequest, "Invalid request body", requestID) @@ -185,15 +191,23 @@ func (h AgentHandler) AgentCSRSubmit(w http.ResponseWriter, r *http.Request) { return } - jobID, err := h.svc.CSRSubmit(agentID, req.CSRPEM) + var status string + var err error + + // If certificate_id is provided, sign the CSR for that specific certificate + if req.CertificateID != "" { + status, err = h.svc.CSRSubmitForCert(agentID, req.CertificateID, req.CSRPEM) + } else { + status, err = h.svc.CSRSubmit(agentID, req.CSRPEM) + } + if err != nil { ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to submit CSR", requestID) return } response := map[string]string{ - "job_id": jobID, - "status": "csr_received", + "status": status, } JSON(w, http.StatusAccepted, response) @@ -231,3 +245,82 @@ func (h AgentHandler) AgentCertificatePickup(w http.ResponseWriter, r *http.Requ JSON(w, http.StatusOK, response) } + +// AgentGetWork returns pending deployment jobs for an agent. +// GET /api/v1/agents/{id}/work +func (h AgentHandler) AgentGetWork(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + Error(w, http.StatusMethodNotAllowed, "Method not allowed") + return + } + + requestID := middleware.GetRequestID(r.Context()) + + // Extract agent ID from path /api/v1/agents/{id}/work + path := strings.TrimPrefix(r.URL.Path, "/api/v1/agents/") + parts := strings.Split(path, "/") + if len(parts) < 2 || parts[0] == "" { + ErrorWithRequestID(w, http.StatusBadRequest, "Agent ID is required", requestID) + return + } + agentID := parts[0] + + workItems, err := h.svc.GetWorkWithTargets(agentID) + if err != nil { + ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get pending work", requestID) + return + } + + if workItems == nil { + workItems = []domain.WorkItem{} + } + + JSON(w, http.StatusOK, map[string]interface{}{ + "jobs": workItems, + "count": len(workItems), + }) +} + +// AgentReportJobStatus receives a job status report from an agent. +// POST /api/v1/agents/{id}/jobs/{job_id}/status +func (h AgentHandler) AgentReportJobStatus(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + Error(w, http.StatusMethodNotAllowed, "Method not allowed") + return + } + + requestID := middleware.GetRequestID(r.Context()) + + // Extract agent ID and job ID from path /api/v1/agents/{id}/jobs/{job_id}/status + path := strings.TrimPrefix(r.URL.Path, "/api/v1/agents/") + parts := strings.Split(path, "/") + if len(parts) < 4 || parts[0] == "" || parts[2] == "" { + ErrorWithRequestID(w, http.StatusBadRequest, "Agent ID and Job ID are required", requestID) + return + } + agentID := parts[0] + jobID := parts[2] + + var req struct { + Status string `json:"status"` + Error string `json:"error,omitempty"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + ErrorWithRequestID(w, http.StatusBadRequest, "Invalid request body", requestID) + return + } + + if req.Status == "" { + ErrorWithRequestID(w, http.StatusBadRequest, "Status is required", requestID) + return + } + + if err := h.svc.UpdateJobStatus(agentID, jobID, req.Status, req.Error); err != nil { + ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update job status", requestID) + return + } + + JSON(w, http.StatusOK, map[string]string{ + "status": "updated", + }) +} diff --git a/internal/api/router/router.go b/internal/api/router/router.go index ea37e5a..53adf5a 100644 --- a/internal/api/router/router.go +++ b/internal/api/router/router.go @@ -101,6 +101,8 @@ func (r *Router) RegisterHandlers( r.Register("POST /api/v1/agents/{id}/heartbeat", http.HandlerFunc(agents.Heartbeat)) r.Register("POST /api/v1/agents/{id}/csr", http.HandlerFunc(agents.AgentCSRSubmit)) r.Register("GET /api/v1/agents/{id}/certificates/{cert_id}", http.HandlerFunc(agents.AgentCertificatePickup)) + r.Register("GET /api/v1/agents/{id}/work", http.HandlerFunc(agents.AgentGetWork)) + r.Register("POST /api/v1/agents/{id}/jobs/{job_id}/status", http.HandlerFunc(agents.AgentReportJobStatus)) // Jobs routes: /api/v1/jobs r.Register("GET /api/v1/jobs", http.HandlerFunc(jobs.ListJobs)) diff --git a/internal/connector/issuer/acme/acme.go b/internal/connector/issuer/acme/acme.go index e4798c9..8630f5d 100644 --- a/internal/connector/issuer/acme/acme.go +++ b/internal/connector/issuer/acme/acme.go @@ -2,43 +2,65 @@ package acme import ( "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" "encoding/json" + "encoding/pem" "fmt" "log/slog" + "net" "net/http" + "sync" "time" + "golang.org/x/crypto/acme" + "github.com/shankar0123/certctl/internal/connector/issuer" ) // Config represents the ACME issuer connector configuration. type Config struct { - DirectoryURL string `json:"directory_url"` - Email string `json:"email"` - EABKid string `json:"eab_kid,omitempty"` - EABHmac string `json:"eab_hmac,omitempty"` + DirectoryURL string `json:"directory_url"` // ACME directory URL (e.g., https://acme-staging-v02.api.letsencrypt.org/directory) + Email string `json:"email"` // Contact email for the ACME account + EABKid string `json:"eab_kid,omitempty"` // External Account Binding Key ID (for some CAs) + EABHmac string `json:"eab_hmac,omitempty"` // External Account Binding HMAC Key + HTTPPort int `json:"http_port,omitempty"` // Port for HTTP-01 challenge server (default: 80) } -// Connector implements the issuer.Connector interface for ACME-compatible CAs. -// This is a stub implementation that demonstrates the structure; actual ACME protocol -// implementation will use a proper ACME library (e.g., golang.org/x/crypto/acme). +// Connector implements the issuer.Connector interface for ACME-compatible CAs +// (Let's Encrypt, Sectigo, ZeroSSL, etc.). +// +// It supports HTTP-01 challenge solving via a built-in temporary HTTP server. +// The challenge server starts when needed and stops after validation completes. +// +// For HTTP-01 to work, the domain(s) being validated must resolve to the machine +// running this connector, and the configured HTTP port must be reachable from the internet. type Connector struct { - config *Config - logger *slog.Logger - client *http.Client + config *Config + logger *slog.Logger + client *acme.Client + accountKey *ecdsa.PrivateKey + + // HTTP-01 challenge solver state + challengeMu sync.RWMutex + challengeTokens map[string]string // token → key authorization } // New creates a new ACME connector with the given configuration and logger. func New(config *Config, logger *slog.Logger) *Connector { + if config != nil && config.HTTPPort == 0 { + config.HTTPPort = 80 + } return &Connector{ - config: config, - logger: logger, - client: &http.Client{Timeout: 30 * time.Second}, + config: config, + logger: logger, + challengeTokens: make(map[string]string), } } // ValidateConfig checks that the ACME directory URL is reachable and valid. -// It performs a HEAD request to the directory URL to verify connectivity. func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error { var cfg Config if err := json.Unmarshal(rawConfig, &cfg); err != nil { @@ -56,12 +78,13 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag c.logger.Info("validating ACME configuration", "directory_url", cfg.DirectoryURL) // Verify that the directory URL is reachable - req, err := http.NewRequestWithContext(ctx, http.MethodHead, cfg.DirectoryURL, nil) + httpClient := &http.Client{Timeout: 10 * time.Second} + req, err := http.NewRequestWithContext(ctx, http.MethodGet, cfg.DirectoryURL, nil) if err != nil { return fmt.Errorf("failed to create request: %w", err) } - resp, err := c.client.Do(req) + resp, err := httpClient.Do(req) if err != nil { return fmt.Errorf("failed to reach ACME directory: %w", err) } @@ -71,116 +94,365 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag return fmt.Errorf("ACME directory returned status %d", resp.StatusCode) } + if cfg.HTTPPort == 0 { + cfg.HTTPPort = 80 + } + c.config = &cfg c.logger.Info("ACME configuration validated") return nil } +// ensureClient initializes the ACME client and account key if not already done. +func (c *Connector) ensureClient(ctx context.Context) error { + if c.client != nil { + return nil + } + + // Generate an ECDSA P-256 account key + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + return fmt.Errorf("failed to generate account key: %w", err) + } + c.accountKey = key + + c.client = &acme.Client{ + Key: key, + DirectoryURL: c.config.DirectoryURL, + } + + // Register or retrieve the ACME account + acct := &acme.Account{ + Contact: []string{"mailto:" + c.config.Email}, + } + _, err = c.client.Register(ctx, acct, acme.AcceptTOS) + if err != nil { + // Account may already exist, try to get it + _, getErr := c.client.GetReg(ctx, "") + if getErr != nil { + return fmt.Errorf("failed to register ACME account: %w (get existing: %v)", err, getErr) + } + c.logger.Info("using existing ACME account") + } else { + c.logger.Info("registered new ACME account", "email", c.config.Email) + } + + return nil +} + // IssueCertificate submits a certificate issuance request to the ACME CA. // -// The flow for ACME is: -// 1. Create a new order with the CA, specifying the identifiers (SANs + CN) -// 2. The CA returns authorization challenges (DNS, HTTP, etc.) -// 3. Solve the challenges (stub: in production, the agent or external solver handles this) -// 4. Finalize the order by submitting the CSR -// 5. Download the issued certificate and chain -// -// TODO: Implement actual ACME protocol using golang.org/x/crypto/acme. -// This stub documents the expected flow but doesn't execute it. +// Flow: +// 1. Create a new order with the CA for the requested identifiers +// 2. Solve HTTP-01 challenges for each authorization +// 3. Finalize the order by submitting the CSR +// 4. Download the issued certificate and chain func (c *Connector) IssueCertificate(ctx context.Context, request issuer.IssuanceRequest) (*issuer.IssuanceResult, error) { c.logger.Info("processing ACME issuance request", "common_name", request.CommonName, "san_count", len(request.SANs)) - // TODO: Implement ACME order creation. - // For now, return a stub response to demonstrate the interface. - // In production: - // 1. Connect to the ACME directory - // 2. Create a new order with identifiers from CommonName and SANs - // 3. Get authorization challenges - // 4. Wait for challenge completion (agent/solver will handle) - // 5. Submit CSR to finalize order - // 6. Retrieve issued certificate and chain + if err := c.ensureClient(ctx); err != nil { + return nil, fmt.Errorf("ACME client init: %w", err) + } - c.logger.Warn("ACME issuance not yet implemented", "common_name", request.CommonName) + // Build the list of identifiers (domains) + identifiers := buildIdentifiers(request.CommonName, request.SANs) + + // Step 1: Create order + order, err := c.client.AuthorizeOrder(ctx, identifiers) + if err != nil { + return nil, fmt.Errorf("failed to create ACME order: %w", err) + } + c.logger.Info("ACME order created", "order_url", order.URI, "status", order.Status) + + // Step 2: Solve authorizations (HTTP-01 challenges) + if order.Status == acme.StatusPending { + if err := c.solveAuthorizations(ctx, order.AuthzURLs); err != nil { + return nil, fmt.Errorf("failed to solve challenges: %w", err) + } + + // Wait for the order to be ready + order, err = c.client.WaitOrder(ctx, order.URI) + if err != nil { + return nil, fmt.Errorf("order failed after challenge: %w", err) + } + } + + if order.Status != acme.StatusReady { + return nil, fmt.Errorf("order not ready, status: %s", order.Status) + } + + // Step 3: Parse CSR and finalize order + csrDER, err := parseCSRPEM(request.CSRPEM) + if err != nil { + return nil, fmt.Errorf("failed to parse CSR: %w", err) + } + + derChain, _, err := c.client.CreateOrderCert(ctx, order.FinalizeURL, csrDER, true) + if err != nil { + return nil, fmt.Errorf("failed to finalize order: %w", err) + } + + if len(derChain) == 0 { + return nil, fmt.Errorf("ACME returned empty certificate chain") + } + + // Step 4: Convert DER chain to PEM + certPEM, chainPEM, serial, notBefore, notAfter, err := parseDERChain(derChain) + if err != nil { + return nil, fmt.Errorf("failed to parse certificate chain: %w", err) + } + + c.logger.Info("ACME certificate issued", + "common_name", request.CommonName, + "serial", serial, + "not_after", notAfter) - // Stub: Return a placeholder result return &issuer.IssuanceResult{ - CertPEM: "-----BEGIN CERTIFICATE-----\n(stub)\n-----END CERTIFICATE-----\n", - ChainPEM: "-----BEGIN CERTIFICATE-----\n(stub chain)\n-----END CERTIFICATE-----\n", - Serial: "stub-serial-123456", - NotBefore: time.Now(), - NotAfter: time.Now().AddDate(0, 0, 90), - OrderID: "stub-order-id", + CertPEM: certPEM, + ChainPEM: chainPEM, + Serial: serial, + NotBefore: notBefore, + NotAfter: notAfter, + OrderID: order.URI, }, nil } -// RenewCertificate renews an existing certificate by submitting a new ACME order. -// The process is identical to IssueCertificate but uses the existing CSR from the previous certificate. -// -// TODO: Implement actual ACME protocol using golang.org/x/crypto/acme. +// RenewCertificate renews a certificate by creating a new ACME order. +// The process is identical to issuance — ACME doesn't distinguish between new and renewal. func (c *Connector) RenewCertificate(ctx context.Context, request issuer.RenewalRequest) (*issuer.IssuanceResult, error) { c.logger.Info("processing ACME renewal request", "common_name", request.CommonName, "san_count", len(request.SANs)) - // TODO: Implement ACME renewal. - // In production: - // 1. Create a new order with the same identifiers - // 2. Obtain and solve authorization challenges - // 3. Submit the CSR (from request.CSRPEM) - // 4. Retrieve the issued certificate and chain - - c.logger.Warn("ACME renewal not yet implemented", "common_name", request.CommonName) - - // Stub: Return a placeholder result - return &issuer.IssuanceResult{ - CertPEM: "-----BEGIN CERTIFICATE-----\n(stub renewed)\n-----END CERTIFICATE-----\n", - ChainPEM: "-----BEGIN CERTIFICATE-----\n(stub chain)\n-----END CERTIFICATE-----\n", - Serial: "stub-serial-renewal-123456", - NotBefore: time.Now(), - NotAfter: time.Now().AddDate(0, 0, 90), - OrderID: "stub-order-renewal-id", - }, nil + return c.IssueCertificate(ctx, issuer.IssuanceRequest{ + CommonName: request.CommonName, + SANs: request.SANs, + CSRPEM: request.CSRPEM, + }) } // RevokeCertificate revokes a certificate at the ACME CA. -// The CA will no longer consider the certificate valid. -// -// TODO: Implement revocation via ACME protocol. func (c *Connector) RevokeCertificate(ctx context.Context, request issuer.RevocationRequest) error { c.logger.Info("processing ACME revocation request", "serial", request.Serial) - // TODO: Implement ACME revocation. - // In production: - // 1. Retrieve the certificate PEM - // 2. Post revocation request to CA's revocation endpoint - // 3. Provide reason if given + if err := c.ensureClient(ctx); err != nil { + return fmt.Errorf("ACME client init: %w", err) + } - c.logger.Warn("ACME revocation not yet implemented", "serial", request.Serial) - return nil + // ACME revocation requires the certificate DER, not just the serial. + // For now, log a warning. Full revocation requires storing the cert DER + // or re-fetching it from the order. + c.logger.Warn("ACME revocation requires certificate DER bytes; serial-only revocation not supported in V1", + "serial", request.Serial) + return fmt.Errorf("ACME revocation by serial not supported in V1; provide certificate DER") } // GetOrderStatus retrieves the current status of an ACME order. -// This is useful for polling the status of pending issuance or renewal orders. -// -// TODO: Implement order status polling. func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer.OrderStatus, error) { c.logger.Info("fetching ACME order status", "order_id", orderID) - // TODO: Implement ACME order status polling. - // In production: - // 1. Connect to the ACME directory - // 2. Fetch order status by orderID - // 3. Return current status, message, and any issued certificate material + if err := c.ensureClient(ctx); err != nil { + return nil, fmt.Errorf("ACME client init: %w", err) + } - c.logger.Warn("ACME order status polling not yet implemented", "order_id", orderID) + order, err := c.client.GetOrder(ctx, orderID) + if err != nil { + return nil, fmt.Errorf("failed to get order: %w", err) + } - // Stub: Return a placeholder status - return &issuer.OrderStatus{ + status := &issuer.OrderStatus{ OrderID: orderID, - Status: "processing", - Message: nil, + Status: string(order.Status), UpdatedAt: time.Now(), - }, nil + } + + return status, nil +} + +// solveAuthorizations processes all authorization URLs and solves their HTTP-01 challenges. +func (c *Connector) solveAuthorizations(ctx context.Context, authzURLs []string) error { + // Start the challenge server + srv, err := c.startChallengeServer() + if err != nil { + return fmt.Errorf("failed to start challenge server: %w", err) + } + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = srv.Shutdown(shutdownCtx) + c.logger.Debug("challenge server stopped") + }() + + for _, authzURL := range authzURLs { + authz, err := c.client.GetAuthorization(ctx, authzURL) + if err != nil { + return fmt.Errorf("failed to get authorization %s: %w", authzURL, err) + } + + if authz.Status == acme.StatusValid { + continue + } + + // Find the HTTP-01 challenge + var httpChallenge *acme.Challenge + for _, ch := range authz.Challenges { + if ch.Type == "http-01" { + httpChallenge = ch + break + } + } + + if httpChallenge == nil { + return fmt.Errorf("no HTTP-01 challenge found for %s", authz.Identifier.Value) + } + + // Compute the key authorization + keyAuth, err := c.client.HTTP01ChallengeResponse(httpChallenge.Token) + if err != nil { + return fmt.Errorf("failed to compute key authorization: %w", err) + } + + // Store it for the challenge server to serve + c.challengeMu.Lock() + c.challengeTokens[httpChallenge.Token] = keyAuth + c.challengeMu.Unlock() + + c.logger.Info("accepting HTTP-01 challenge", + "domain", authz.Identifier.Value, + "token", httpChallenge.Token) + + // Tell the CA we're ready + if _, err := c.client.Accept(ctx, httpChallenge); err != nil { + return fmt.Errorf("failed to accept challenge: %w", err) + } + + // Wait for authorization to be valid + if _, err := c.client.WaitAuthorization(ctx, authzURL); err != nil { + return fmt.Errorf("authorization failed for %s: %w", authz.Identifier.Value, err) + } + + c.logger.Info("authorization validated", "domain", authz.Identifier.Value) + + // Clean up token + c.challengeMu.Lock() + delete(c.challengeTokens, httpChallenge.Token) + c.challengeMu.Unlock() + } + + return nil +} + +// startChallengeServer starts an HTTP server that responds to ACME HTTP-01 challenges. +// It listens on the configured HTTP port and serves challenge tokens at +// /.well-known/acme-challenge/{token}. +func (c *Connector) startChallengeServer() (*http.Server, error) { + mux := http.NewServeMux() + mux.HandleFunc("/.well-known/acme-challenge/", func(w http.ResponseWriter, r *http.Request) { + token := r.URL.Path[len("/.well-known/acme-challenge/"):] + + c.challengeMu.RLock() + keyAuth, ok := c.challengeTokens[token] + c.challengeMu.RUnlock() + + if !ok { + c.logger.Warn("unknown challenge token", "token", token) + http.NotFound(w, r) + return + } + + c.logger.Debug("serving challenge response", "token", token) + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write([]byte(keyAuth)) + }) + + addr := fmt.Sprintf(":%d", c.config.HTTPPort) + srv := &http.Server{ + Addr: addr, + Handler: mux, + } + + ln, err := net.Listen("tcp", addr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", addr, err) + } + + go func() { + c.logger.Info("challenge server started", "address", addr) + if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed { + c.logger.Error("challenge server error", "error", err) + } + }() + + return srv, nil +} + +// buildIdentifiers constructs ACME domain identifiers from common name and SANs. +func buildIdentifiers(commonName string, sans []string) []acme.AuthzID { + seen := make(map[string]bool) + var ids []acme.AuthzID + + // Add CN first + if commonName != "" { + seen[commonName] = true + ids = append(ids, acme.AuthzID{Type: "dns", Value: commonName}) + } + + // Add SANs, deduplicating + for _, san := range sans { + if san != "" && !seen[san] { + seen[san] = true + ids = append(ids, acme.AuthzID{Type: "dns", Value: san}) + } + } + + return ids +} + +// parseCSRPEM decodes a PEM-encoded CSR to DER bytes. +func parseCSRPEM(csrPEM string) ([]byte, error) { + block, _ := pem.Decode([]byte(csrPEM)) + if block == nil { + return nil, fmt.Errorf("failed to decode CSR PEM") + } + if block.Type != "CERTIFICATE REQUEST" { + return nil, fmt.Errorf("unexpected PEM type: %s (expected CERTIFICATE REQUEST)", block.Type) + } + return block.Bytes, nil +} + +// parseDERChain converts a DER certificate chain to PEM strings and extracts metadata. +func parseDERChain(derChain [][]byte) (certPEM string, chainPEM string, serial string, notBefore time.Time, notAfter time.Time, err error) { + if len(derChain) == 0 { + err = fmt.Errorf("empty certificate chain") + return + } + + // First cert is the leaf + leafCert, parseErr := x509.ParseCertificate(derChain[0]) + if parseErr != nil { + err = fmt.Errorf("failed to parse leaf certificate: %w", parseErr) + return + } + + serial = leafCert.SerialNumber.String() + notBefore = leafCert.NotBefore + notAfter = leafCert.NotAfter + + // Encode leaf to PEM + certPEM = string(pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: derChain[0], + })) + + // Encode remaining chain certs to PEM + for i := 1; i < len(derChain); i++ { + chainPEM += string(pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: derChain[i], + })) + } + + return } diff --git a/internal/domain/job.go b/internal/domain/job.go index 8673c2b..36191f4 100644 --- a/internal/domain/job.go +++ b/internal/domain/job.go @@ -48,3 +48,15 @@ type DeploymentJob struct { AgentID string `json:"agent_id"` DeploymentResult json.RawMessage `json:"deployment_result,omitempty"` } + +// WorkItem enriches a Job with target details so the agent knows which connector to use. +// Returned by GET /api/v1/agents/{id}/work. +type WorkItem struct { + ID string `json:"id"` + Type JobType `json:"type"` + CertificateID string `json:"certificate_id"` + TargetID *string `json:"target_id,omitempty"` + TargetType string `json:"target_type,omitempty"` + TargetConfig json.RawMessage `json:"target_config,omitempty"` + Status JobStatus `json:"status"` +} diff --git a/internal/service/agent.go b/internal/service/agent.go index 42e80f7..c7377c9 100644 --- a/internal/service/agent.go +++ b/internal/service/agent.go @@ -17,6 +17,7 @@ type AgentService struct { agentRepo repository.AgentRepository certRepo repository.CertificateRepository jobRepo repository.JobRepository + targetRepo repository.TargetRepository auditService *AuditService issuerRegistry map[string]IssuerConnector } @@ -26,6 +27,7 @@ func NewAgentService( agentRepo repository.AgentRepository, certRepo repository.CertificateRepository, jobRepo repository.JobRepository, + targetRepo repository.TargetRepository, auditService *AuditService, issuerRegistry map[string]IssuerConnector, ) *AgentService { @@ -33,6 +35,7 @@ func NewAgentService( agentRepo: agentRepo, certRepo: certRepo, jobRepo: jobRepo, + targetRepo: targetRepo, auditService: auditService, issuerRegistry: issuerRegistry, } @@ -103,6 +106,8 @@ func (s *AgentService) Heartbeat(agentID string) error { } // SubmitCSR validates and processes a Certificate Signing Request from an agent. +// It forwards the CSR to the appropriate issuer connector for signing, then stores +// the resulting certificate version. func (s *AgentService) SubmitCSR(ctx context.Context, agentID string, certID string, csrPEM []byte) error { // Fetch agent agent, err := s.agentRepo.Get(ctx, agentID) @@ -110,16 +115,54 @@ func (s *AgentService) SubmitCSR(ctx context.Context, agentID string, certID str return fmt.Errorf("failed to fetch agent: %w", err) } - // Validate CSR format (basic check) + // Validate CSR format if len(csrPEM) == 0 { return fmt.Errorf("invalid CSR: empty") } - // In production, parse and validate the CSR signature and CN here - // For now, accept and proceed + // If a certificate ID is provided, sign the CSR via the issuer connector + if certID != "" { + cert, err := s.certRepo.Get(ctx, certID) + if err != nil { + return fmt.Errorf("failed to fetch certificate: %w", err) + } - // In a production system, we'd store the CSR in a certificate version or metadata - // For now, we just validate and accept it + // Look up the issuer connector + connector, ok := s.issuerRegistry[cert.IssuerID] + if ok { + // Sign the CSR via the issuer connector + result, err := connector.IssueCertificate(ctx, cert.CommonName, cert.SANs, string(csrPEM)) + if err != nil { + return fmt.Errorf("issuer signing failed: %w", err) + } + + // Store the signed certificate as a new version + version := &domain.CertificateVersion{ + ID: generateID("certver"), + CertificateID: certID, + SerialNumber: result.Serial, + NotBefore: result.NotBefore, + NotAfter: result.NotAfter, + PEMChain: result.CertPEM + "\n" + result.ChainPEM, + CSRPEM: string(csrPEM), + CreatedAt: time.Now(), + } + + if err := s.certRepo.CreateVersion(ctx, version); err != nil { + return fmt.Errorf("failed to store certificate version: %w", err) + } + + // Update certificate status and expiry + cert.Status = domain.CertificateStatusActive + cert.ExpiresAt = result.NotAfter + now := time.Now() + cert.LastRenewalAt = &now + cert.UpdatedAt = now + if err := s.certRepo.Update(ctx, cert); err != nil { + fmt.Printf("failed to update certificate: %v\n", err) + } + } + } // Record audit event if err := s.auditService.RecordEvent(ctx, agent.ID, domain.ActorTypeAgent, @@ -305,14 +348,78 @@ func (s *AgentService) RegisterAgent(agent domain.Agent) (*domain.Agent, error) } // CSRSubmit processes a CSR submission from an agent (handler interface method). +// The csrPEM parameter contains "certID:csrPEM" or just the CSR PEM. func (s *AgentService) CSRSubmit(agentID string, csrPEM string) (string, error) { - // For the handler interface, we accept the CSR as a string err := s.SubmitCSR(context.Background(), agentID, "", []byte(csrPEM)) if err != nil { return "", err } - // Return the CSR as acknowledgment - return csrPEM, nil + return "csr_accepted", nil +} + +// CSRSubmitForCert processes a CSR submission for a specific certificate (handler interface method). +func (s *AgentService) CSRSubmitForCert(agentID string, certID string, csrPEM string) (string, error) { + err := s.SubmitCSR(context.Background(), agentID, certID, []byte(csrPEM)) + if err != nil { + return "", err + } + return "csr_signed", nil +} + +// GetWork returns pending deployment jobs for an agent (handler interface method). +func (s *AgentService) GetWork(agentID string) ([]domain.Job, error) { + jobs, err := s.GetPendingWork(context.Background(), agentID) + if err != nil { + return nil, err + } + var result []domain.Job + for _, j := range jobs { + if j != nil { + result = append(result, *j) + } + } + return result, nil +} + +// GetWorkWithTargets returns pending deployment jobs enriched with target type and config. +// This allows agents to know which connector to invoke for each deployment job. +func (s *AgentService) GetWorkWithTargets(agentID string) ([]domain.WorkItem, error) { + jobs, err := s.GetPendingWork(context.Background(), agentID) + if err != nil { + return nil, err + } + + var items []domain.WorkItem + for _, j := range jobs { + if j == nil { + continue + } + item := domain.WorkItem{ + ID: j.ID, + Type: j.Type, + CertificateID: j.CertificateID, + TargetID: j.TargetID, + Status: j.Status, + } + + // Enrich with target details if target ID is present + if j.TargetID != nil && *j.TargetID != "" { + target, err := s.targetRepo.Get(context.Background(), *j.TargetID) + if err == nil { + item.TargetType = string(target.Type) + item.TargetConfig = target.Config + } + } + + items = append(items, item) + } + + return items, nil +} + +// UpdateJobStatus reports a job's status from an agent (handler interface method). +func (s *AgentService) UpdateJobStatus(agentID string, jobID string, status string, errMsg string) error { + return s.ReportJobStatus(context.Background(), agentID, jobID, domain.JobStatus(status), errMsg) } // CertificatePickup retrieves a certificate for an agent (handler interface method). diff --git a/internal/service/deployment.go b/internal/service/deployment.go index d945eca..aff4959 100644 --- a/internal/service/deployment.go +++ b/internal/service/deployment.go @@ -279,7 +279,7 @@ func (s *DeploymentService) MarkDeploymentFailed(ctx context.Context, jobID stri } // Send deployment failure notification - if err := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, false, fmt.Errorf(errMsg)); err != nil { + if err := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, false, fmt.Errorf("%s", errMsg)); err != nil { fmt.Printf("failed to send deployment notification: %v\n", err) } } diff --git a/internal/service/issuer_adapter.go b/internal/service/issuer_adapter.go new file mode 100644 index 0000000..5f221fb --- /dev/null +++ b/internal/service/issuer_adapter.go @@ -0,0 +1,59 @@ +package service + +import ( + "context" + + "github.com/shankar0123/certctl/internal/connector/issuer" +) + +// IssuerConnectorAdapter bridges the connector-layer issuer.Connector interface with the +// service-layer IssuerConnector interface. This maintains dependency inversion: the service +// layer defines the interface it needs, and this adapter wraps the concrete connector. +type IssuerConnectorAdapter struct { + connector issuer.Connector +} + +// NewIssuerConnectorAdapter wraps an issuer.Connector to implement service.IssuerConnector. +func NewIssuerConnectorAdapter(c issuer.Connector) IssuerConnector { + return &IssuerConnectorAdapter{connector: c} +} + +// IssueCertificate delegates to the underlying connector's IssueCertificate method, +// translating between service-layer and connector-layer types. +func (a *IssuerConnectorAdapter) IssueCertificate(ctx context.Context, commonName string, sans []string, csrPEM string) (*IssuanceResult, error) { + result, err := a.connector.IssueCertificate(ctx, issuer.IssuanceRequest{ + CommonName: commonName, + SANs: sans, + CSRPEM: csrPEM, + }) + if err != nil { + return nil, err + } + return &IssuanceResult{ + CertPEM: result.CertPEM, + ChainPEM: result.ChainPEM, + Serial: result.Serial, + NotBefore: result.NotBefore, + NotAfter: result.NotAfter, + }, nil +} + +// RenewCertificate delegates to the underlying connector's RenewCertificate method, +// translating between service-layer and connector-layer types. +func (a *IssuerConnectorAdapter) RenewCertificate(ctx context.Context, commonName string, sans []string, csrPEM string) (*IssuanceResult, error) { + result, err := a.connector.RenewCertificate(ctx, issuer.RenewalRequest{ + CommonName: commonName, + SANs: sans, + CSRPEM: csrPEM, + }) + if err != nil { + return nil, err + } + return &IssuanceResult{ + CertPEM: result.CertPEM, + ChainPEM: result.ChainPEM, + Serial: result.Serial, + NotBefore: result.NotBefore, + NotAfter: result.NotAfter, + }, nil +} diff --git a/internal/service/job.go b/internal/service/job.go index ecb3491..37347c0 100644 --- a/internal/service/job.go +++ b/internal/service/job.go @@ -95,22 +95,15 @@ func (s *JobService) processJob(ctx context.Context, job *domain.Job) error { } // processIssuanceJob handles a certificate issuance job. -// This is a placeholder that documents the flow. -// TODO: Implement actual issuance job processing if needed. +// It reuses the renewal service's ProcessRenewalJob since the flow is identical: +// generate key → create CSR → call issuer → store version → create deployment jobs. +// The only difference is semantics (new cert vs renewed cert), not mechanics. func (s *JobService) processIssuanceJob(ctx context.Context, job *domain.Job) error { s.logger.Debug("processing issuance job", "job_id", job.ID) - // TODO: Implement issuance job processing - // In production: - // 1. Fetch the certificate - // 2. Fetch the issuer - // 3. Generate or retrieve CSR - // 4. Call issuer to issue new certificate - // 5. Create certificate version - // 6. Update certificate status - // 7. Mark job as completed - - return fmt.Errorf("issuance job processing not yet implemented") + // Issuance follows the same code path as renewal for the Local CA: + // generate server-side key + CSR → sign via issuer → store cert version → deploy + return s.renewalService.ProcessRenewalJob(ctx, job) } // processValidationJob handles a certificate validation job. diff --git a/internal/service/renewal.go b/internal/service/renewal.go index fa42bc9..9552ad7 100644 --- a/internal/service/renewal.go +++ b/internal/service/renewal.go @@ -2,6 +2,13 @@ package service import ( "context" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "crypto/x509/pkix" + "encoding/hex" + "encoding/pem" "fmt" "time" @@ -11,19 +18,30 @@ import ( // RenewalService manages certificate renewal workflows. type RenewalService struct { - certRepo repository.CertificateRepository - jobRepo repository.JobRepository - auditService *AuditService - notificationSvc *NotificationService - issuerRegistry map[string]IssuerConnector + certRepo repository.CertificateRepository + jobRepo repository.JobRepository + auditService *AuditService + notificationSvc *NotificationService + issuerRegistry map[string]IssuerConnector } -// IssuerConnector defines the interface for interacting with certificate issuers. +// IssuerConnector defines the service-layer interface for interacting with certificate issuers. +// This is distinct from the connector-layer issuer.Connector interface to maintain dependency +// inversion. Use IssuerConnectorAdapter to bridge between the two. type IssuerConnector interface { - // RenewCertificate renews a certificate and returns the new certificate PEM. - RenewCertificate(ctx context.Context, csr []byte) ([]byte, error) - // GetCertificateChain returns the issuer's certificate chain. - GetCertificateChain(ctx context.Context) ([]byte, error) + // IssueCertificate issues a new certificate using the provided CSR PEM. + IssueCertificate(ctx context.Context, commonName string, sans []string, csrPEM string) (*IssuanceResult, error) + // RenewCertificate renews a certificate using the provided CSR PEM. + RenewCertificate(ctx context.Context, commonName string, sans []string, csrPEM string) (*IssuanceResult, error) +} + +// IssuanceResult holds the result of a certificate issuance or renewal operation. +type IssuanceResult struct { + CertPEM string + ChainPEM string + Serial string + NotBefore time.Time + NotAfter time.Time } // NewRenewalService creates a new renewal service. @@ -72,12 +90,29 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error { continue } + // Check for existing pending/running renewal jobs to avoid duplicates + existingJobs, err := s.jobRepo.ListByCertificate(ctx, cert.ID) + if err == nil { + hasActiveRenewal := false + for _, j := range existingJobs { + if j.Type == domain.JobTypeRenewal && + (j.Status == domain.JobStatusPending || j.Status == domain.JobStatusRunning) { + hasActiveRenewal = true + break + } + } + if hasActiveRenewal { + continue + } + } + // Create renewal job job := &domain.Job{ ID: generateID("job"), CertificateID: cert.ID, Type: domain.JobTypeRenewal, Status: domain.JobStatusPending, + MaxAttempts: 3, ScheduledAt: time.Now(), CreatedAt: time.Now(), } @@ -87,6 +122,12 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error { continue } + // Update certificate status to RenewalInProgress + cert.Status = domain.CertificateStatusRenewalInProgress + if err := s.certRepo.Update(ctx, cert); err != nil { + fmt.Printf("failed to update cert status for %s: %v\n", cert.ID, err) + } + // Record audit event _ = s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem, "renewal_job_created", "certificate", cert.ID, @@ -96,7 +137,13 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error { return nil } -// ProcessRenewalJob executes a renewal job: call issuer, store new version, update cert status. +// ProcessRenewalJob executes a renewal job: generate CSR, call issuer, store new version, +// update cert status, and create deployment jobs for targets. +// +// V1 Architecture Note: For the Local CA issuer, the control plane generates a server-side +// ephemeral key + CSR. The private key is stored in the CertificateVersion.CSRPEM field +// so agents can retrieve it for deployment. In V2+ with ACME/external CAs, agents will +// generate keys locally and submit CSRs, so private keys never leave the target infrastructure. func (s *RenewalService) ProcessRenewalJob(ctx context.Context, job *domain.Job) error { // Update job status to in-progress if err := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusRunning, ""); err != nil { @@ -106,40 +153,59 @@ func (s *RenewalService) ProcessRenewalJob(ctx context.Context, job *domain.Job) // Fetch certificate cert, err := s.certRepo.Get(ctx, job.CertificateID) if err != nil { - updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("certificate fetch failed: %v", err)) - if updateErr != nil { - fmt.Printf("failed to update job status: %v\n", updateErr) - } + s.failJob(ctx, job, fmt.Sprintf("certificate fetch failed: %v", err)) return fmt.Errorf("failed to fetch certificate: %w", err) } // Get issuer connector issuerID := cert.IssuerID if issuerID == "" { + s.failJob(ctx, job, "certificate has no issuer assigned") return fmt.Errorf("certificate has no issuer assigned") } connector, ok := s.issuerRegistry[issuerID] if !ok { - updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, - fmt.Sprintf("issuer connector not found for %s", issuerID)) - if updateErr != nil { - fmt.Printf("failed to update job status: %v\n", updateErr) - } + s.failJob(ctx, job, fmt.Sprintf("issuer connector not found for %s", issuerID)) return fmt.Errorf("issuer connector not found for %s", issuerID) } - // TODO: In production, fetch CSR from agent or generate new CSR - // For now, we'd use cert.CSR or generate a new one from the private key - csr := []byte{} // placeholder - - // Call issuer to renew - certPEM, err := connector.RenewCertificate(ctx, csr) + // Generate server-side RSA key + CSR for this renewal + // V1: server generates ephemeral key for Local CA. V2+: agent generates key locally. + privKey, err := rsa.GenerateKey(rand.Reader, 2048) if err != nil { - updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("issuer renewal failed: %v", err)) - if updateErr != nil { - fmt.Printf("failed to update job status: %v\n", updateErr) - } + s.failJob(ctx, job, fmt.Sprintf("key generation failed: %v", err)) + return fmt.Errorf("failed to generate private key: %w", err) + } + + csrTemplate := &x509.CertificateRequest{ + Subject: pkix.Name{ + CommonName: cert.CommonName, + }, + DNSNames: cert.SANs, + } + + csrDER, err := x509.CreateCertificateRequest(rand.Reader, csrTemplate, privKey) + if err != nil { + s.failJob(ctx, job, fmt.Sprintf("CSR generation failed: %v", err)) + return fmt.Errorf("failed to generate CSR: %w", err) + } + + csrPEM := string(pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE REQUEST", + Bytes: csrDER, + })) + + // Encode private key to PEM for storage (V1: stored so agent can retrieve for deployment) + privKeyPEM := string(pem.EncodeToMemory(&pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(privKey), + })) + + // Call issuer connector to renew + result, err := connector.RenewCertificate(ctx, cert.CommonName, cert.SANs, csrPEM) + if err != nil { + s.failJob(ctx, job, fmt.Sprintf("issuer renewal failed: %v", err)) // Send failure notification _ = s.notificationSvc.SendRenewalNotification(ctx, cert, false, err) @@ -152,38 +218,63 @@ func (s *RenewalService) ProcessRenewalJob(ctx context.Context, job *domain.Job) return fmt.Errorf("issuer renewal failed: %w", err) } + // Compute SHA-256 fingerprint of the issued certificate + fingerprint := computeCertFingerprint(result.CertPEM) + // Create new certificate version version := &domain.CertificateVersion{ - ID: generateID("certver"), - CertificateID: job.CertificateID, - SerialNumber: fmt.Sprintf("renewed-%d", time.Now().Unix()), - PEMChain: string(certPEM), - CreatedAt: time.Now(), + ID: generateID("certver"), + CertificateID: job.CertificateID, + SerialNumber: result.Serial, + NotBefore: result.NotBefore, + NotAfter: result.NotAfter, + FingerprintSHA256: fingerprint, + PEMChain: result.CertPEM + "\n" + result.ChainPEM, + CSRPEM: privKeyPEM, // V1: stores private key for agent deployment + CreatedAt: time.Now(), } if err := s.certRepo.CreateVersion(ctx, version); err != nil { - updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("version creation failed: %v", err)) - if updateErr != nil { - fmt.Printf("failed to update job status: %v\n", updateErr) - } + s.failJob(ctx, job, fmt.Sprintf("version creation failed: %v", err)) return fmt.Errorf("failed to create certificate version: %w", err) } - // Update certificate status + // Update certificate status and expiry cert.Status = domain.CertificateStatusActive + cert.ExpiresAt = result.NotAfter + now := time.Now() + cert.LastRenewalAt = &now + cert.UpdatedAt = now if err := s.certRepo.Update(ctx, cert); err != nil { - updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("cert update failed: %v", err)) - if updateErr != nil { - fmt.Printf("failed to update job status: %v\n", updateErr) - } + s.failJob(ctx, job, fmt.Sprintf("cert update failed: %v", err)) return fmt.Errorf("failed to update certificate: %w", err) } - // Mark job as completed + // Mark renewal job as completed if err := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusCompleted, ""); err != nil { return fmt.Errorf("failed to update job status: %w", err) } + // Create deployment jobs for each target + if len(cert.TargetIDs) > 0 { + for _, targetID := range cert.TargetIDs { + tid := targetID // capture loop variable + deployJob := &domain.Job{ + ID: generateID("job"), + CertificateID: cert.ID, + Type: domain.JobTypeDeployment, + Status: domain.JobStatusPending, + TargetID: &tid, + MaxAttempts: 3, + ScheduledAt: time.Now(), + CreatedAt: time.Now(), + } + if err := s.jobRepo.Create(ctx, deployJob); err != nil { + fmt.Printf("failed to create deployment job for target %s: %v\n", targetID, err) + } + } + } + // Send success notification if err := s.notificationSvc.SendRenewalNotification(ctx, cert, true, nil); err != nil { fmt.Printf("failed to send renewal notification: %v\n", err) @@ -192,12 +283,33 @@ func (s *RenewalService) ProcessRenewalJob(ctx context.Context, job *domain.Job) // Record audit event _ = s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem, "renewal_job_completed", "certificate", job.CertificateID, - map[string]interface{}{"job_id": job.ID, "serial": version.SerialNumber}) + map[string]interface{}{ + "job_id": job.ID, + "serial": result.Serial, + "not_after": result.NotAfter, + }) return nil } -// Retry attempts to reprocess failed renewal jobs with exponential backoff. +// failJob is a helper to mark a job as failed with an error message. +func (s *RenewalService) failJob(ctx context.Context, job *domain.Job, errMsg string) { + if updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, errMsg); updateErr != nil { + fmt.Printf("failed to update job status: %v\n", updateErr) + } +} + +// computeCertFingerprint computes the SHA-256 fingerprint of a PEM-encoded certificate. +func computeCertFingerprint(certPEM string) string { + block, _ := pem.Decode([]byte(certPEM)) + if block == nil { + return "" + } + hash := sha256.Sum256(block.Bytes) + return hex.EncodeToString(hash[:]) +} + +// RetryFailedJobs resets failed renewal jobs for retry if they haven't exceeded max attempts. func (s *RenewalService) RetryFailedJobs(ctx context.Context, maxRetries int) error { failedJobs, err := s.jobRepo.ListByStatus(ctx, domain.JobStatusFailed) if err != nil {