mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 16:11:29 +00:00
52248be717
Breaking change release. Plaintext HTTP listener removed. The certctl control plane now terminates TLS 1.3 on :8443 via http.Server.ListenAndServeTLS. No CERTCTL_TLS_ENABLED=false escape hatch. No dual-listener mode. One-step cutover per docs/upgrade-to-tls.md. Server - cmd/server/tls.go: certHolder with SIGHUP hot-reload + atomic cert swap, buildServerTLSConfig (TLS 1.3 min, GetCertificate callback), preflightServerTLS validation - cmd/server/main.go: ListenAndServeTLS in place of ListenAndServe, watchSIGHUP wiring, cert/key path config threading - tls_test.go: 418-line regression coverage of reload, preflight, callback behavior, SAN validation Config - CERTCTL_TLS_CERT_PATH / CERTCTL_TLS_KEY_PATH (required) - Plaintext rejection: agents/CLI/MCP pre-flight-fail on http:// URLs with a pointer to docs/upgrade-to-tls.md Agents, CLI, MCP - All three pre-flight-reject http:// URLs with fail-loud diagnostic - CERTCTL_SERVER_CA_BUNDLE_PATH for private-CA trust - CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY for dev-only bypass (loud warning on startup) - install-agent.sh emits both vars as commented template lines docker-compose - certctl-tls-init sidecar generates SAN-valid self-signed cert into deploy/test/certs/ on first boot - All demo-stack curls pin against ca.crt with --cacert Helm chart - Three TLS provisioning modes, exactly one required: - server.tls.existingSecret (operator-supplied) - server.tls.certManager.enabled (cert-manager integration) - server.tls.selfSigned.enabled (eval only — not for production) - server-certificate.yaml template for cert-manager mode - helm install without a TLS source fails at template render with a pointer to docs/tls.md CI - .github/workflows/ci.yml Helm Chart Validation step renders the chart in both existingSecret and cert-manager modes, plus an inverse guard-regression test that asserts helm template MUST refuse to render when no TLS source is configured. Previously the single `helm template` invocation hit the certctl.tls.required fail-loud guard and exit-1'd CI. Four invocations now: lint (existingSecret), template (existingSecret), template (cert-manager), template (no args — must fail). Integration tests - deploy/test/integration_test.go stands up the Compose stack over HTTPS, extracts the CA bundle, and exercises every certctl API over https://localhost:8443 - All 34 integration subtests green (per Phase 8 local CI-parity) Documentation - New: docs/tls.md (provisioning patterns, rotation, SIGHUP reload) - New: docs/upgrade-to-tls.md (one-step cutover, no-downgrade warnings, fleet-roll sequencing) - CHANGELOG.md: v2.2.0 "HTTPS Everywhere — The Irony" entry (file heading unchanged; release tag is v2.0.47) - All curls in docs/, examples/, deploy/helm/ guides use https://localhost:8443 --cacert Verification - grep -rn "ListenAndServe[^T]" cmd/ internal/ → 0 hits - grep -rn "\"http://" cmd/ internal/ → 2 benign hits (Caddy admin API default, SSRF doc comment) — zero certctl endpoints - Tasks #197–#206 (Phases 0–8) all closed in the tracker Files: 65 changed, 3489 insertions, 372 deletions (pre-CI-fix).
215 lines
8.0 KiB
Go
215 lines
8.0 KiB
Go
package mcp
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/url"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// TestClient_DeleteWithQuery_ForceRetire covers the new transport capability
|
|
// that I-004 adds to the MCP client. The retire tool needs to issue
|
|
// DELETE /api/v1/agents/{id}?force=true&reason=... — Client.Delete as it
|
|
// stands only accepts a path, dropping query parameters on the floor. Phase 2b
|
|
// must add DeleteWithQuery so the MCP retire tool can hit the force escape
|
|
// hatch; without this, every retire-via-MCP call with force=true silently
|
|
// becomes a default soft-retire and either succeeds wrongly or 409s.
|
|
func TestClient_DeleteWithQuery_ForceRetire(t *testing.T) {
|
|
var (
|
|
sawMethod string
|
|
sawPath string
|
|
sawForce string
|
|
sawReason string
|
|
)
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
sawMethod = r.Method
|
|
sawPath = r.URL.Path
|
|
sawForce = r.URL.Query().Get("force")
|
|
sawReason = r.URL.Query().Get("reason")
|
|
|
|
if r.Method != http.MethodDelete || r.URL.Path != "/api/v1/agents/ag-1" {
|
|
w.WriteHeader(http.StatusNotFound)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
_ = json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"retired_at": "2026-04-18T12:00:00Z",
|
|
"already_retired": false,
|
|
"cascade": true,
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
c, _ := NewClient(server.URL, "test-key", "", false)
|
|
// Compile-fail until Phase 2b grows Client.DeleteWithQuery. Passing the
|
|
// query as a url.Values is the established pattern (matches Get's shape).
|
|
query := url.Values{}
|
|
query.Set("force", "true")
|
|
query.Set("reason", "decommissioning rack 7")
|
|
data, err := c.DeleteWithQuery("/api/v1/agents/ag-1", query)
|
|
if err != nil {
|
|
t.Fatalf("DeleteWithQuery err=%v want nil", err)
|
|
}
|
|
if data == nil {
|
|
t.Fatal("DeleteWithQuery returned nil data; want 200 body echo-back")
|
|
}
|
|
|
|
if sawMethod != http.MethodDelete {
|
|
t.Errorf("method=%q want DELETE", sawMethod)
|
|
}
|
|
if sawPath != "/api/v1/agents/ag-1" {
|
|
t.Errorf("path=%q want /api/v1/agents/ag-1 (query must be stripped from path)", sawPath)
|
|
}
|
|
if sawForce != "true" {
|
|
t.Errorf("force query=%q want \"true\"", sawForce)
|
|
}
|
|
if sawReason != "decommissioning rack 7" {
|
|
t.Errorf("reason query=%q want %q", sawReason, "decommissioning rack 7")
|
|
}
|
|
}
|
|
|
|
// TestClient_DeleteWithQuery_NoQuery covers the defensive path: a nil/empty
|
|
// query must still produce a clean DELETE against the bare path with no stray
|
|
// "?" suffix. Matches the Get() shape (see client.go do()) so downstream tools
|
|
// can reuse one code path.
|
|
func TestClient_DeleteWithQuery_NoQuery(t *testing.T) {
|
|
var sawRawPath string
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
sawRawPath = r.URL.RequestURI()
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
_ = json.NewEncoder(w).Encode(map[string]interface{}{"ok": true})
|
|
}))
|
|
defer server.Close()
|
|
|
|
c, _ := NewClient(server.URL, "", "", false)
|
|
if _, err := c.DeleteWithQuery("/api/v1/agents/ag-1", nil); err != nil {
|
|
t.Fatalf("DeleteWithQuery(nil query) err=%v want nil", err)
|
|
}
|
|
// No query → no ? suffix.
|
|
if strings.Contains(sawRawPath, "?") {
|
|
t.Errorf("raw path=%q contains stray ?; empty query must not serialize", sawRawPath)
|
|
}
|
|
}
|
|
|
|
// TestClient_DeleteWithQuery_204ReturnsMinimalBody covers the idempotent path.
|
|
// The handler returns 204 No Content for an already-retired agent; the
|
|
// existing do() helper normalises this to {"status":"deleted"}. The new
|
|
// DeleteWithQuery must share that behavior so MCP tool authors don't have to
|
|
// special-case the return shape.
|
|
func TestClient_DeleteWithQuery_204ReturnsMinimalBody(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusNoContent)
|
|
}))
|
|
defer server.Close()
|
|
|
|
c, _ := NewClient(server.URL, "", "", false)
|
|
data, err := c.DeleteWithQuery("/api/v1/agents/ag-1", nil)
|
|
if err != nil {
|
|
t.Fatalf("DeleteWithQuery(204) err=%v want nil (idempotent)", err)
|
|
}
|
|
if data == nil {
|
|
t.Fatal("DeleteWithQuery(204) returned nil; want synthetic body")
|
|
}
|
|
if !strings.Contains(string(data), "deleted") && !strings.Contains(string(data), "status") {
|
|
t.Errorf("DeleteWithQuery(204) body=%q; must surface a non-empty sentinel", string(data))
|
|
}
|
|
}
|
|
|
|
// TestClient_DeleteWithQuery_409PropagatesError covers the preflight-blocked
|
|
// surface. A 409 with dependency counts must bubble up as a Go error so the
|
|
// MCP tool can present it to the LLM operator rather than silently swallow
|
|
// the rejection.
|
|
func TestClient_DeleteWithQuery_409PropagatesError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusConflict)
|
|
_ = json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"error": "blocked_by_dependencies",
|
|
"message": "agent has active targets",
|
|
"counts": map[string]int{
|
|
"active_targets": 3,
|
|
"active_certificates": 7,
|
|
"pending_jobs": 2,
|
|
},
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
c, _ := NewClient(server.URL, "", "", false)
|
|
_, err := c.DeleteWithQuery("/api/v1/agents/ag-1", nil)
|
|
if err == nil {
|
|
t.Fatalf("DeleteWithQuery(409) err=nil; 409 must propagate as Go error")
|
|
}
|
|
if !strings.Contains(err.Error(), "409") {
|
|
t.Errorf("err=%q should include HTTP status 409 for debuggability", err.Error())
|
|
}
|
|
}
|
|
|
|
// TestRetireAgentInput_ShapePinned is a compile-time assertion that the MCP
|
|
// tool input struct for certctl_retire_agent exists with the required fields
|
|
// and their expected tag shapes. The LLM discovers this input schema via
|
|
// jsonschema tags — refactoring field names without updating callers silently
|
|
// breaks tool discovery.
|
|
//
|
|
// Red until Phase 2b adds RetireAgentInput to internal/mcp/types.go. This
|
|
// assertion deliberately exercises every field so the test fails at compile
|
|
// time rather than runtime.
|
|
func TestRetireAgentInput_ShapePinned(t *testing.T) {
|
|
// Zero-value construction of the expected input — fails to compile until
|
|
// the struct exists with fields {ID string, Force bool, Reason string}.
|
|
input := RetireAgentInput{
|
|
ID: "ag-1",
|
|
Force: true,
|
|
Reason: "decommissioning rack 7",
|
|
}
|
|
|
|
if input.ID != "ag-1" {
|
|
t.Errorf("RetireAgentInput.ID=%q want ag-1 (field binding broken)", input.ID)
|
|
}
|
|
if !input.Force {
|
|
t.Errorf("RetireAgentInput.Force=false want true")
|
|
}
|
|
if input.Reason != "decommissioning rack 7" {
|
|
t.Errorf("RetireAgentInput.Reason=%q want decommissioning rack 7", input.Reason)
|
|
}
|
|
|
|
// Also pin the JSON surface — LLMs send and receive these field names,
|
|
// so json tags must stay snake_case even through refactors.
|
|
encoded, err := json.Marshal(input)
|
|
if err != nil {
|
|
t.Fatalf("marshal RetireAgentInput: %v", err)
|
|
}
|
|
body := string(encoded)
|
|
for _, want := range []string{`"id":"ag-1"`, `"force":true`, `"reason":"decommissioning rack 7"`} {
|
|
if !strings.Contains(body, want) {
|
|
t.Errorf("RetireAgentInput JSON=%q missing %q (tag shape drifted)", body, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestListRetiredAgentsInput_ShapePinned mirrors the pagination input shape
|
|
// used across the MCP toolset (see ListParams). The list-retired-agents tool
|
|
// takes page + per_page with snake_case JSON tags. Compile-fail until
|
|
// Phase 2b either adds ListRetiredAgentsInput or documents that list-retired
|
|
// reuses the existing ListParams type (both paths are acceptable — the test
|
|
// just pins whichever Phase 2b picks).
|
|
func TestListRetiredAgentsInput_ShapePinned(t *testing.T) {
|
|
// Phase 2b may either (a) add a dedicated ListRetiredAgentsInput struct
|
|
// or (b) reuse the existing ListParams. Either is fine — we pin the
|
|
// field-access contract rather than the struct name to let the
|
|
// implementation choose. Compile-fail guards against the tool being
|
|
// registered without any pagination input at all.
|
|
var input ListParams
|
|
input.Page = 1
|
|
input.PerPage = 50
|
|
if input.Page != 1 || input.PerPage != 50 {
|
|
t.Errorf("ListParams fields Page/PerPage broken; listing pagination will misroute")
|
|
}
|
|
}
|