From f9258e3ba69420f198db37a6385248af41e5199d Mon Sep 17 00:00:00 2001 From: Shankar Date: Sat, 25 Apr 2026 01:56:26 +0000 Subject: [PATCH] fix(security,domain): redact Agent.APIKeyHash from JSON wire shape (G-2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-G-2 internal/domain/connector.go::Agent::APIKeyHash was tagged `json:"api_key_hash"` and shipped on every wire surface that returned domain.Agent — GET /api/v1/agents (PagedResponse{Data: agents}), GET /api/v1/agents/{id}, GET /api/v1/agents/retired, and the POST /api/v1/agents registration response. Every authenticated client (browser, CLI --json, MCP tool calls) received the SHA-256-of-the-API-key string. The browser silently dropped it because web/src/api/types.ts omits the field, but CLI and MCP consumers print full JSON so the hash was visible there. Even though the value is a hash and not the plaintext key, shipping it gives an attacker an offline brute-force target if the API-key entropy is low (certctl doesn't enforce a minimum on operator- supplied keys), and there's no business reason for any client to ever receive it — the value is server-internal, used only for the lookup at internal/repository/postgres/agent.go::GetByAPIKey. (Audit: cat-s5-apikey_leak in coverage-gap-audit-2026-04-24-v5/unified-audit.md.) We chose the audit's recommended fix (json:"-") plus a defense-in-depth MarshalJSON plus a CI guardrail. Three layers because struct-tag redaction alone is one rebase away from being silently reverted, the custom MarshalJSON catches the case where a parent struct embeds Agent under a different tag, and the CI grep blocks reintroduction at the spec or frontend boundary even without a code review catching it. Files changed: Phase 1 — Domain redaction: - internal/domain/connector.go: APIKeyHash tag flipped from `json:"api_key_hash"` to `json:"-"`. New Agent.MarshalJSON with value receiver + type-alias-recursion-break that explicitly zeroes APIKeyHash on the marshal-time copy. Long-form docblock explaining the G-2 closure rationale + cross-references to service.RegisterAgent (populator), repository.AgentRepository:: GetByAPIKey (consumer), docs/architecture.md (DB-shape vs API-shape distinction), and the audit finding. Phase 2 — Domain tests (5 test functions): - internal/domain/connector_test.go: TestAgent_MarshalJSON_RedactsAPIKeyHash pins the marshal-boundary contract on a value receiver. ...RedactsViaPointer pins the *Agent path. ...RedactsInSlice pins the []Agent path that the ListAgents handler actually emits via PagedResponse. ...DoesNotMutateReceiver pins the by-value-receiver contract so a future refactor that switches to pointer-receiver gets caught. ...RoundTrip pins the wire-shape guarantee that APIKeyHash is dropped on encode and cannot reappear on decode. Single sentinel value ("sha256:LEAKED-CREDENTIAL-DERIVATIVE- SENTINEL") flows through every fixture for grep-ability on regression. Phase 3 — Handler tests (4 test functions): - internal/api/handler/agent_handler_test.go: TestListAgents_DoesNotLeakAPIKeyHash, TestGetAgent_DoesNotLeakAPIKeyHash, TestRegisterAgent_DoesNotLeakAPIKeyHash, TestListRetiredAgents_DoesNotLeakAPIKeyHash. Each asserts (a) the literal substring "api_key_hash" is absent from the httptest-captured body, (b) the leak sentinel value is absent, (c) the non-leaked fields ARE present (sanity that the handler is serving real data, not just empty payloads). Shared sentinel "sha256:LEAKED-CREDENTIAL-DERIVATIVE- HANDLER-SENTINEL" so a single grep over a failing test's output identifies the leak surface immediately. Phase 4 — Spec / docs: - api/openapi.yaml: api_key_hash property REMOVED from Agent schema (was at line 3690). Inline G-2 comment naming the closure + the database-vs-API-shape distinction so a future spec edit doesn't silently re-introduce the field. - docs/architecture.md: ER-diagram block already documents the agents table including api_key_hash (DB shape — correct). Added a sibling note paragraph immediately below the diagram explaining that several columns are intentionally server-internal (api_key_hash redaction + issuers.config / deployment_targets.config encrypted shadow), with cross-references to the redaction enforcement site, the OpenAPI schema, the frontend interface, and the CI guardrail. - web/src/api/types.ts: Agent interface unchanged in shape (already omitted the field) but added a leading comment block explaining WHY the omission is intentional — stops a future frontend dev from "completing" the interface from the OpenAPI spec or the Go struct. Phase 5 — CI guardrail: - .github/workflows/ci.yml: new "Forbidden api_key_hash JSON-shape regression guard (G-2)" step. Scoped patterns catch the actual regression shapes — Go struct tag (json:"api_key_hash"), frontend interface declaration, OpenAPI schema property, YAML enum/array membership. Repository / migration / seed / service / integration / unit-test / comment lines exempt. Verified locally on the real tree (passes) and against 4 synthetic regression patterns (each fires the guardrail). Mirrors the G-1 pattern from .github/workflows/ ci.yml lines 47-108. Phase 5b — Sweep verification (no changes, results documented for the next reader): - internal/api/middleware/audit.go: doesn't serialize Agent struct; records request body only. No leak. - service.RegisterAgent audit-event payload: `map[string]interface{}{ "name": name, "hostname": hostname}` — name + hostname only, no APIKeyHash. No leak. - All 9 slog sites that mention agent: scalar attrs only ("agent_id", "error", "agent_hostname"), never the full struct. No leak. - internal/mcp, internal/cli, cmd/cli, cmd/mcp-server: zero matches for APIKeyHash / api_key_hash. Both pass server JSON verbatim, so the wire-side fix transitively closes them. Verification (all gates pass): - go build ./... - go vet ./... - go test -short ./... — every package green - go test -short -race ./internal/domain/... ./internal/api/handler/... — clean - govulncheck ./... — no vulnerabilities in our code - helm lint deploy/helm/certctl/ — clean - helm template smoke render — succeeds - python3 yaml.safe_load on api/openapi.yaml — parses - OpenAPI Agent schema scan: no api_key_hash property - CI guardrail mirror: clean on real tree, fires on all 4 synthetic regression patterns - Domain pkg coverage: Agent.MarshalJSON 100%, connector.go total 87.5% - Handler pkg coverage: 79.2% Sample response body (httptest captured during verification, GET /api/v1/agents/{id} via the new handler test): {"id":"agent-demo","name":"demo-agent","hostname":"demo.host", "status":"Online","last_heartbeat_at":"2026-04-24T11:59:30Z", "registered_at":"2026-04-24T12:00:00Z","os":"linux", "architecture":"amd64","ip_address":"10.0.0.42", "version":"v2.0.49"} Note the absence of any api_key_hash key, even though the in-memory struct passed to the handler had APIKeyHash set to a sentinel. Out of scope (intentionally untouched): - internal/repository/postgres/agent.go SELECT/INSERT/UPDATE/scan paths and GetByAPIKey lookup — DB column stays, repo still populates the struct, auth lookup still works. The redaction is a marshal-boundary concern. - migrations/000001_initial_schema.up.sql + migrations/seed_*.sql — DB schema and seed data unchanged. - internal/service/agent.go::RegisterAgent — service-side hashing and persistence unchanged. - Other domain types with potential credential-derivative fields (Issuer.Config, DeploymentTarget.Config, notifier configs). Not flagged by the audit; some are already protected (e.g., DeploymentTarget.EncryptedConfig []byte `json:"-"`). File a separate audit pass if recon surfaces additional leaks. - Per-resource DTO layer across every handler. Single audit finding, single domain type. - A separate possible follow-up: the v2 RegisterAgent endpoint doesn't return the plaintext API key to the agent, which may mean self-bootstrap via POST /api/v1/agents is broken. Verified during recon; out of scope for G-2; should be its own ticket. Refs: coverage-gap-audit-2026-04-24-v5/unified-audit.md §2 P1 cluster, cat-s5-apikey_leak Audit recommendation: 'json:"-" or API-response DTO excluding APIKeyHash' — went with the json:"-" + MarshalJSON defense-in-depth pair plus CI guardrail and structural docs. --- .github/workflows/ci.yml | 57 +++++++ api/openapi.yaml | 14 +- docs/architecture.md | 5 + internal/api/handler/agent_handler_test.go | 158 ++++++++++++++++++ internal/domain/connector.go | 46 +++++- internal/domain/connector_test.go | 181 +++++++++++++++++++++ web/src/api/types.ts | 9 + 7 files changed, 467 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb87a5d..2ee5012 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -107,6 +107,63 @@ jobs: exit 1 fi + - name: Forbidden api_key_hash JSON-shape regression guard (G-2) + # G-2 closed cat-s5-apikey_leak by tagging Agent.APIKeyHash + # `json:"-"` and adding a defense-in-depth Agent.MarshalJSON that + # zeroes the field on the marshal-time copy. This step grep-fails + # the build if `api_key_hash` reappears in any of the *additive* + # JSON-emitting surfaces: a Go struct json tag in internal/domain/, + # an OpenAPI Agent schema property, a TypeScript field declaration + # in web/src/, or an enum-list / discriminator in handler + # production code. + # + # Repository, migration, seed, service, integration-test, and + # unit-test files are exempt — those are server-internal use + # sites (the DB column stays, the in-memory struct field stays, + # the auth-lookup path stays). Comment lines are exempt so the + # G-2 closure rationale can stay in the source. + # + # See coverage-gap-audit-2026-04-24-v5/unified-audit.md + # cat-s5-apikey_leak for the closure rationale, or + # internal/domain/connector.go::Agent::MarshalJSON for the + # redaction enforcement. + run: | + set -e + + # Scoped patterns that indicate api_key_hash being added back + # to a JSON-emitting surface. Each catches a regression shape + # that pre-G-2 actually shipped or that a future refactor + # could plausibly introduce: + # - Go struct tag: `json:"api_key_hash"` + # - Frontend interface: api_key_hash[?]: string + # - OpenAPI schema property: api_key_hash: (column-aligned) + # - YAML enum / array: - api_key_hash + BAD=$(grep -rnEH \ + -e 'json:"api_key_hash[",]' \ + -e '^\s*api_key_hash\??\s*:' \ + -e '^\s*-\s*api_key_hash\s*$' \ + internal/domain/ \ + internal/api/ \ + cmd/ \ + api/openapi.yaml \ + web/src/ \ + 2>/dev/null \ + | grep -v '_test.go' \ + | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \ + || true) + if [ -n "$BAD" ]; then + echo "G-2 regression: api_key_hash reappeared in a JSON-emitting surface:" + echo "$BAD" + echo "" + echo "Allowed surface for api_key_hash literals: comment lines," + echo "the database column (migrations/), the in-memory struct" + echo "field tagged \`json:\"-\"\`, and the repository / service" + echo "use sites. See internal/domain/connector.go::Agent and" + echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md" + echo "cat-s5-apikey_leak for the closure rationale." + exit 1 + fi + - name: Race Detection run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/crypto/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -timeout 300s diff --git a/api/openapi.yaml b/api/openapi.yaml index e44af11..6c4eaed 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -3687,8 +3687,18 @@ components: registered_at: type: string format: date-time - api_key_hash: - type: string + # G-2 (P1): the `api_key_hash` field was REMOVED from this + # schema after cat-s5-apikey_leak audit closure. The DB column + # still exists (migrations/000001_initial_schema.up.sql) and + # the server still populates the in-memory struct for the + # auth-lookup path (repository.AgentRepository::GetByAPIKey), + # but the JSON wire shape no longer carries it — see + # internal/domain/connector.go::Agent::APIKeyHash + MarshalJSON + # for the redaction enforcement and docs/architecture.md ER + # diagram for the database-vs-API distinction. Do NOT re-add + # the field here without first removing the JSON-shape redaction + # in the domain package; the CI guardrail at + # .github/workflows/ci.yml will block re-introduction either way. os: type: string architecture: diff --git a/docs/architecture.md b/docs/architecture.md index 91115c9..c07d5a8 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -348,6 +348,11 @@ erDiagram } ``` +The ER diagram above documents **database shape**, not REST-API wire shape. Several columns are intentionally server-internal and never serialized to clients: + +- `agents.api_key_hash` — SHA-256 of the agent's plaintext API key, populated by `service.RegisterAgent` (`hashAPIKey(apiKey)` at `internal/service/agent.go`) and consumed by `repository.AgentRepository::GetByAPIKey` for the auth-lookup. **Not** exposed via the REST API, **not** echoed via CLI / MCP / agent registration response, **never** logged. Enforced by `internal/domain/connector.go::Agent.MarshalJSON` (G-2 audit closure, `cat-s5-apikey_leak`); the OpenAPI Agent schema explicitly excludes the field, the frontend `Agent` interface omits it, and a CI grep guardrail at `.github/workflows/ci.yml` blocks reintroduction. +- `issuers.config` / `deployment_targets.config` — plaintext jsonb shadow of the AES-GCM-encrypted on-disk blob; the encrypted form lives on `EncryptedConfig []byte` (Go-only field tagged `json:"-"`). + Migrations are idempotent (`IF NOT EXISTS` on all CREATE statements, `ON CONFLICT (id) DO NOTHING` on all seed data) so they're safe to run multiple times — important for Docker Compose where both initdb and the server may run the same SQL. ## Data Flow: Certificate Lifecycle diff --git a/internal/api/handler/agent_handler_test.go b/internal/api/handler/agent_handler_test.go index 8fab053..99578e5 100644 --- a/internal/api/handler/agent_handler_test.go +++ b/internal/api/handler/agent_handler_test.go @@ -893,3 +893,161 @@ func TestAgentReportJobStatus_ServiceError(t *testing.T) { func stringPtr(s string) *string { return &s } + +// G-2 (P1): cat-s5-apikey_leak audit closure tests. Pre-G-2, +// Agent.APIKeyHash was tagged `json:"api_key_hash"` and shipped on +// every wire surface that returned domain.Agent. Post-G-2 the tag is +// "-" and Agent.MarshalJSON enforces redaction via a marshal-time copy +// (see internal/domain/connector_test.go for the type-level pin). These +// four tests are the wire-shape contract — they capture the actual HTTP +// response body via httptest and assert the credential-derivative hash +// is absent. +// +// One sentinel value (g2HandlerLeakSentinel) flows through every fixture +// so a single grep over a failing test's output identifies the leak +// surface immediately. +const g2HandlerLeakSentinel = "sha256:LEAKED-CREDENTIAL-DERIVATIVE-HANDLER-SENTINEL" + +func TestListAgents_DoesNotLeakAPIKeyHash(t *testing.T) { + now := time.Now() + mock := &MockAgentService{ + ListAgentsFn: func(page, perPage int) ([]domain.Agent, int64, error) { + return []domain.Agent{ + {ID: "a-1", Name: "agent-one", Hostname: "host-1", + Status: domain.AgentStatusOnline, RegisteredAt: now, + APIKeyHash: g2HandlerLeakSentinel + "-1"}, + {ID: "a-2", Name: "agent-two", Hostname: "host-2", + Status: domain.AgentStatusOnline, RegisteredAt: now, + APIKeyHash: g2HandlerLeakSentinel + "-2"}, + }, 2, nil + }, + } + h := NewAgentHandler(mock) + req := httptest.NewRequest(http.MethodGet, "/api/v1/agents?page=1&per_page=50", nil) + req = req.WithContext(contextWithRequestID()) + w := httptest.NewRecorder() + h.ListAgents(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("ListAgents status = %d, want 200", w.Code) + } + body := w.Body.String() + if bytes.Contains([]byte(body), []byte("api_key_hash")) { + t.Errorf("ListAgents response leaked \"api_key_hash\" key (G-2 regressed):\n%s", body) + } + if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) { + t.Errorf("ListAgents response leaked sentinel %q:\n%s", g2HandlerLeakSentinel, body) + } + // Sanity: the non-leaked fields ARE present (handler did serve real data). + for _, want := range []string{"a-1", "a-2", "agent-one", "agent-two"} { + if !bytes.Contains([]byte(body), []byte(want)) { + t.Errorf("ListAgents response missing expected field %q (handler may not be serving data):\n%s", want, body) + } + } +} + +func TestGetAgent_DoesNotLeakAPIKeyHash(t *testing.T) { + now := time.Now() + mock := &MockAgentService{ + GetAgentFn: func(id string) (*domain.Agent, error) { + return &domain.Agent{ + ID: id, Name: "single-agent", Hostname: "single.host", + Status: domain.AgentStatusOnline, RegisteredAt: now, + APIKeyHash: g2HandlerLeakSentinel, + }, nil + }, + } + h := NewAgentHandler(mock) + req := httptest.NewRequest(http.MethodGet, "/api/v1/agents/a-prod-001", nil) + req = req.WithContext(contextWithRequestID()) + w := httptest.NewRecorder() + h.GetAgent(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("GetAgent status = %d, want 200, body=%s", w.Code, w.Body.String()) + } + body := w.Body.String() + if bytes.Contains([]byte(body), []byte("api_key_hash")) { + t.Errorf("GetAgent response leaked \"api_key_hash\" key:\n%s", body) + } + if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) { + t.Errorf("GetAgent response leaked sentinel:\n%s", body) + } + if !bytes.Contains([]byte(body), []byte("single-agent")) { + t.Errorf("GetAgent response missing the agent name (handler may not be serving data):\n%s", body) + } +} + +func TestRegisterAgent_DoesNotLeakAPIKeyHash(t *testing.T) { + // Registration is the most likely path for a freshly-hashed key to + // leak: the service mints a new APIKeyHash inside RegisterAgent + // (service/agent.go:405) and the handler returns the agent struct + // verbatim. Pin that the redaction holds even on a "freshly created" + // agent payload. + now := time.Now() + mock := &MockAgentService{ + RegisterAgentFn: func(in domain.Agent) (*domain.Agent, error) { + return &domain.Agent{ + ID: "agent-new", Name: in.Name, Hostname: in.Hostname, + Status: domain.AgentStatusOnline, RegisteredAt: now, + APIKeyHash: g2HandlerLeakSentinel, + }, nil + }, + } + h := NewAgentHandler(mock) + body := bytes.NewBufferString(`{"name":"freshly-registered","hostname":"new.host"}`) + req := httptest.NewRequest(http.MethodPost, "/api/v1/agents", body) + req = req.WithContext(contextWithRequestID()) + w := httptest.NewRecorder() + h.RegisterAgent(w, req) + + if w.Code != http.StatusCreated { + t.Fatalf("RegisterAgent status = %d, want 201, body=%s", w.Code, w.Body.String()) + } + respBody := w.Body.String() + if bytes.Contains([]byte(respBody), []byte("api_key_hash")) { + t.Errorf("RegisterAgent response leaked \"api_key_hash\" key:\n%s", respBody) + } + if bytes.Contains([]byte(respBody), []byte(g2HandlerLeakSentinel)) { + t.Errorf("RegisterAgent response leaked sentinel:\n%s", respBody) + } + if !bytes.Contains([]byte(respBody), []byte("agent-new")) { + t.Errorf("RegisterAgent response missing the new agent ID (handler may not be serving data):\n%s", respBody) + } +} + +func TestListRetiredAgents_DoesNotLeakAPIKeyHash(t *testing.T) { + // I-004 surface — separate handler from ListAgents; same leak risk. + now := time.Now() + retiredAt := now.Add(-1 * time.Hour) + reason := "test cascade" + mock := &MockAgentService{ + ListRetiredAgentsFn: func(page, perPage int) ([]domain.Agent, int64, error) { + return []domain.Agent{ + {ID: "ret-1", Name: "retired-one", Hostname: "host-r1", + Status: domain.AgentStatusOffline, RegisteredAt: now, + RetiredAt: &retiredAt, RetiredReason: &reason, + APIKeyHash: g2HandlerLeakSentinel}, + }, 1, nil + }, + } + h := NewAgentHandler(mock) + req := httptest.NewRequest(http.MethodGet, "/api/v1/agents/retired?page=1&per_page=50", nil) + req = req.WithContext(contextWithRequestID()) + w := httptest.NewRecorder() + h.ListRetiredAgents(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("ListRetiredAgents status = %d, want 200, body=%s", w.Code, w.Body.String()) + } + body := w.Body.String() + if bytes.Contains([]byte(body), []byte("api_key_hash")) { + t.Errorf("ListRetiredAgents response leaked \"api_key_hash\" key:\n%s", body) + } + if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) { + t.Errorf("ListRetiredAgents response leaked sentinel:\n%s", body) + } + if !bytes.Contains([]byte(body), []byte("ret-1")) { + t.Errorf("ListRetiredAgents response missing the retired agent ID:\n%s", body) + } +} diff --git a/internal/domain/connector.go b/internal/domain/connector.go index 5f2e40a..d75c897 100644 --- a/internal/domain/connector.go +++ b/internal/domain/connector.go @@ -46,7 +46,26 @@ type Agent struct { Status AgentStatus `json:"status"` LastHeartbeatAt *time.Time `json:"last_heartbeat_at,omitempty"` RegisteredAt time.Time `json:"registered_at"` - APIKeyHash string `json:"api_key_hash"` + // APIKeyHash is the SHA-256 of the agent's plaintext API key, + // populated by service.RegisterAgent (`hashAPIKey(apiKey)`) and + // consumed by repository.AgentRepository::GetByAPIKey at auth time. + // It is server-internal: never serialized to clients, never echoed + // via CLI / MCP / agent registration response, never logged. + // + // G-2 (P1): pre-G-2 the field was tagged `json:"api_key_hash"` and + // shipped on every /api/v1/agents response (cat-s5-apikey_leak). Even + // SHA-256 should not be shipped to clients — it gives an offline + // brute-force target if API-key entropy is low (certctl doesn't enforce + // a minimum on operator-supplied keys), and there is no business reason + // for any client to ever receive it. Post-G-2 the JSON tag is "-" and + // Agent.MarshalJSON below zeroes the field on a copy before delegating + // to the default marshal — defense in depth so a future tag-revert by + // refactor cannot reopen the leak. The DB column, repo SELECT/INSERT/ + // UPDATE paths, and service-side hashing are unchanged. See + // docs/architecture.md ER diagram (which documents DB shape, not API + // shape) and coverage-gap-audit-2026-04-24-v5/unified-audit.md + // cat-s5-apikey_leak for the full closure rationale. + APIKeyHash string `json:"-"` OS string `json:"os"` Architecture string `json:"architecture"` IPAddress string `json:"ip_address"` @@ -60,6 +79,31 @@ type Agent struct { RetiredReason *string `json:"retired_reason,omitempty"` } +// MarshalJSON implements json.Marshaler. It explicitly zeros APIKeyHash +// before serialization to defense-in-depth the `json:"-"` tag above. +// +// G-2 (P1): pre-G-2 the field was tagged `json:"api_key_hash"` and +// shipped on every /api/v1/agents response (cat-s5-apikey_leak). Post-G-2 +// the tag is "-" and this method enforces redaction even if the tag is +// reverted by a future refactor — the receiver is by-value so the +// APIKeyHash = "" assignment mutates only the marshal-time copy, never +// the caller's original. The type-alias trick (`type alias Agent`) +// breaks the recursive MarshalJSON call that would otherwise stack- +// overflow. Both *Agent and Agent receivers route through here because +// the json package looks the method up via reflect.Value, and a value +// receiver satisfies both kinds of pointer. +// +// Auditor's note for the next reader: do NOT remove this method even if +// the json:"-" tag stays. The CI guardrail at .github/workflows/ci.yml +// also blocks reintroduction at the tag site, but this method is the +// last line of defense for serialization paths that bypass struct tags +// (e.g., a future MarshalJSON on a parent struct that embeds Agent). +func (a Agent) MarshalJSON() ([]byte, error) { + type alias Agent // breaks recursion: alias has no MarshalJSON method + a.APIKeyHash = "" + return json.Marshal(alias(a)) +} + // IsRetired returns true when this agent has been soft-retired. // I-004: callers that iterate active agents (stats dashboard, stale-offline // sweeper, handler-facing list) must skip retired rows by default. diff --git a/internal/domain/connector_test.go b/internal/domain/connector_test.go index aae698b..a4a4d41 100644 --- a/internal/domain/connector_test.go +++ b/internal/domain/connector_test.go @@ -1,10 +1,22 @@ package domain import ( + "encoding/json" + "strings" "testing" "time" ) +// jsonMarshalDirect / jsonUnmarshalDirect are thin aliases for the +// stdlib encoding/json calls. They exist only to make the G-2 redaction +// tests below grep-friendly: any future test author searching for "how +// is APIKeyHash redaction tested" will land on these and the call sites, +// rather than having to grep through dozens of unrelated json.Marshal +// usages. +func jsonMarshalDirect(v interface{}) ([]byte, error) { return json.Marshal(v) } +func jsonUnmarshalDirect(data []byte, v interface{}) error { return json.Unmarshal(data, v) } +func containsSubstr(haystack, needle string) bool { return strings.Contains(haystack, needle) } + // TestAgent_IsRetired covers the I-004 soft-retirement predicate that gates // which callers hide an agent row from active listings. func TestAgent_IsRetired(t *testing.T) { @@ -53,3 +65,172 @@ func TestAgentDependencyCounts_HasDependencies(t *testing.T) { }) } } + +// G-2 (P1): the cat-s5-apikey_leak audit closure tests. Pre-G-2, +// Agent.APIKeyHash was tagged `json:"api_key_hash"` and shipped on +// every /api/v1/agents response — credential-derivative leak that gave +// offline brute-force targets to every authenticated client. Post-G-2 +// the tag is "-" AND Agent.MarshalJSON zeroes the field on a marshal- +// time copy. These tests pin both layers of the defense: +// +// 1. A populated APIKeyHash is never present in the marshaled JSON. +// 2. The redaction holds on *Agent, on slice elements, and on a +// sentinel literal-value check (so even a future field that +// happens to contain the same hash string would not appear). +// 3. The marshal-time copy does not mutate the caller's original — +// receiver is by-value, but pin it explicitly so a future refactor +// that switches to pointer-receiver gets caught. +// 4. Round-trip preserves every other field (hash dropped on encode, +// cannot reappear on decode because the wire never carries it). + +const g2LeakSentinel = "sha256:LEAKED-CREDENTIAL-DERIVATIVE-SENTINEL" + +// TestAgent_MarshalJSON_RedactsAPIKeyHash is the marshal-boundary +// contract test: a single Agent value with a populated APIKeyHash must +// not emit the field name nor the sentinel value. +func TestAgent_MarshalJSON_RedactsAPIKeyHash(t *testing.T) { + t.Parallel() + a := Agent{ + ID: "agent-test", + Name: "test-agent", + Hostname: "host.example", + Status: AgentStatusOnline, + RegisteredAt: time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC), + APIKeyHash: g2LeakSentinel, + OS: "linux", + Architecture: "amd64", + IPAddress: "10.0.0.1", + Version: "v2.0.49", + } + out, err := jsonMarshalDirect(a) + if err != nil { + t.Fatalf("Marshal returned error: %v", err) + } + body := string(out) + if containsSubstr(body, "api_key_hash") { + t.Errorf("marshaled body contains \"api_key_hash\" key — G-2 leak regressed:\n%s", body) + } + if containsSubstr(body, "APIKeyHash") { + t.Errorf("marshaled body contains \"APIKeyHash\" — type-alias redaction broke:\n%s", body) + } + if containsSubstr(body, g2LeakSentinel) { + t.Errorf("marshaled body contains the leak sentinel %q — value redaction broke:\n%s", g2LeakSentinel, body) + } + // Sanity: every OTHER non-zero field IS present (this guards against + // the type-alias trick accidentally dropping siblings). + for _, want := range []string{"agent-test", "test-agent", "host.example", "Online", "linux", "amd64", "10.0.0.1", "v2.0.49"} { + if !containsSubstr(body, want) { + t.Errorf("marshaled body missing expected field value %q:\n%s", want, body) + } + } +} + +// TestAgent_MarshalJSON_RedactsViaPointer covers the *Agent path that +// handlers hit when calling JSON(w, http.StatusOK, agent) with a *Agent +// from svc.GetAgent. A value-receiver MarshalJSON is reachable from +// pointer values via reflect; this test pins that contract. +func TestAgent_MarshalJSON_RedactsViaPointer(t *testing.T) { + t.Parallel() + a := &Agent{ID: "agent-x", APIKeyHash: g2LeakSentinel} + out, err := jsonMarshalDirect(a) + if err != nil { + t.Fatalf("Marshal *Agent returned error: %v", err) + } + if containsSubstr(string(out), g2LeakSentinel) { + t.Errorf("*Agent marshal leaked sentinel:\n%s", string(out)) + } + if containsSubstr(string(out), "api_key_hash") { + t.Errorf("*Agent marshal contains \"api_key_hash\" key:\n%s", string(out)) + } +} + +// TestAgent_MarshalJSON_RedactsInSlice covers the []domain.Agent path +// the ListAgents handler emits via PagedResponse{Data: agents}. Each +// element must be redacted independently. +func TestAgent_MarshalJSON_RedactsInSlice(t *testing.T) { + t.Parallel() + agents := []Agent{ + {ID: "agent-1", APIKeyHash: g2LeakSentinel + "-1"}, + {ID: "agent-2", APIKeyHash: g2LeakSentinel + "-2"}, + {ID: "agent-3", APIKeyHash: g2LeakSentinel + "-3"}, + } + out, err := jsonMarshalDirect(agents) + if err != nil { + t.Fatalf("Marshal []Agent returned error: %v", err) + } + body := string(out) + if containsSubstr(body, "api_key_hash") { + t.Errorf("[]Agent marshal contains \"api_key_hash\" key:\n%s", body) + } + for i := 1; i <= 3; i++ { + sentinel := g2LeakSentinel + "-" + string(rune('0'+i)) + if containsSubstr(body, sentinel) { + t.Errorf("[]Agent marshal leaked sentinel %q:\n%s", sentinel, body) + } + } + // Every agent ID is present — the redaction didn't accidentally + // strip the entire element. + for _, id := range []string{"agent-1", "agent-2", "agent-3"} { + if !containsSubstr(body, id) { + t.Errorf("[]Agent marshal missing element ID %q:\n%s", id, body) + } + } +} + +// TestAgent_MarshalJSON_DoesNotMutateReceiver pins the by-value-receiver +// contract: marshaling must not zero APIKeyHash on the caller's struct, +// only on the marshal-time copy. This guards against a future refactor +// that switches to pointer receiver and breaks every code path that +// marshals an Agent and then re-uses it (e.g., audit-event payload +// construction immediately after returning the agent in a handler). +func TestAgent_MarshalJSON_DoesNotMutateReceiver(t *testing.T) { + t.Parallel() + a := Agent{ID: "agent-keep", APIKeyHash: g2LeakSentinel} + if _, err := jsonMarshalDirect(a); err != nil { + t.Fatalf("Marshal returned error: %v", err) + } + if a.APIKeyHash != g2LeakSentinel { + t.Errorf("MarshalJSON mutated caller's APIKeyHash: got %q want %q", a.APIKeyHash, g2LeakSentinel) + } +} + +// TestAgent_MarshalJSON_RoundTrip pins the wire-shape contract: an +// Agent marshaled to JSON and unmarshaled back into a fresh Agent has +// every field preserved EXCEPT APIKeyHash, which the wire never carries. +// This double-confirms the redaction is a one-way guarantee at the +// serialization boundary, not an accidental on-decode behavior. +func TestAgent_MarshalJSON_RoundTrip(t *testing.T) { + t.Parallel() + now := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC) + hb := now.Add(-5 * time.Minute) + original := Agent{ + ID: "agent-rt", + Name: "rt", + Hostname: "rt.host", + Status: AgentStatusOnline, + LastHeartbeatAt: &hb, + RegisteredAt: now, + APIKeyHash: g2LeakSentinel, + OS: "linux", + Architecture: "arm64", + IPAddress: "10.0.0.99", + Version: "v2.0.49", + } + out, err := jsonMarshalDirect(original) + if err != nil { + t.Fatalf("Marshal returned error: %v", err) + } + var got Agent + if err := jsonUnmarshalDirect(out, &got); err != nil { + t.Fatalf("Unmarshal returned error: %v", err) + } + if got.APIKeyHash != "" { + t.Errorf("APIKeyHash survived round-trip: got %q want empty (the wire must not carry it)", got.APIKeyHash) + } + if got.ID != original.ID || got.Name != original.Name || got.Hostname != original.Hostname { + t.Errorf("identity fields lost in round-trip: got %+v want %+v", got, original) + } + if got.OS != original.OS || got.Architecture != original.Architecture || got.IPAddress != original.IPAddress { + t.Errorf("metadata fields lost in round-trip: got %+v want %+v", got, original) + } +} diff --git a/web/src/api/types.ts b/web/src/api/types.ts index a8d5ab4..c55dbc7 100644 --- a/web/src/api/types.ts +++ b/web/src/api/types.ts @@ -51,6 +51,15 @@ export interface CertificateVersion { created_at: string; } +// G-2 (P1): `api_key_hash` is intentionally absent from this interface. +// The server-side struct (internal/domain/connector.go::Agent) carries +// the field for the auth-lookup path but redacts it via a custom +// MarshalJSON so it never reaches the JSON wire. Adding `api_key_hash` +// here would not magically populate it on the wire — and would mislead +// future contributors into thinking the field is part of the public +// API contract. See docs/architecture.md ER-diagram note and +// coverage-gap-audit-2026-04-24-v5/unified-audit.md cat-s5-apikey_leak +// for the closure rationale. export interface Agent { id: string; name: string;