fix(bundle-3): MCP Trust-Boundary Fencing — 5 audit findings closed

Closes Audit-2026-04-25 H-002, H-003, M-003, M-004, M-005 (all CWE-1039 LLM Prompt Injection at the MCP↔consumer trust boundary, TB-7). Strategy: wrapper-layer fencing. All 87 MCP tools route their success path through textResult and their failure path through errorResult. By fencing at those two wrappers we cover every existing tool AND every future tool with a single change — no per-tool wiring required. What changed - internal/mcp/fence.go (new) — FenceUntrusted helper with strategy doc + per-finding rationale. Both fenceMCPResponse and fenceMCPError use it internally. - internal/mcp/tools.go — textResult wraps response body via fenceMCPResponse; errorResult wraps error string via fenceMCPError. - internal/mcp/tools_test.go — TestTextResult / TestErrorResult updated to assert fenced shape (start marker + end marker + inner body). - internal/mcp/injection_regression_test.go (new) — 5 regression test functions, one per audit finding, each replays 5 classic LLM injection payloads (instruction_override, system_role_spoofing, delimiter_break_attempt, markdown_link_phishing, data_exfil_via_url) and asserts the planted payload appears VERBATIM (preservation, operator visibility) INSIDE the fence boundaries. - internal/mcp/fence_guardrail_test.go (new) — CI guardrail that walks every non-test .go file in the mcp package and fails if it finds a bare gomcp.CallToolResult literal outside tools.go. Prevents future tools from silently bypassing the fence. Delimiter-forgery defense The naive constant fence (--- UNTRUSTED MCP_RESPONSE END ---) is forgeable: an attacker who controls a field value can plant the literal end marker and "break out" of the fence. Defense: every fence call generates a 6-byte crypto/rand nonce, hex-encoded, and embeds it in BOTH the START and END markers. An attacker would need to predict the nonce (2^48 search per fence) to forge a matching END inside the payload. The delimiter_break_attempt regression test exercises this. Per-finding mapping - H-002 Cert Subject DN injection (CSR submitter controlled) → TestMCP_PromptInjection_H002_CertSubjectDN - H-003 Discovered cert metadata injection (cert owner controlled) → TestMCP_PromptInjection_H003_DiscoveredCertMetadata - M-003 Agent heartbeat injection (agent self-reports hostname/OS/IP) → TestMCP_PromptInjection_M003_AgentHeartbeat - M-004 Upstream CA error injection (CA controls error string) → TestMCP_PromptInjection_M004_UpstreamCAError - M-005 Audit details + notification body injection (downstream actors control these) → TestMCP_PromptInjection_M005_AuditDetailsAndNotifications Verification gates - go vet ./... → clean - go build ./... → clean - go test -short -count=1 ./... → all packages pass - go test -count=1 ./internal/mcp/... → all packages pass - npx tsc --noEmit (web) → clean - npx vitest run (web) → 337 passed - python3 yaml.safe_load(api/openapi.yaml) → 89 paths, 56 schemas Threat-model placement: TB-7 (MCP↔LLM consumer). certctl owns the boundary; consumer-side prompt engineering is recommended but not relied upon. Defense-in-depth: per-call nonce closes the delimiter-forgery edge case that constant fences would have left exposed. Bundle 3 of the 2026-04-25 comprehensive audit (88 findings).
2026-06-11 23:08:51 +00:00 · 2026-04-25 22:44:33 +00:00
parent 7f544b86f6
commit 2c5383da9f
5 changed files with 517 additions and 4 deletions
@@ -126,6 +126,10 @@ func TestPaginationQuery(t *testing.T) {
 }

 func TestTextResult(t *testing.T) {
+	// Bundle-3: textResult wraps the response body in untrusted-data fences.
+	// The fence labels the data as MCP_RESPONSE so LLM consumers can be
+	// instructed to interpret the inner JSON as opaque content rather than
+	// instructions. See internal/mcp/fence.go for the strategy doc.
 	data := json.RawMessage(`{"id":"mc-test","status":"Active"}`)
 	result, metadata, err := textResult(data)
 	if err != nil {
@@ -144,12 +148,22 @@ func TestTextResult(t *testing.T) {
 	if !ok {
 		t.Fatal("expected TextContent type")
 	}
-	if tc.Text != `{"id":"mc-test","status":"Active"}` {
-		t.Errorf("unexpected text content: %s", tc.Text)
+	if !strings.Contains(tc.Text, "--- UNTRUSTED MCP_RESPONSE START") {
+		t.Errorf("missing start fence in text content: %s", tc.Text)
+	}
+	if !strings.Contains(tc.Text, "--- UNTRUSTED MCP_RESPONSE END") {
+		t.Errorf("missing end fence in text content: %s", tc.Text)
+	}
+	if !strings.Contains(tc.Text, `{"id":"mc-test","status":"Active"}`) {
+		t.Errorf("inner body missing from fenced content: %s", tc.Text)
 	}
 }

 func TestErrorResult(t *testing.T) {
+	// Bundle-3: errorResult wraps the error message in untrusted-data fences.
+	// Upstream-CA error strings are attacker-controllable (M-004), so the
+	// fence prevents an injected "ignore previous instructions" payload in
+	// a CA error from steering the LLM consumer.
 	result, _, err := errorResult(http.ErrServerClosed)
 	if result != nil {
 		t.Errorf("expected nil result, got %v", result)
@@ -157,6 +171,15 @@ func TestErrorResult(t *testing.T) {
 	if err == nil {
 		t.Fatal("expected non-nil error")
 	}
+	if !strings.Contains(err.Error(), "--- UNTRUSTED MCP_ERROR START") {
+		t.Errorf("missing start fence in error: %s", err.Error())
+	}
+	if !strings.Contains(err.Error(), "--- UNTRUSTED MCP_ERROR END") {
+		t.Errorf("missing end fence in error: %s", err.Error())
+	}
+	if !strings.Contains(err.Error(), http.ErrServerClosed.Error()) {
+		t.Errorf("inner error missing from fenced content: %s", err.Error())
+	}
 }

 // TestToolEndToEnd_ListCertificates verifies the full flow: