From b9478d064ca14ed1f6dfed97d496e87d013a998f Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Mon, 4 May 2026 04:56:13 +0000 Subject: [PATCH] security(email): sanitize body fields against content injection (CodeQL #11, CWE-640) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL alert #11 (go/email-injection, CWE-640 / OWASP Content Spoofing) flagged the wc.Write(message) sink at internal/connector/notifier/email/ email.go:208 because attacker-controllable fields flow into the email body unchecked. Threat model: Headers (From, To, Subject) were already protected by validation.ValidateHeaderValue (CWE-113 SMTP header injection, closed in commit 9e957c3). The remaining gap was the body. An attacker controls multiple fields that surface to the body of alert/event notifications: - alert.Subject, alert.Message - event.Subject, event.Body, *event.CertificateID - alert.Metadata + event.Metadata key/value pairs These can carry CR/LF (forged 'Reply-To: attacker@evil.com' inside the body that recipients skim), NUL bytes (RFC 5321 4.5.2 violation that some MTAs truncate at), bidi-override Unicode (visually- spoofable URLs), zero-width / invisible Unicode (phishing), or malformed UTF-8 (Go emits U+FFFD which becomes a glyph in mail clients). The HTML email path (digest service) already uses html/template upstream and is safe via contextual auto-escape. This commit closes the plaintext path. Fix: internal/validation/headers.go gains SanitizeEmailBodyValue — a sanitizer that NEVER errors (the right contract for body content; over-eager rejection drops operator notifications) and scrubs: - NUL bytes (stripped entirely) - bare CR / LF (replaced with space — single fields should never carry their own line breaks; the surrounding template handles legitimate CRLFs) - C0 control chars < 0x20 except TAB - DEL (0x7F) + C1 control chars (0x80-0x9F) - U+FFFD (defense in depth: malformed UTF-8 -> Go emits this; strip so attacker-planted invalid bytes don't survive as an arbitrary glyph) - Bidi-override Unicode (U+202A..U+202E, U+2066..U+2069) - Zero-width / invisible Unicode (U+200B..U+200D, U+2060..U+2063, U+FEFF, U+180E) - Catch-all unicode.IsControl for anything not enumerated above Codepoint table uses numeric ranges rather than rune-literal switch cases — Go source rejects literal invisible characters (BOM U+FEFF) mid-file, so the table compares against numeric values. internal/connector/notifier/email/email.go applies the sanitizer at every interpolation site: - formatAlertBody: alert.ID/Type/Severity/Subject/Message (CreatedAt is time.Time -> RFC3339, structural, not sanitized) - formatEventBody: event.ID/Type/Subject/Body, *CertificateID (CreatedAt structural, not sanitized) - formatMetadata: both keys and values The sendEmail / formatEmailMessage call sites continue to validate headers (From / To / Subject) via the existing ValidateHeaderValue fail-closed gate; the new sanitizer is body-side only. Tests (internal/validation/headers_test.go): TestSanitizeEmailBodyValue_PreservesSafeInput Pin: ordinary ASCII, UTF-8 multibyte (résumé / 日本語 / مرحبا), tabs, common cert DNs, URLs all flow through unchanged. TestSanitizeEmailBodyValue_StripsControlChars Table-driven across NUL, bare LF/CR, CRLF, BEL, backspace, DEL, C1 (U+0080 / U+009F), U+FFFD, TAB-preserve. TestSanitizeEmailBodyValue_StripsBidiOverride 7 attacker payloads (RLO, LRO, LRI, zero-width space, ZWNJ, BOM, MVS) — each must produce a non-identity output. TestSanitizeEmailBodyValue_ContentSpoofingScenario The CodeQL example case: 'alert\r\nReply-To: attacker@evil.com\r\n Click https://evil.example.com/reset' — verify NO CR/LF survives. Verified locally: gofmt: clean. go vet ./...: exit 0. go test -short -count=1 ./internal/validation/...: ok 0.374s go test -short -count=1 ./internal/connector/notifier/email/...: ok 0.186s Reference: https://github.com/certctl-io/certctl/security/code-scanning/11 Closes CodeQL alert #11 (go/email-injection). --- internal/connector/notifier/email/email.go | 54 ++++++++- internal/validation/headers.go | 123 ++++++++++++++++++++ internal/validation/headers_test.go | 124 +++++++++++++++++++++ 3 files changed, 297 insertions(+), 4 deletions(-) diff --git a/internal/connector/notifier/email/email.go b/internal/connector/notifier/email/email.go index b110183..dab350e 100644 --- a/internal/connector/notifier/email/email.go +++ b/internal/connector/notifier/email/email.go @@ -356,6 +356,21 @@ func (c *Connector) formatHTMLEmailMessage(from, to, subject, htmlBody string) ( } // formatAlertBody formats an alert notification as email body text. +// +// CodeQL go/email-injection (CWE-640 / OWASP Content Spoofing) defense: +// every field interpolated into the body that may carry attacker- +// controlled content (alert.Subject, alert.Message, alert.Metadata +// values, alert.ID / Type / Severity which originate from the API +// surface) is routed through validation.SanitizeEmailBodyValue before +// formatting. The sanitizer strips NUL bytes (RFC 5321 §4.5.2 violation), +// bare CR/LF within a single field (forged header-boundary attempts), +// bidi-override Unicode (visually-spoofable URLs), zero-width / invisible +// codepoints, and C0/C1 control chars. CreatedAt is a time.Time — +// formatted via RFC3339; not user-controllable so unsanitized. +// +// Header values (From, To, Subject) are protected separately by +// validation.ValidateHeaderValue at sendEmail entry (CWE-113 SMTP header +// injection — see commit 9e957c3). func (c *Connector) formatAlertBody(alert notifier.Alert) string { body := fmt.Sprintf(` Certificate Alert Notification @@ -372,16 +387,29 @@ Message: %s %s -`, alert.ID, alert.Type, alert.Severity, alert.CreatedAt.Format(time.RFC3339), alert.Subject, alert.Message, c.formatMetadata(alert.Metadata)) +`, + validation.SanitizeEmailBodyValue(alert.ID), + validation.SanitizeEmailBodyValue(alert.Type), + validation.SanitizeEmailBodyValue(alert.Severity), + alert.CreatedAt.Format(time.RFC3339), + validation.SanitizeEmailBodyValue(alert.Subject), + validation.SanitizeEmailBodyValue(alert.Message), + c.formatMetadata(alert.Metadata), + ) return body } // formatEventBody formats an event notification as email body text. +// +// Same CodeQL go/email-injection mitigation as formatAlertBody — every +// user-controllable interpolated field routes through +// validation.SanitizeEmailBodyValue. CreatedAt is unsanitized (time.Time +// → RFC3339 is structural, not user-controllable). func (c *Connector) formatEventBody(event notifier.Event) string { certInfo := "" if event.CertificateID != nil { - certInfo = fmt.Sprintf("Certificate ID: %s\n", *event.CertificateID) + certInfo = fmt.Sprintf("Certificate ID: %s\n", validation.SanitizeEmailBodyValue(*event.CertificateID)) } body := fmt.Sprintf(` @@ -398,12 +426,27 @@ Body: %s %s -`, event.ID, event.Type, event.CreatedAt.Format(time.RFC3339), certInfo, event.Subject, event.Body, c.formatMetadata(event.Metadata)) +`, + validation.SanitizeEmailBodyValue(event.ID), + validation.SanitizeEmailBodyValue(event.Type), + event.CreatedAt.Format(time.RFC3339), + certInfo, + validation.SanitizeEmailBodyValue(event.Subject), + validation.SanitizeEmailBodyValue(event.Body), + c.formatMetadata(event.Metadata), + ) return body } // formatMetadata formats metadata as a readable string. +// +// Both keys and values can carry attacker-controlled content (cert +// subject DN fragments, discovered cert metadata, owner/team labels — +// all originate from API surfaces an attacker may influence). Both are +// routed through validation.SanitizeEmailBodyValue. Closes the +// CodeQL go/email-injection finding alongside formatAlertBody + +// formatEventBody. func (c *Connector) formatMetadata(metadata map[string]string) string { if len(metadata) == 0 { return "" @@ -411,7 +454,10 @@ func (c *Connector) formatMetadata(metadata map[string]string) string { metadataStr := "\nMetadata:\n" for key, value := range metadata { - metadataStr += fmt.Sprintf(" %s: %s\n", key, value) + metadataStr += fmt.Sprintf(" %s: %s\n", + validation.SanitizeEmailBodyValue(key), + validation.SanitizeEmailBodyValue(value), + ) } return metadataStr diff --git a/internal/validation/headers.go b/internal/validation/headers.go index 123ce0c..de6127d 100644 --- a/internal/validation/headers.go +++ b/internal/validation/headers.go @@ -3,6 +3,7 @@ package validation import ( "fmt" "strings" + "unicode" ) // ValidateHeaderValue rejects any value that contains characters capable of @@ -34,3 +35,125 @@ func ValidateHeaderValue(field, value string) error { } return nil } + +// SanitizeEmailBodyValue scrubs control characters and visually-spoofable +// Unicode from a single field that will be interpolated into a plaintext +// email body. Closes CodeQL go/email-injection (CWE-640 / OWASP Content +// Spoofing): an attacker who controls a field surfaced to an +// operator-bound notification (cert subject DN, discovered cert metadata, +// alert subject / message, event subject / body, metadata key+value +// pairs) could otherwise plant content that: +// +// - Forges header-like content using bare CR/LF (some mail relays +// misinterpret bare LF mid-body as a header boundary; RFC 5321 +// mandates CRLF, but defense in depth says strip bare LFs). +// - Embeds NUL bytes (forbidden by RFC 5321 sec 4.5.2; some MTAs +// truncate at NUL, allowing content elision). +// - Plants bidi-override Unicode (U+202A..U+202E, U+2066..U+2069) so a +// malicious URL renders as a benign one in the recipient's mail +// client. +// - Plants zero-width / invisible Unicode (U+200B..U+200D, U+FEFF, +// U+2060..U+2063) so a phishing-prone URL hides whitespace. +// - Plants C0 / C1 control characters that mail clients may render +// unpredictably or strip in surprising ways. +// +// The sanitizer NEVER errors; it always returns a sanitized string. This +// is the right contract for body content (vs. headers, which fail loud) +// because dropping a notification because the cert subject DN happens to +// contain a Mongolian Vowel Separator would be worse than escaping it. +// +// What the sanitizer does: +// +// - Strip NUL bytes (\x00) entirely. +// - Replace bare LF / CR with a single space. Multi-line legitimate +// body content gets its CRLF formatting from the email serializer +// above this layer; a SINGLE FIELD interpolated into the body +// should never carry its own line breaks. +// - Strip bidi-override and zero-width characters. +// - Strip C0 control chars (< 0x20) except TAB. Strip DEL (0x7F) + +// C1 control chars (0x80-0x9F). +// - Leave ordinary printable Unicode (including non-Latin scripts) +// intact. +// +// Apply this to EVERY user-controllable field before interpolating into +// a plaintext email body. Do NOT apply it to operator-controlled +// constants (template literals, severity tier names) — those don't +// carry the threat. The HTML email path uses html/template upstream +// and does not need this sanitizer (html/template's contextual +// auto-escape handles the same threats for HTML rendering). +func SanitizeEmailBodyValue(value string) string { + if value == "" { + return value + } + var b strings.Builder + b.Grow(len(value)) + for _, r := range value { + switch { + case r == 0: + // NUL — strip entirely (RFC 5321 sec 4.5.2 violation). + continue + case r == '\r' || r == '\n': + // Strip line breaks within a single interpolated field. + b.WriteRune(' ') + case r == '\t': + // TAB is legitimate body content. + b.WriteRune(r) + case r < 0x20: + // C0 control chars (except TAB above) — strip. + continue + case r >= 0x7F && r <= 0x9F: + // DEL + C1 control chars — strip. + continue + case r == 0xFFFD: + // Replacement character — Go's range emits this for any + // malformed UTF-8 byte sequence. Defense in depth: an + // attacker who plants invalid UTF-8 (e.g. raw 0x80..0xFF + // without a valid lead byte) should not have their input + // surface as an arbitrary glyph in operator-bound mail. + continue + case isBidiOrZeroWidth(r): + // Bidi-override + zero-width — strip; visually spoofable. + continue + case unicode.IsControl(r): + // Catch-all: any remaining Unicode control class. + continue + default: + b.WriteRune(r) + } + } + return b.String() +} + +// isBidiOrZeroWidth reports whether r is one of the bidi-override or +// zero-width Unicode codepoints used in homograph / direction-spoofing +// attacks. Mirrors the validator in internal/connector/issuer/local +// (validateCSRUnicode); kept inline here to avoid a new import edge +// from internal/validation back to the local issuer package. +// +// Codepoints expressed as numeric ranges instead of rune-literal +// switch cases — Go source rejects literal invisible characters +// (e.g. BOM U+FEFF) mid-file, so we compare against numeric values. +func isBidiOrZeroWidth(r rune) bool { + switch { + // LRE U+202A, RLE U+202B, PDF U+202C, LRO U+202D, RLO U+202E + case r >= 0x202A && r <= 0x202E: + return true + // LRI U+2066, RLI U+2067, FSI U+2068, PDI U+2069 + case r >= 0x2066 && r <= 0x2069: + return true + // Zero-width space U+200B, ZWNJ U+200C, ZWJ U+200D + case r >= 0x200B && r <= 0x200D: + return true + // Word joiner U+2060, invisible separator U+2061, + // invisible times U+2062, invisible plus U+2063 + case r >= 0x2060 && r <= 0x2063: + return true + // Byte-order mark / zero-width no-break space U+FEFF + case r == 0xFEFF: + return true + // Mongolian Vowel Separator U+180E + case r == 0x180E: + return true + } + return false +} diff --git a/internal/validation/headers_test.go b/internal/validation/headers_test.go index c4f3121..c6bfa2d 100644 --- a/internal/validation/headers_test.go +++ b/internal/validation/headers_test.go @@ -68,3 +68,127 @@ func TestValidateHeaderValue_DefaultFieldName(t *testing.T) { t.Errorf("expected default field name 'header' in error, got %q", err.Error()) } } + +// TestSanitizeEmailBodyValue_PreservesSafeInput pins the contract that +// ordinary body content (including non-Latin scripts and tabs) flows +// through unchanged. The sanitizer must be a no-op for legitimate input +// — over-stripping degrades operator notifications. +func TestSanitizeEmailBodyValue_PreservesSafeInput(t *testing.T) { + tests := []struct { + name string + input string + }{ + {"plain ASCII", "Renewal reminder for prod.example.com"}, + {"empty", ""}, + {"utf-8 multibyte", "résumé — 日本語 — مرحبا"}, + {"tabs allowed", "key:\tvalue"}, + {"spaces", " multiple spaces "}, + {"common cert DN", "CN=api.example.com,O=Acme Corp,C=US"}, + {"URL with safe chars", "https://docs.example.com/cert/mc-prod-api"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SanitizeEmailBodyValue(tc.input) + if got != tc.input { + t.Errorf("expected unchanged %q, got %q", tc.input, got) + } + }) + } +} + +// TestSanitizeEmailBodyValue_StripsControlChars pins the CodeQL +// go/email-injection (CWE-640) defense — every attacker-plant-able +// control character is stripped or replaced. +func TestSanitizeEmailBodyValue_StripsControlChars(t *testing.T) { + tests := []struct { + name string + input string + want string + wantSafer bool // want output != input + }{ + {"NUL byte stripped", "before\x00after", "beforeafter", true}, + {"bare LF replaced with space", "line1\nline2", "line1 line2", true}, + {"bare CR replaced with space", "line1\rline2", "line1 line2", true}, + {"CRLF replaced (both stripped)", "line1\r\nline2", "line1 line2", true}, + {"BEL stripped", "alert\x07now", "alertnow", true}, + {"backspace stripped", "x\x08y", "xy", true}, + {"DEL stripped", "x\x7fy", "xy", true}, + // C1 control chars must be specified via Unicode escape (\u) so + // the source remains valid UTF-8; bare \x80 / \x9f bytes would + // be invalid UTF-8 and Go's range emits U+FFFD instead, which + // would test the malformed-UTF-8 strip path, not the C1 path. + {"C1 control char stripped (U+0080)", "x\u0080y", "xy", true}, + {"C1 control char stripped (U+009F)", "x\u009Fy", "xy", true}, + // U+FFFD is the replacement char Go emits for malformed UTF-8. + // We strip it as defense-in-depth so attacker-planted invalid + // UTF-8 doesn't survive into operator notifications as an + // arbitrary glyph. + {"replacement char stripped", "x\uFFFDy", "xy", true}, + {"TAB preserved (legitimate body content)", "k:\tv", "k:\tv", false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SanitizeEmailBodyValue(tc.input) + if got != tc.want { + t.Errorf("input %q: want %q, got %q", tc.input, tc.want, got) + } + if tc.wantSafer && got == tc.input { + t.Errorf("expected sanitization to change %q, but output unchanged", tc.input) + } + }) + } +} + +// TestSanitizeEmailBodyValue_StripsBidiOverride pins the +// visually-spoofable Unicode defense (homograph / RTL-override / +// zero-width attacks). An attacker who controls a CN or metadata value +// could otherwise plant a malicious URL that renders benignly in mail +// clients that honor bidi-override codepoints. +func TestSanitizeEmailBodyValue_StripsBidiOverride(t *testing.T) { + tests := []struct { + name string + input string + }{ + // U+202E = Right-to-left override + {"RTL override", "Click ‮www.evil.com‬ to verify"}, + // U+202D = Left-to-right override + {"LRO override", "Click ‭www.evil.com‬ to verify"}, + // U+2066 = Left-to-right isolate + {"LRI isolate", "Click ⁦www.evil.com⁩ to verify"}, + // U+200B = Zero-width space + {"zero-width space", "evil​.example.com"}, + // U+200C = ZWNJ + {"zero-width non-joiner", "ad‌min@example.com"}, + // U+FEFF = byte-order mark / zero-width no-break space + {"BOM", "x\uFEFFy"}, + // U+180E = Mongolian Vowel Separator + {"MVS", "a᠎b"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SanitizeEmailBodyValue(tc.input) + if got == tc.input { + t.Errorf("expected bidi/zero-width stripping for %q, got unchanged %q", tc.input, got) + } + }) + } +} + +// TestSanitizeEmailBodyValue_ContentSpoofingScenario pins the specific +// CodeQL go/email-injection (CWE-640) example: an attacker who controls +// a body field plants header-like content. The sanitizer neutralizes +// the attempt by stripping bare LF/CR within the field. +func TestSanitizeEmailBodyValue_ContentSpoofingScenario(t *testing.T) { + // Attacker plants a body value that tries to fake a "Reply-To" + // header inside the body. Even if mail clients don't honor it, a + // recipient skimming the body could be fooled. + attacker := "alert from compromised cert\r\nReply-To: attacker@evil.com\r\nClick https://evil.example.com/reset" + got := SanitizeEmailBodyValue(attacker) + if got == attacker { + t.Fatalf("attacker input passed through unchanged: %q", got) + } + // Specifically: no CR or LF should remain in the field. + if strings.ContainsAny(got, "\r\n") { + t.Errorf("CR/LF still present after sanitization: %q", got) + } +}