feat(M48): continuous TLS health monitoring — endpoint state machine, shared tlsprobe, 8 API endpoints, GUI

Adds continuous TLS endpoint health monitoring that closes the deploy→verify→monitor loop.
After M25 verifies a deployment succeeded once, M48 continuously confirms it stays healthy.

Key components:
- Shared `internal/tlsprobe/` package extracted from network scanner for reuse
- Health status state machine: healthy → degraded (2 failures) → down (5 failures),
  plus cert_mismatch when served fingerprint differs from expected
- 8th scheduler loop (60s tick, per-endpoint configurable intervals)
- PostgreSQL migration 000011: endpoint_health_checks + endpoint_health_history tables
- 8 REST API endpoints (CRUD, history, acknowledge, summary)
- Health Monitor GUI page with summary bar, status table, create modal, auto-refresh
- 38 new tests (5 tlsprobe + 11 domain + 10 service + 8 handler + 4 frontend)
- All coverage thresholds maintained (service 68%, handler 83%, domain 87%, middleware 63%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-04-15 21:45:45 -04:00
parent f2e60b93a3
commit 596d86a206
29 changed files with 3540 additions and 30 deletions
+125
View File
@@ -0,0 +1,125 @@
package tlsprobe
import (
"context"
"crypto/ecdsa"
"crypto/rsa"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/hex"
"fmt"
"net"
"time"
)
// ProbeResult contains the result of probing a TLS endpoint.
//
// ProbeTLS reports failures through this struct rather than a Go error: a failed
// dial or handshake leaves Success false and sets Error. The certificate fields
// describe the leaf certificate (first peer certificate) and keep their zero
// values when the probe fails or the peer presents no certificate.
type ProbeResult struct {
Address string `json:"address"` // endpoint address exactly as passed to ProbeTLS
Success bool `json:"success"` // true once the TLS dial and handshake completed
Fingerprint string `json:"fingerprint"` // SHA-256 hex fingerprint of leaf cert
TLSVersion string `json:"tls_version"` // e.g. "TLS 1.3"
CipherSuite string `json:"cipher_suite"` // e.g. "TLS_AES_128_GCM_SHA256"
Subject string `json:"subject"` // cert subject CN
Issuer string `json:"issuer"` // cert issuer CN
NotBefore time.Time `json:"not_before"` // start of the leaf cert's validity window
NotAfter time.Time `json:"not_after"` // end of the leaf cert's validity window
SerialNumber string `json:"serial_number"` // leaf cert serial number as base-16 text
ResponseTimeMs int `json:"response_time_ms"` // elapsed ms from probe start until the dial+handshake finished or failed
Error string `json:"error,omitempty"` // dial/handshake error text; empty (and omitted from JSON) on success
}
// ProbeTLS connects to a TLS endpoint, performs a handshake, and extracts
// metadata about the leaf certificate and the negotiated session.
//
// The dial and handshake together are bounded by both timeout and ctx,
// whichever ends first; cancelling ctx aborts an in-flight probe. A nil ctx is
// treated as context.Background(). Failures are reported through the returned
// ProbeResult (Success=false, Error set) rather than as a Go error, and
// ResponseTimeMs is populated in both the success and failure case.
//
// It uses InsecureSkipVerify to discover all certificates including self-signed
// and expired ones. This is safe because the certificate data is extracted and
// analyzed, not validated for trust. The connection is closed before returning.
func ProbeTLS(ctx context.Context, address string, timeout time.Duration) ProbeResult {
	// Earlier versions ignored ctx entirely, so tolerate callers that pass nil.
	if ctx == nil {
		ctx = context.Background()
	}

	startTime := time.Now()
	result := ProbeResult{Address: address}

	dialer := &tls.Dialer{
		// The net.Dialer timeout is applied by crypto/tls to the TCP connect and
		// the TLS handshake as a whole, matching tls.DialWithDialer semantics.
		NetDialer: &net.Dialer{Timeout: timeout},
		Config: &tls.Config{
			// SECURITY NOTE: InsecureSkipVerify is intentionally set to true here.
			// The health checker must monitor ALL certificates including self-signed,
			// expired, and internal CA certificates. This setting is scoped to discovery
			// probing only — it is NEVER used for control-plane API calls, issuer
			// connector communication, or any operation that trusts the certificate.
			// The endpoint's certificate chain is extracted and analyzed, not validated.
			// See TICKET-016 for full security audit rationale.
			InsecureSkipVerify: true,
		},
	}

	rawConn, err := dialer.DialContext(ctx, "tcp", address)
	result.ResponseTimeMs = int(time.Since(startTime).Milliseconds())
	if err != nil {
		result.Error = err.Error()
		return result
	}
	defer rawConn.Close()

	// tls.Dialer documents that the returned connection is always a *tls.Conn;
	// check anyway so a violation surfaces as a failed probe, not a panic.
	conn, ok := rawConn.(*tls.Conn)
	if !ok {
		result.Error = "tlsprobe: dialer returned a non-TLS connection"
		return result
	}
	result.Success = true

	// Extract certificates from TLS connection state. The first peer
	// certificate is the leaf the server presented.
	state := conn.ConnectionState()
	if len(state.PeerCertificates) > 0 {
		cert := state.PeerCertificates[0]
		result.Fingerprint = CertFingerprint(cert)
		result.Subject = cert.Subject.CommonName
		result.Issuer = cert.Issuer.CommonName
		result.NotBefore = cert.NotBefore
		result.NotAfter = cert.NotAfter
		result.SerialNumber = cert.SerialNumber.Text(16)
	}

	result.TLSVersion = tlsVersionString(state.Version)
	result.CipherSuite = tls.CipherSuiteName(state.CipherSuite)

	return result
}
// CertFingerprint returns the SHA-256 digest of the certificate's raw encoded
// bytes (cert.Raw) as a lowercase hex string.
func CertFingerprint(cert *x509.Certificate) string {
	hasher := sha256.New()
	hasher.Write(cert.Raw) // hash.Hash.Write is documented never to return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// CertKeyInfo reports the public-key algorithm of a certificate and its size
// in bits.
//
// RSA keys report the modulus bit length and ECDSA keys report the curve's bit
// size. Ed25519 is identified via cert.PublicKeyAlgorithm and reported as 256
// bits. Anything else returns the algorithm's String() form with a size of 0.
func CertKeyInfo(cert *x509.Certificate) (string, int) {
	if rsaKey, ok := cert.PublicKey.(*rsa.PublicKey); ok {
		return "RSA", rsaKey.N.BitLen()
	}
	if ecKey, ok := cert.PublicKey.(*ecdsa.PublicKey); ok {
		return "ECDSA", ecKey.Curve.Params().BitSize
	}
	if cert.PublicKeyAlgorithm == x509.Ed25519 {
		return "Ed25519", 256
	}
	return cert.PublicKeyAlgorithm.String(), 0
}
// tlsVersionString maps a crypto/tls protocol version constant to a display
// name such as "TLS 1.2". Versions outside TLS 1.0–1.3 are rendered as
// "TLS 0x" followed by the value in lowercase hex.
func tlsVersionString(version uint16) string {
	known := map[uint16]string{
		tls.VersionTLS10: "TLS 1.0",
		tls.VersionTLS11: "TLS 1.1",
		tls.VersionTLS12: "TLS 1.2",
		tls.VersionTLS13: "TLS 1.3",
	}
	if name, found := known[version]; found {
		return name
	}
	return fmt.Sprintf("TLS 0x%x", version)
}
+175
View File
@@ -0,0 +1,175 @@
package tlsprobe
import (
"context"
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"fmt"
"math/big"
"net"
"net/http/httptest"
"testing"
"time"
)
// TestProbeTLS_ConnectionRefused checks that a probe which cannot connect is
// reported as a failure with an error message instead of a success.
func TestProbeTLS_ConnectionRefused(t *testing.T) {
	const (
		// Loopback port 1 — presumably nothing listens there on the test host.
		unreachable = "127.0.0.1:1"
		dialTimeout = 1 * time.Second
	)

	probeCtx, stop := context.WithTimeout(context.Background(), 2*time.Second)
	defer stop()

	got := ProbeTLS(probeCtx, unreachable, dialTimeout)

	if succeeded := got.Success; succeeded {
		t.Errorf("expected Success=false for unavailable endpoint, got %v", succeeded)
	}
	if len(got.Error) == 0 {
		t.Errorf("expected Error to be set for unavailable endpoint, got empty")
	}
	// A refused connection can complete in under a millisecond, so zero is a
	// legitimate reading; only a negative duration would indicate a bug.
	if elapsed := got.ResponseTimeMs; elapsed < 0 {
		t.Errorf("expected ResponseTimeMs >= 0, got %d", elapsed)
	}
}
// TestProbeTLS_Success probes a local httptest TLS server, which serves a
// certificate the probe has no trust anchor for, and checks that the handshake
// metadata is reported.
func TestProbeTLS_Success(t *testing.T) {
	server := httptest.NewTLSServer(nil)
	defer server.Close()

	// ProbeTLS expects host:port, so rebuild it from the listener's TCP address.
	tcpAddr, ok := server.Listener.Addr().(*net.TCPAddr)
	if !ok {
		t.Fatalf("expected listener address of type *net.TCPAddr, got %T", server.Listener.Addr())
	}
	address := net.JoinHostPort(tcpAddr.IP.String(), fmt.Sprintf("%d", tcpAddr.Port))

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	result := ProbeTLS(ctx, address, 5*time.Second)

	// Nothing below is meaningful if the probe itself failed, so stop here.
	if !result.Success {
		t.Fatalf("expected Success=true, got false. Error: %s", result.Error)
	}
	if result.Fingerprint == "" {
		t.Errorf("expected Fingerprint to be set, got empty")
	}
	if result.TLSVersion == "" {
		t.Errorf("expected TLSVersion to be set, got empty")
	}
	// ResponseTimeMs is truncated to whole milliseconds, so a fast loopback
	// handshake can legitimately report 0; only a negative value is a bug.
	if result.ResponseTimeMs < 0 {
		t.Errorf("expected ResponseTimeMs >= 0, got %d", result.ResponseTimeMs)
	}
}
// TestCertFingerprint_SHA256 checks the shape of a fingerprint: non-empty,
// 64 characters, lowercase hex only, and identical across repeated calls on
// the same certificate.
func TestCertFingerprint_SHA256(t *testing.T) {
	cert, _ := createTestCertWithKey(t, "test.example.com", "rsa")

	first := CertFingerprint(cert)
	if first == "" {
		t.Errorf("expected non-empty fingerprint, got empty")
	}
	if got := len(first); got != 64 {
		t.Errorf("expected fingerprint length 64 (hex SHA-256), got %d", got)
	}

	// Every character must be a decimal digit or a lowercase a-f.
	for _, r := range first {
		isDigit := r >= '0' && r <= '9'
		isLowerHex := r >= 'a' && r <= 'f'
		if !isDigit && !isLowerHex {
			t.Errorf("expected lowercase hex fingerprint, got invalid char: %c", r)
		}
	}

	// Hashing the same certificate twice must give the same answer.
	if second := CertFingerprint(cert); second != first {
		t.Errorf("fingerprint not consistent: %s vs %s", first, second)
	}
}
// TestCertKeyInfo_RSA checks that a certificate built on the helper's RSA key
// is reported as "RSA" with a 2048-bit size.
func TestCertKeyInfo_RSA(t *testing.T) {
	const (
		wantAlg  = "RSA"
		wantBits = 2048
	)

	cert, _ := createTestCertWithKey(t, "test.example.com", "rsa")
	gotAlg, gotBits := CertKeyInfo(cert)

	if gotAlg != wantAlg {
		t.Errorf("expected algorithm 'RSA', got '%s'", gotAlg)
	}
	if gotBits != wantBits {
		t.Errorf("expected RSA key size 2048, got %d", gotBits)
	}
}
// TestCertKeyInfo_ECDSA checks that a certificate built on the helper's ECDSA
// key is reported as "ECDSA" with a 256-bit size.
func TestCertKeyInfo_ECDSA(t *testing.T) {
	const (
		wantAlg  = "ECDSA"
		wantBits = 256
	)

	cert, _ := createTestCertWithKey(t, "test.example.com", "ecdsa")
	gotAlg, gotBits := CertKeyInfo(cert)

	if gotAlg != wantAlg {
		t.Errorf("expected algorithm 'ECDSA', got '%s'", gotAlg)
	}
	if gotBits != wantBits {
		t.Errorf("expected ECDSA P-256 key size 256, got %d", gotBits)
	}
}
// createTestCert returns a self-signed test certificate for cn backed by an
// RSA key. The generated private key is discarded.
func createTestCert(t *testing.T, cn string) *x509.Certificate {
	// Mark as a helper so failures are attributed to the calling test's line.
	t.Helper()

	cert, _ := createTestCertWithKey(t, cn, "rsa")
	return cert
}
// createTestCertWithKey generates a fresh key of the requested type ("rsa" for
// a 2048-bit RSA key, "ecdsa" for a P-256 ECDSA key), issues a self-signed
// server certificate for cn that is valid for one year, and returns the parsed
// certificate together with the private key (*rsa.PrivateKey or
// *ecdsa.PrivateKey). An unsupported keyType, or any failure while generating
// or parsing, aborts the calling test via t.Fatalf.
func createTestCertWithKey(t *testing.T, cn, keyType string) (*x509.Certificate, interface{}) {
	// Mark as a helper so failures are attributed to the calling test's line.
	t.Helper()

	var privKey, pubKey interface{}
	switch keyType {
	case "rsa":
		key, err := rsa.GenerateKey(rand.Reader, 2048)
		if err != nil {
			t.Fatalf("failed to generate RSA key: %v", err)
		}
		privKey, pubKey = key, &key.PublicKey
	case "ecdsa":
		key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
		if err != nil {
			t.Fatalf("failed to generate ECDSA key: %v", err)
		}
		privKey, pubKey = key, &key.PublicKey
	default:
		t.Fatalf("unsupported key type: %s", keyType)
	}

	// Take one timestamp so the validity window is exactly one year wide.
	now := time.Now()
	template := &x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject: pkix.Name{
			CommonName: cn,
		},
		NotBefore: now,
		NotAfter:  now.Add(365 * 24 * time.Hour),
		KeyUsage:  x509.KeyUsageDigitalSignature,
		ExtKeyUsage: []x509.ExtKeyUsage{
			x509.ExtKeyUsageServerAuth,
		},
		DNSNames: []string{cn},
	}

	// Passing the template as its own parent makes the certificate self-signed.
	certDER, err := x509.CreateCertificate(rand.Reader, template, template, pubKey, privKey)
	if err != nil {
		t.Fatalf("failed to create certificate: %v", err)
	}

	// Round-trip through the parser so derived fields such as Raw and
	// PublicKey are populated the way they are on a certificate read off the wire.
	cert, err := x509.ParseCertificate(certDER)
	if err != nil {
		t.Fatalf("failed to parse certificate: %v", err)
	}
	return cert, privKey
}