feat(M48): continuous TLS health monitoring — endpoint state machine, shared tlsprobe, 8 API endpoints, GUI

Adds continuous TLS endpoint health monitoring that closes the deploy→verify→monitor loop.
After M25 verifies a deployment succeeded once, M48 continuously confirms it stays healthy.

Key components:
- Shared `internal/tlsprobe/` package extracted from network scanner for reuse
- Health status state machine: healthy → degraded (2 failures) → down (5 failures),
  plus cert_mismatch when served fingerprint differs from expected
- 8th scheduler loop (60s tick, per-endpoint configurable intervals)
- PostgreSQL migration 000011: endpoint_health_checks + endpoint_health_history tables
- 8 REST API endpoints (CRUD, history, acknowledge, summary)
- Health Monitor GUI page with summary bar, status table, create modal, auto-refresh
- 38 new tests (5 tlsprobe + 11 domain + 10 service + 8 handler + 4 frontend)
- All coverage thresholds maintained (service 68%, handler 83%, domain 87%, middleware 63%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-04-15 21:45:45 -04:00
parent f2e60b93a3
commit 596d86a206
29 changed files with 3540 additions and 30 deletions
+42
View File
@@ -90,6 +90,14 @@ import {
updateIssuer,
updateTarget,
getPolicy,
listHealthChecks,
getHealthCheck,
createHealthCheck,
updateHealthCheck,
deleteHealthCheck,
getHealthCheckHistory,
acknowledgeHealthCheck,
getHealthCheckSummary,
} from './client';
// Mock global fetch
@@ -1236,4 +1244,38 @@ describe('API Client', () => {
expect(mockFetch.mock.calls[0][0]).toBe('/api/v1/policies/pol-1');
});
});
// Client-level tests for the M48 health-check endpoints. Each test queues one
// mocked fetch response and then inspects the first recorded fetch call.
describe('Health Checks (M48)', () => {
  it('listHealthChecks sends GET with optional filters', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({ data: [], total: 0, page: 1, per_page: 50 }));
    const result = await listHealthChecks({ status: 'degraded' });
    expect(result.total).toBe(0);
    expect(mockFetch.mock.calls[0][0]).toContain('/api/v1/health-checks');
    expect(mockFetch.mock.calls[0][0]).toContain('status=degraded');
  });

  it('getHealthCheck sends GET with health check ID', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({ id: 'hc-1', endpoint: 'example.com:443' }));
    const result = await getHealthCheck('hc-1');
    expect(result.id).toBe('hc-1');
    expect(mockFetch.mock.calls[0][0]).toBe('/api/v1/health-checks/hc-1');
  });

  it('createHealthCheck sends POST with data', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({ id: 'hc-1', endpoint: 'example.com:443' }));
    const result = await createHealthCheck({ endpoint: 'example.com:443' });
    expect(result.id).toBe('hc-1');
    const [url, init] = mockFetch.mock.calls[0];
    expect(url).toContain('/api/v1/health-checks');
    expect(init.method).toBe('POST');
  });

  // The four tests below cover client functions that were imported by this
  // suite but previously never exercised.
  it('updateHealthCheck sends PUT to the health check URL', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({ id: 'hc-1', endpoint: 'example.com:443', enabled: false }));
    const result = await updateHealthCheck('hc-1', { enabled: false });
    expect(result.id).toBe('hc-1');
    const [url, init] = mockFetch.mock.calls[0];
    expect(url).toBe('/api/v1/health-checks/hc-1');
    expect(init.method).toBe('PUT');
  });

  it('deleteHealthCheck sends DELETE to the health check URL', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({}));
    await deleteHealthCheck('hc-1');
    const [url, init] = mockFetch.mock.calls[0];
    expect(url).toBe('/api/v1/health-checks/hc-1');
    expect(init.method).toBe('DELETE');
  });

  it('getHealthCheckHistory sends GET with optional limit', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse([]));
    const result = await getHealthCheckHistory('hc-1', 10);
    expect(result).toEqual([]);
    expect(mockFetch.mock.calls[0][0]).toBe('/api/v1/health-checks/hc-1/history?limit=10');
  });

  it('acknowledgeHealthCheck sends POST to /acknowledge', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({}));
    await acknowledgeHealthCheck('hc-1');
    const [url, init] = mockFetch.mock.calls[0];
    expect(url).toBe('/api/v1/health-checks/hc-1/acknowledge');
    expect(init.method).toBe('POST');
  });

  it('getHealthCheckSummary sends GET to /health-checks/summary', async () => {
    mockFetch.mockReturnValueOnce(mockJsonResponse({ healthy: 5, degraded: 1, down: 0, cert_mismatch: 0, unknown: 2, total: 8 }));
    const result = await getHealthCheckSummary();
    expect(result.healthy).toBe(5);
    expect(result.total).toBe(8);
    expect(mockFetch.mock.calls[0][0]).toBe('/api/v1/health-checks/summary');
  });
});
});
+36 -1
View File
@@ -1,4 +1,4 @@
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget } from './types';
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget, EndpointHealthCheck, HealthHistoryEntry, HealthCheckSummary } from './types';
const BASE = '/api/v1';
@@ -432,3 +432,38 @@ export const getPrometheusMetrics = () => {
// Health
/**
 * Requests the `/health` path and resolves with an object carrying a `status` string.
 * Note that this path is passed to `fetchJSON` as-is, without the API base prefix.
 */
export const getHealth = () => {
  return fetchJSON<{ status: string }>('/health');
};
// Health checks (M48)
/**
 * Fetches a page of endpoint health checks.
 *
 * Every filter is optional; a filter is only added to the query string when its
 * value is truthy (so `undefined`, `''` and `0` are all omitted). When no filter
 * survives, the request URL carries no `?` suffix at all.
 *
 * @param params Optional filters and pagination settings.
 * @returns The paginated list as returned by `fetchJSON`.
 */
export const listHealthChecks = (params?: { status?: string; certificate_id?: string; enabled?: string; page?: number; per_page?: number }): Promise<PaginatedResponse<EndpointHealthCheck>> => {
  // Declared in the order the parameters should appear in the query string.
  const candidates: Array<[string, string | number | undefined]> = [
    ['status', params?.status],
    ['certificate_id', params?.certificate_id],
    ['enabled', params?.enabled],
    ['page', params?.page],
    ['per_page', params?.per_page],
  ];

  const search = new URLSearchParams();
  for (const [name, value] of candidates) {
    if (value) {
      search.set(name, String(value));
    }
  }

  const encoded = search.toString();
  const suffix = encoded ? '?' + encoded : '';
  return fetchJSON<PaginatedResponse<EndpointHealthCheck>>(`${BASE}/health-checks${suffix}`);
};
/**
 * Fetches a single health check by ID.
 *
 * The ID is percent-encoded before being placed in the URL path so that an ID
 * containing reserved characters (`/`, `?`, `#`, ...) cannot alter the route.
 *
 * @param id Identifier of the health check.
 * @returns The health check as returned by `fetchJSON`.
 */
export const getHealthCheck = (id: string) =>
  fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks/${encodeURIComponent(id)}`);
/**
 * Creates a health check by POSTing the given fields, serialized as JSON, to the
 * health-checks collection.
 *
 * @param data Fields for the new health check; any subset of `EndpointHealthCheck`.
 * @returns The created health check as returned by `fetchJSON`.
 */
export const createHealthCheck = (data: Partial<EndpointHealthCheck>) => {
  const payload = JSON.stringify(data);
  return fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks`, { method: 'POST', body: payload });
};
/**
 * Updates a health check by sending the given fields, serialized as JSON, in a PUT.
 *
 * The ID is percent-encoded before being placed in the URL path so that an ID
 * containing reserved characters cannot alter the route.
 *
 * @param id Identifier of the health check to update.
 * @param data Fields to send; any subset of `EndpointHealthCheck`.
 * @returns The updated health check as returned by `fetchJSON`.
 */
export const updateHealthCheck = (id: string, data: Partial<EndpointHealthCheck>) =>
  fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks/${encodeURIComponent(id)}`, { method: 'PUT', body: JSON.stringify(data) });
/**
 * Deletes a health check by ID.
 *
 * The ID is percent-encoded before being placed in the URL path so that an ID
 * containing reserved characters cannot alter the route.
 *
 * @param id Identifier of the health check to delete.
 */
export const deleteHealthCheck = (id: string) =>
  fetchJSON<void>(`${BASE}/health-checks/${encodeURIComponent(id)}`, { method: 'DELETE' });
/**
 * Fetches the recorded probe history for one health check.
 *
 * The ID is percent-encoded before being placed in the URL path so that an ID
 * containing reserved characters cannot alter the route.
 *
 * @param id Identifier of the health check.
 * @param limit Optional cap on the number of entries; omitted from the request
 *   when falsy (`undefined` or `0`).
 * @returns The history entries as returned by `fetchJSON`.
 */
export const getHealthCheckHistory = (id: string, limit?: number) => {
  const query = limit ? `?limit=${limit}` : '';
  return fetchJSON<HealthHistoryEntry[]>(`${BASE}/health-checks/${encodeURIComponent(id)}/history${query}`);
};
/**
 * Acknowledges a health check by POSTing an empty JSON object to its
 * `/acknowledge` sub-resource.
 *
 * The ID is percent-encoded before being placed in the URL path so that an ID
 * containing reserved characters cannot alter the route.
 *
 * @param id Identifier of the health check to acknowledge.
 */
export const acknowledgeHealthCheck = (id: string) =>
  fetchJSON<void>(`${BASE}/health-checks/${encodeURIComponent(id)}/acknowledge`, { method: 'POST', body: JSON.stringify({}) });
/**
 * Fetches the per-status health check counts from the `/health-checks/summary`
 * endpoint.
 *
 * @returns The summary as returned by `fetchJSON`.
 */
export const getHealthCheckSummary = () => {
  const url = `${BASE}/health-checks/summary`;
  return fetchJSON<HealthCheckSummary>(url);
};
+51
View File
@@ -347,3 +347,54 @@ export interface MetricsResponse {
measured_at: string;
};
}
// Health check types (M48)
/**
 * A monitored TLS endpoint together with its most recent probe result,
 * thresholds and acknowledgement state, as exchanged with the health-check API.
 */
export interface EndpointHealthCheck {
  // --- Identity and associations ---
  /** Identifier used in the health-check API paths. */
  id: string;
  /** Address being monitored, e.g. `example.com:443`. */
  endpoint: string;
  /** Optional associated certificate. */
  certificate_id?: string;
  /** Optional associated network scan target. */
  network_scan_target_id?: string;

  // --- Fingerprints ---
  /** SHA-256 fingerprint the endpoint is expected to serve; may be an empty string. */
  expected_fingerprint: string;
  /** Presumably the fingerprint seen on the latest probe (confirm against the API). */
  observed_fingerprint: string;

  // --- Current state ---
  /** One of `healthy`, `degraded`, `down`, `cert_mismatch`, `unknown` in the UI; typed as a plain string. */
  status: string;
  consecutive_failures: number;
  /** Response time in milliseconds. */
  response_time_ms: number;
  failure_reason: string;

  // --- TLS / certificate details ---
  tls_version: string;
  cipher_suite: string;
  cert_subject: string;
  cert_issuer: string;
  cert_expiry?: string;

  // --- Timestamps (strings; exact format not visible here) ---
  last_checked_at?: string;
  last_success_at?: string;
  last_failure_at?: string;
  last_transition_at?: string;

  // --- Configuration ---
  /** Failure count associated with the `degraded` status. */
  degraded_threshold: number;
  /** Failure count associated with the `down` status. */
  down_threshold: number;
  /** Probe interval in seconds. */
  check_interval_seconds: number;
  enabled: boolean;

  // --- Acknowledgement ---
  acknowledged: boolean;
  acknowledged_by?: string;
  acknowledged_at?: string;

  // --- Record bookkeeping ---
  created_at: string;
  updated_at: string;
}
/**
 * One entry in the list returned by the health-check history endpoint.
 */
export interface HealthHistoryEntry {
  /** Identifier of this history entry. */
  id: string;
  /** Identifier of the health check this entry belongs to. */
  health_check_id: string;

  /** Timestamp string for the entry (exact format not visible here). */
  checked_at: string;
  /** Status recorded for this entry; typed as a plain string. */
  status: string;
  /** Response time in milliseconds. */
  response_time_ms: number;
  fingerprint: string;
  failure_reason: string;
}
/**
 * Health check counts keyed by status, plus a `total` field, as returned by the
 * health-check summary endpoint.
 */
export interface HealthCheckSummary {
  /** Count reported across all statuses. */
  total: number;

  // Per-status counts.
  healthy: number;
  degraded: number;
  down: number;
  cert_mismatch: number;
  unknown: number;
}
+1
View File
@@ -18,6 +18,7 @@ const nav = [
{ to: '/agent-groups', label: 'Agent Groups', icon: 'M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10 M9 3v2m6-2v2' },
{ to: '/discovery', label: 'Discovery', icon: 'M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z' },
{ to: '/network-scans', label: 'Network Scans', icon: 'M3.055 11H5a2 2 0 012 2v1a2 2 0 002 2 2 2 0 012 2v2.945M8 3.935V5.5A2.5 2.5 0 0010.5 8h.5a2 2 0 012 2 2 2 0 104 0 2 2 0 012-2h1.064M15 20.488V18a2 2 0 012-2h3.064M21 12a9 9 0 11-18 0 9 9 0 0118 0z M9 12l2 2 4-4' },
{ to: '/health-monitor', label: 'Health Monitor', icon: 'M4.318 6.318a4.5 4.5 0 000 6.364L12 20.364l7.682-7.682a4.5 4.5 0 00-6.364-6.364L12 7.636l-1.318-1.318a4.5 4.5 0 00-6.364 0z' },
{ to: '/short-lived', label: 'Short-Lived', icon: 'M13 10V3L4 14h7v7l9-11h-7z' },
{ to: '/digest', label: 'Digest', icon: 'M3 8l7.89 5.26a2 2 0 002.22 0L21 8M5 19h14a2 2 0 002-2V7a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z' },
{ to: '/observability', label: 'Observability', icon: 'M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z' },
+6
View File
@@ -31,6 +31,12 @@ const statusStyles: Record<string, string> = {
pending: 'badge-warning',
failed: 'badge-danger',
read: 'badge-neutral',
// Health check statuses
healthy: 'badge-success',
degraded: 'badge-warning',
down: 'badge-danger',
cert_mismatch: 'badge-warning',
unknown: 'badge-neutral',
};
export default function StatusBadge({ status }: { status: string }) {
+2
View File
@@ -25,6 +25,7 @@ import ShortLivedPage from './pages/ShortLivedPage';
import AgentFleetPage from './pages/AgentFleetPage';
import DiscoveryPage from './pages/DiscoveryPage';
import NetworkScanPage from './pages/NetworkScanPage';
import HealthMonitorPage from './pages/HealthMonitorPage';
import DigestPage from './pages/DigestPage';
import ObservabilityPage from './pages/ObservabilityPage';
import JobDetailPage from './pages/JobDetailPage';
@@ -73,6 +74,7 @@ createRoot(document.getElementById('root')!).render(
<Route path="short-lived" element={<ShortLivedPage />} />
<Route path="discovery" element={<DiscoveryPage />} />
<Route path="network-scans" element={<NetworkScanPage />} />
<Route path="health-monitor" element={<HealthMonitorPage />} />
<Route path="digest" element={<DigestPage />} />
<Route path="observability" element={<ObservabilityPage />} />
</Route>
+302
View File
@@ -0,0 +1,302 @@
import { useState } from 'react';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import {
listHealthChecks,
createHealthCheck,
deleteHealthCheck,
acknowledgeHealthCheck,
getHealthCheckSummary,
} from '../api/client';
import PageHeader from '../components/PageHeader';
import DataTable from '../components/DataTable';
import type { Column } from '../components/DataTable';
import ErrorState from '../components/ErrorState';
import StatusBadge from '../components/StatusBadge';
import { formatDateTime } from '../api/utils';
import type { EndpointHealthCheck, HealthCheckSummary } from '../api/types';
/** Smallest check interval (seconds) the form accepts; mirrors the input's `min`. */
const MIN_CHECK_INTERVAL_SECONDS = 60;
/** Smallest failure threshold the form accepts; mirrors the inputs' `min`. */
const MIN_FAILURE_THRESHOLD = 1;

/**
 * Parses the text of a numeric form field.
 *
 * @param raw Current text of the input.
 * @param min Smallest acceptable value.
 * @returns The parsed whole number, or `undefined` when the text is empty, is
 *   not a plain non-negative integer, or is below `min`.
 */
function parseIntegerField(raw: string, min: number): number | undefined {
  const text = raw.trim();
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const value = Number(text);
  return Number.isSafeInteger(value) && value >= min ? value : undefined;
}

/**
 * Modal dialog for creating a health check.
 *
 * Collects an endpoint, an optional expected fingerprint and three numeric
 * settings, then hands them to `onCreate` with `enabled: true`. Clicking the
 * backdrop or "Cancel" calls `onClose`.
 *
 * The "Create" button stays disabled until the endpoint is non-blank and every
 * numeric field holds a whole number at or above its minimum. Previously a
 * cleared numeric field was submitted as `NaN` (serialized to `null` by
 * `JSON.stringify`), and the minimums were never enforced.
 *
 * @param onClose Called when the dialog should be dismissed.
 * @param onCreate Called with the validated form values when "Create" is clicked.
 */
function CreateHealthCheckModal({ onClose, onCreate }: {
  onClose: () => void;
  onCreate: (data: Partial<EndpointHealthCheck>) => void;
}) {
  const [endpoint, setEndpoint] = useState('');
  const [expectedFingerprint, setExpectedFingerprint] = useState('');
  const [checkInterval, setCheckInterval] = useState('300');
  const [degradedThreshold, setDegradedThreshold] = useState('2');
  const [downThreshold, setDownThreshold] = useState('5');

  // Derive the validated values on every render so the button state and the
  // submitted payload can never disagree.
  const trimmedEndpoint = endpoint.trim();
  const intervalSeconds = parseIntegerField(checkInterval, MIN_CHECK_INTERVAL_SECONDS);
  const degradedAfter = parseIntegerField(degradedThreshold, MIN_FAILURE_THRESHOLD);
  const downAfter = parseIntegerField(downThreshold, MIN_FAILURE_THRESHOLD);
  const numbersValid = intervalSeconds !== undefined && degradedAfter !== undefined && downAfter !== undefined;
  const canSubmit = trimmedEndpoint !== '' && numbersValid;

  const handleSubmit = () => {
    // Re-check here so the values are narrowed to `number` for the payload.
    if (trimmedEndpoint === '' || intervalSeconds === undefined || degradedAfter === undefined || downAfter === undefined) {
      return;
    }
    onCreate({
      endpoint: trimmedEndpoint,
      // Pasted fingerprints often carry stray whitespace; an empty value is kept
      // empty so it can be auto-detected as the help text promises.
      expected_fingerprint: expectedFingerprint.trim(),
      check_interval_seconds: intervalSeconds,
      degraded_threshold: degradedAfter,
      down_threshold: downAfter,
      enabled: true,
    });
  };

  return (
    <div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50" onClick={onClose}>
      {/* Stop propagation so clicks inside the dialog do not reach the backdrop's close handler. */}
      <div className="bg-white rounded-lg shadow-xl w-full max-w-lg mx-4" onClick={e => e.stopPropagation()}>
        <div className="px-6 py-4 border-b border-surface-border">
          <h3 className="text-lg font-semibold text-ink">New Health Check</h3>
          <p className="text-sm text-ink-muted mt-1">Monitor a TLS endpoint for certificate health</p>
        </div>
        <div className="px-6 py-4 space-y-4">
          <div>
            <label htmlFor="hc-endpoint" className="block text-sm font-medium text-ink mb-1">Endpoint <span className="text-red-500">*</span></label>
            <input
              id="hc-endpoint"
              type="text"
              value={endpoint}
              onChange={e => setEndpoint(e.target.value)}
              placeholder="e.g., example.com:443"
              className="w-full border border-surface-border rounded px-3 py-2 text-sm text-ink bg-white focus:outline-none focus:ring-2 focus:ring-brand-500"
            />
          </div>
          <div>
            <label htmlFor="hc-expected-fingerprint" className="block text-sm font-medium text-ink mb-1">Expected Fingerprint (SHA-256)</label>
            <input
              id="hc-expected-fingerprint"
              type="text"
              value={expectedFingerprint}
              onChange={e => setExpectedFingerprint(e.target.value)}
              placeholder="Optional: auto-populated from deployment"
              className="w-full border border-surface-border rounded px-3 py-2 text-sm text-ink bg-white font-mono focus:outline-none focus:ring-2 focus:ring-brand-500"
            />
            <p className="text-xs text-ink-faint mt-1">Leave empty to auto-detect from first successful probe</p>
          </div>
          <div className="grid grid-cols-3 gap-3">
            <div>
              <label htmlFor="hc-check-interval" className="block text-sm font-medium text-ink mb-1">Check Interval (s)</label>
              <input
                id="hc-check-interval"
                type="number"
                value={checkInterval}
                onChange={e => setCheckInterval(e.target.value)}
                min={MIN_CHECK_INTERVAL_SECONDS}
                className="w-full border border-surface-border rounded px-3 py-2 text-sm text-ink bg-white focus:outline-none focus:ring-2 focus:ring-brand-500"
              />
            </div>
            <div>
              <label htmlFor="hc-degraded-threshold" className="block text-sm font-medium text-ink mb-1">Degraded Threshold</label>
              <input
                id="hc-degraded-threshold"
                type="number"
                value={degradedThreshold}
                onChange={e => setDegradedThreshold(e.target.value)}
                min={MIN_FAILURE_THRESHOLD}
                className="w-full border border-surface-border rounded px-3 py-2 text-sm text-ink bg-white focus:outline-none focus:ring-2 focus:ring-brand-500"
              />
            </div>
            <div>
              <label htmlFor="hc-down-threshold" className="block text-sm font-medium text-ink mb-1">Down Threshold</label>
              <input
                id="hc-down-threshold"
                type="number"
                value={downThreshold}
                onChange={e => setDownThreshold(e.target.value)}
                min={MIN_FAILURE_THRESHOLD}
                className="w-full border border-surface-border rounded px-3 py-2 text-sm text-ink bg-white focus:outline-none focus:ring-2 focus:ring-brand-500"
              />
            </div>
          </div>
          {!numbersValid && (
            <p className="text-xs text-red-500" role="alert">
              Check interval must be a whole number of at least {MIN_CHECK_INTERVAL_SECONDS}; thresholds must be whole numbers of at least {MIN_FAILURE_THRESHOLD}.
            </p>
          )}
        </div>
        <div className="px-6 py-3 border-t border-surface-border flex justify-end gap-2">
          <button type="button" onClick={onClose} className="px-4 py-2 text-sm text-ink-muted hover:text-ink rounded border border-surface-border">
            Cancel
          </button>
          <button
            type="button"
            onClick={handleSubmit}
            disabled={!canSubmit}
            className="px-4 py-2 text-sm text-white bg-brand-600 hover:bg-brand-700 rounded disabled:opacity-50 disabled:cursor-not-allowed"
          >
            Create
          </button>
        </div>
      </div>
    </div>
  );
}
/**
 * Renders a five-column strip showing one count per health status.
 * The `total` field of the summary is not displayed.
 *
 * @param summary Per-status counts to display.
 */
function SummaryBar({ summary }: { summary: HealthCheckSummary }) {
  const { healthy, degraded, down, cert_mismatch: certMismatch, unknown } = summary;

  // [label, count, text colour class], in display order.
  const tiles: Array<[string, number, string]> = [
    ['Healthy', healthy, 'text-green-600'],
    ['Degraded', degraded, 'text-yellow-600'],
    ['Down', down, 'text-red-600'],
    ['Cert Mismatch', certMismatch, 'text-orange-600'],
    ['Unknown', unknown, 'text-gray-500'],
  ];

  return (
    <div className="grid grid-cols-5 gap-3 px-6 py-4 bg-white border-b border-surface-border">
      {tiles.map(([label, count, color]) => (
        <div key={label} className="text-center">
          <p className={`text-2xl font-bold ${color}`}>{count}</p>
          <p className="text-xs text-ink-muted mt-1">{label}</p>
        </div>
      ))}
    </div>
  );
}
/**
 * Health Monitor page.
 *
 * Shows the per-status summary bar, a status filter, the table of health checks
 * (first 100, refreshed every 30 seconds) and the "New Health Check" modal.
 * Rows can be acknowledged (when not healthy and not yet acknowledged) or deleted.
 *
 * Failures of the create / delete / acknowledge requests are shown in a
 * dismissible alert instead of being dropped silently, and the empty-state text
 * distinguishes "nothing configured" from "nothing matches the selected status".
 */
export default function HealthMonitorPage() {
  const [showCreate, setShowCreate] = useState(false);
  const [statusFilter, setStatusFilter] = useState<string | undefined>();
  const queryClient = useQueryClient();

  const { data, isLoading, error, refetch } = useQuery({
    queryKey: ['health-checks', statusFilter],
    queryFn: () => listHealthChecks({ status: statusFilter, page: 1, per_page: 100 }),
    refetchInterval: 30000,
  });

  const summaryQuery = useQuery({
    queryKey: ['health-checks-summary'],
    queryFn: () => getHealthCheckSummary(),
    refetchInterval: 30000,
  });

  // Every mutation changes both the list and the counts, so both are refreshed.
  const refreshHealthData = () => {
    queryClient.invalidateQueries({ queryKey: ['health-checks'] });
    queryClient.invalidateQueries({ queryKey: ['health-checks-summary'] });
  };

  const createMutation = useMutation({
    mutationFn: createHealthCheck,
    onSuccess: () => {
      refreshHealthData();
      setShowCreate(false);
    },
  });

  const deleteMutation = useMutation({
    mutationFn: deleteHealthCheck,
    onSuccess: refreshHealthData,
  });

  const acknowledgeMutation = useMutation({
    mutationFn: acknowledgeHealthCheck,
    onSuccess: refreshHealthData,
  });

  // Surface the first failed mutation, if any.
  const mutationError = createMutation.error ?? deleteMutation.error ?? acknowledgeMutation.error;
  const mutationErrorMessage = mutationError instanceof Error ? mutationError.message : 'Request failed';
  const dismissMutationError = () => {
    createMutation.reset();
    deleteMutation.reset();
    acknowledgeMutation.reset();
  };

  const columns: Column<EndpointHealthCheck>[] = [
    {
      key: 'endpoint',
      label: 'Endpoint',
      render: (row) => row.endpoint,
    },
    {
      key: 'status',
      label: 'Status',
      render: (row) => <StatusBadge status={row.status} />,
    },
    {
      key: 'response_time_ms',
      label: 'Response Time (ms)',
      render: (row) => row.response_time_ms ? `${row.response_time_ms}ms` : '—',
    },
    {
      key: 'last_checked_at',
      label: 'Last Checked',
      render: (row) => row.last_checked_at ? formatDateTime(row.last_checked_at) : '—',
    },
    {
      key: 'last_transition_at',
      label: 'Last Transition',
      render: (row) => row.last_transition_at ? formatDateTime(row.last_transition_at) : '—',
    },
    {
      key: 'acknowledged',
      label: 'Acknowledged',
      render: (row) => row.acknowledged ? '✓' : '—',
    },
    {
      key: 'actions',
      label: 'Actions',
      render: (row) => (
        <div className="flex gap-2">
          {/* Only unhealthy, not-yet-acknowledged checks can be acknowledged. */}
          {!row.acknowledged && row.status !== 'healthy' && (
            <button
              onClick={() => acknowledgeMutation.mutate(row.id)}
              className="text-xs px-2 py-1 text-blue-600 hover:text-blue-700 font-medium"
              disabled={acknowledgeMutation.isPending}
            >
              Acknowledge
            </button>
          )}
          <button
            onClick={() => deleteMutation.mutate(row.id)}
            className="text-xs px-2 py-1 text-red-600 hover:text-red-700 font-medium"
            disabled={deleteMutation.isPending}
          >
            Delete
          </button>
        </div>
      ),
    },
  ];

  // A failed list query replaces the whole page with a retryable error state.
  if (error) {
    return <ErrorState error={error as Error} onRetry={refetch} />;
  }

  const hasRows = data !== undefined && data.data.length > 0;
  const emptyMessage = statusFilter
    ? 'No health checks match the selected status'
    : 'No health checks configured';

  return (
    <div className="flex flex-col overflow-hidden">
      <PageHeader
        title="Health Monitor"
        subtitle="Monitor TLS endpoints for certificate health and deployment success"
      />
      {summaryQuery.data && <SummaryBar summary={summaryQuery.data} />}

      <div className="flex-1 flex flex-col overflow-hidden bg-white m-6 rounded-lg shadow">
        <div className="px-6 py-4 border-b border-surface-border flex items-center justify-between">
          <div className="flex items-center gap-4">
            <select
              value={statusFilter || ''}
              onChange={e => setStatusFilter(e.target.value || undefined)}
              className="text-sm border border-surface-border rounded px-3 py-2 text-ink bg-white focus:outline-none focus:ring-2 focus:ring-brand-500"
            >
              <option value="">All Statuses</option>
              <option value="healthy">Healthy</option>
              <option value="degraded">Degraded</option>
              <option value="down">Down</option>
              <option value="cert_mismatch">Cert Mismatch</option>
              <option value="unknown">Unknown</option>
            </select>
          </div>
          <button
            onClick={() => setShowCreate(true)}
            className="px-4 py-2 text-sm text-white bg-brand-600 hover:bg-brand-700 rounded"
          >
            New Health Check
          </button>
        </div>

        <div className="flex-1 overflow-auto">
          {isLoading ? (
            <div className="flex items-center justify-center h-full">
              <span className="text-ink-muted">Loading health checks...</span>
            </div>
          ) : hasRows ? (
            <DataTable<EndpointHealthCheck>
              columns={columns}
              data={data.data}
              keyField="id"
            />
          ) : (
            <div className="flex items-center justify-center h-full">
              <span className="text-ink-muted">{emptyMessage}</span>
            </div>
          )}
        </div>
      </div>

      {showCreate && (
        <CreateHealthCheckModal
          onClose={() => setShowCreate(false)}
          onCreate={payload => createMutation.mutate(payload)}
        />
      )}

      {/* Rendered after the modal with the same z-index so it paints above the modal backdrop. */}
      {mutationError && (
        <div
          role="alert"
          className="fixed bottom-4 right-4 z-50 flex items-center gap-3 bg-white border border-surface-border rounded shadow-xl px-4 py-3"
        >
          <span className="text-sm text-red-600">{mutationErrorMessage}</span>
          <button
            type="button"
            onClick={dismissMutationError}
            className="text-xs px-2 py-1 text-ink-muted hover:text-ink font-medium"
          >
            Dismiss
          </button>
        </div>
      )}
    </div>
  );
}