feat(M48): continuous TLS health monitoring — endpoint state machine, shared tlsprobe, 8 API endpoints, GUI

Adds continuous TLS endpoint health monitoring that closes the deploy→verify→monitor loop.
After M25 verifies a deployment succeeded once, M48 continuously confirms it stays healthy.

Key components:
- Shared `internal/tlsprobe/` package extracted from network scanner for reuse
- Health status state machine: healthy → degraded (2 failures) → down (5 failures),
  plus cert_mismatch when served fingerprint differs from expected
- 8th scheduler loop (60s tick, per-endpoint configurable intervals)
- PostgreSQL migration 000011: endpoint_health_checks + endpoint_health_history tables
- 8 REST API endpoints (CRUD, history, acknowledge, summary)
- Health Monitor GUI page with summary bar, status table, create modal, auto-refresh
- 38 new tests (5 tlsprobe + 11 domain + 10 service + 8 handler + 4 frontend)
- All coverage thresholds maintained (service 68%, handler 83%, domain 87%, middleware 63%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-04-15 21:45:45 -04:00
parent f2e60b93a3
commit 596d86a206
29 changed files with 3540 additions and 30 deletions
+36 -1
View File
@@ -1,4 +1,4 @@
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget } from './types';
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget, EndpointHealthCheck, HealthHistoryEntry, HealthCheckSummary } from './types';
// Path prefix prepended to API route paths built in this module (e.g. `${BASE}/health-checks`).
const BASE = '/api/v1';
@@ -432,3 +432,38 @@ export const getPrometheusMetrics = () => {
// Health
/**
 * Requests the `/health` endpoint and resolves with its `{ status }` payload.
 *
 * The path is handed to `fetchJSON` verbatim; unlike the other routes in this
 * module it is not prefixed with `BASE`.
 */
export const getHealth = (): Promise<{ status: string }> => {
  return fetchJSON<{ status: string }>('/health');
};
// Health checks (M48)
/**
 * Lists endpoint health checks, optionally filtered and paginated.
 *
 * Each entry of `params` is forwarded as a query-string parameter only when
 * it is truthy, so `undefined`, `''` and `0` are all omitted. When nothing is
 * forwarded, the request URL carries no `?` suffix at all.
 *
 * @param params - Optional filters (`status`, `certificate_id`, `enabled`)
 *   and pagination controls (`page`, `per_page`).
 * @returns The paginated response produced by `fetchJSON`.
 */
export const listHealthChecks = (params?: { status?: string; certificate_id?: string; enabled?: string; page?: number; per_page?: number }): Promise<PaginatedResponse<EndpointHealthCheck>> => {
  // Listed in the order the parameters should appear in the query string.
  const candidates: Array<[string, string | number | undefined]> = [
    ['status', params?.status],
    ['certificate_id', params?.certificate_id],
    ['enabled', params?.enabled],
    ['page', params?.page],
    ['per_page', params?.per_page],
  ];

  const search = new URLSearchParams();
  for (const [key, value] of candidates) {
    // Truthiness filter: skips undefined, empty strings and zero.
    if (value) {
      search.set(key, String(value));
    }
  }

  const encoded = search.toString();
  const suffix = encoded ? '?' + encoded : '';
  return fetchJSON<PaginatedResponse<EndpointHealthCheck>>(`${BASE}/health-checks${suffix}`);
};
/**
 * Fetches a single endpoint health check by ID.
 *
 * Requests `${BASE}/health-checks/{id}` with no request options.
 *
 * @param id - Identifier of the health check to load.
 * @returns Whatever `fetchJSON` resolves with, typed as `EndpointHealthCheck`.
 */
export const getHealthCheck = (id: string) =>
  // Percent-encode the ID so reserved characters (`/`, `?`, `#`, `%`) stay
  // inside this single path segment instead of altering the requested route.
  fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks/${encodeURIComponent(id)}`);
/**
 * Creates a new endpoint health check.
 *
 * Sends a `POST` to `${BASE}/health-checks` with `data` serialized as the
 * JSON request body.
 *
 * @param data - Fields of the health check to create.
 * @returns Whatever `fetchJSON` resolves with, typed as `EndpointHealthCheck`.
 */
export const createHealthCheck = (data: Partial<EndpointHealthCheck>) => {
  const payload = JSON.stringify(data);
  return fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks`, { method: 'POST', body: payload });
};
/**
 * Updates an existing endpoint health check.
 *
 * Sends a `PUT` to `${BASE}/health-checks/{id}` with `data` serialized as the
 * JSON request body.
 *
 * @param id - Identifier of the health check to update.
 * @param data - Fields to send in the request body.
 * @returns Whatever `fetchJSON` resolves with, typed as `EndpointHealthCheck`.
 */
export const updateHealthCheck = (id: string, data: Partial<EndpointHealthCheck>) =>
  // Percent-encode the ID so reserved characters (`/`, `?`, `#`, `%`) stay
  // inside this single path segment instead of altering the requested route.
  fetchJSON<EndpointHealthCheck>(`${BASE}/health-checks/${encodeURIComponent(id)}`, {
    method: 'PUT',
    body: JSON.stringify(data),
  });
/**
 * Deletes an endpoint health check.
 *
 * Sends a `DELETE` to `${BASE}/health-checks/{id}`.
 *
 * @param id - Identifier of the health check to delete.
 * @returns The promise produced by `fetchJSON`; no payload is expected.
 */
export const deleteHealthCheck = (id: string) =>
  // Percent-encode the ID so a value containing `/`, `?`, `#` or `%` cannot
  // redirect this destructive request to a different route.
  fetchJSON<void>(`${BASE}/health-checks/${encodeURIComponent(id)}`, { method: 'DELETE' });
/**
 * Fetches the recorded history entries for one endpoint health check.
 *
 * Requests `${BASE}/health-checks/{id}/history`, appending `?limit=<n>` only
 * when `limit` is truthy (so `undefined` and `0` both omit the parameter).
 *
 * @param id - Identifier of the health check whose history is requested.
 * @param limit - Optional value for the `limit` query parameter.
 * @returns Whatever `fetchJSON` resolves with, typed as `HealthHistoryEntry[]`.
 */
export const getHealthCheckHistory = (id: string, limit?: number) => {
  const query = limit ? `?limit=${limit}` : '';
  // Percent-encode the ID so reserved characters (`/`, `?`, `#`, `%`) stay
  // inside this single path segment and cannot collide with the query string.
  return fetchJSON<HealthHistoryEntry[]>(`${BASE}/health-checks/${encodeURIComponent(id)}/history${query}`);
};
/**
 * Acknowledges an endpoint health check.
 *
 * Sends a `POST` to `${BASE}/health-checks/{id}/acknowledge` with an empty
 * JSON object (`{}`) as the request body.
 *
 * @param id - Identifier of the health check to acknowledge.
 * @returns The promise produced by `fetchJSON`; no payload is expected.
 */
export const acknowledgeHealthCheck = (id: string) =>
  // Percent-encode the ID so reserved characters (`/`, `?`, `#`, `%`) stay
  // inside this single path segment instead of altering the requested route.
  fetchJSON<void>(`${BASE}/health-checks/${encodeURIComponent(id)}/acknowledge`, {
    method: 'POST',
    body: JSON.stringify({}),
  });
/**
 * Fetches the health-check summary.
 *
 * Requests `${BASE}/health-checks/summary` with no request options.
 *
 * @returns Whatever `fetchJSON` resolves with, typed as `HealthCheckSummary`.
 */
export const getHealthCheckSummary = () => {
  const url = `${BASE}/health-checks/summary`;
  return fetchJSON<HealthCheckSummary>(url);
};