feat: add network certificate discovery (M21) and Prometheus metrics (M22)

M21 adds server-side active TLS scanning of CIDR ranges with concurrent
probing, sentinel agent pattern for pipeline reuse, and full CRUD API for
scan targets. M22 adds Prometheus exposition format endpoint alongside
existing JSON metrics. Comprehensive documentation audit updates all docs
to reflect 91 endpoints, 19 tables, 6 scheduler loops, and 900+ tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-03-24 23:37:47 -04:00
parent d613d98c72
commit 4f90be9311
26 changed files with 2022 additions and 71 deletions
+25
View File
@@ -207,6 +207,24 @@ func main() {
agentGroupService := service.NewAgentGroupService(agentGroupRepo, auditService) agentGroupService := service.NewAgentGroupService(agentGroupRepo, auditService)
discoveryRepo := postgres.NewDiscoveryRepository(db) discoveryRepo := postgres.NewDiscoveryRepository(db)
discoveryService := service.NewDiscoveryService(discoveryRepo, certificateRepo, auditService) discoveryService := service.NewDiscoveryService(discoveryRepo, certificateRepo, auditService)
networkScanRepo := postgres.NewNetworkScanRepository(db)
networkScanService := service.NewNetworkScanService(networkScanRepo, discoveryService, auditService, logger)
logger.Info("initialized network scan service")
// Ensure the sentinel "server-scanner" agent exists for network discovery dedup.
// This agent ID is used as the agent_id in discovered_certificates for network-scanned certs.
if cfg.NetworkScan.Enabled {
sentinelAgent := &domain.Agent{
ID: service.SentinelAgentID,
Name: "Network Scanner (Server-Side)",
Status: domain.AgentStatusOnline,
}
if err := agentRepo.Create(context.Background(), sentinelAgent); err != nil {
// Ignore duplicate key errors (agent already exists)
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAgentID)
}
}
logger.Info("initialized all services") logger.Info("initialized all services")
// Initialize stats and metrics services // Initialize stats and metrics services
@@ -230,6 +248,7 @@ func main() {
metricsHandler := handler.NewMetricsHandler(statsService, time.Now()) metricsHandler := handler.NewMetricsHandler(statsService, time.Now())
healthHandler := handler.NewHealthHandler(cfg.Auth.Type) healthHandler := handler.NewHealthHandler(cfg.Auth.Type)
discoveryHandler := handler.NewDiscoveryHandler(discoveryService) discoveryHandler := handler.NewDiscoveryHandler(discoveryService)
networkScanHandler := handler.NewNetworkScanHandler(networkScanService)
logger.Info("initialized all handlers") logger.Info("initialized all handlers")
// Create context with cancellation // Create context with cancellation
@@ -242,6 +261,7 @@ func main() {
jobService, jobService,
agentService, agentService,
notificationService, notificationService,
networkScanService,
logger, logger,
) )
@@ -250,6 +270,10 @@ func main() {
sched.SetJobProcessorInterval(cfg.Scheduler.JobProcessorInterval) sched.SetJobProcessorInterval(cfg.Scheduler.JobProcessorInterval)
sched.SetAgentHealthCheckInterval(cfg.Scheduler.AgentHealthCheckInterval) sched.SetAgentHealthCheckInterval(cfg.Scheduler.AgentHealthCheckInterval)
sched.SetNotificationProcessInterval(cfg.Scheduler.NotificationProcessInterval) sched.SetNotificationProcessInterval(cfg.Scheduler.NotificationProcessInterval)
if cfg.NetworkScan.Enabled {
sched.SetNetworkScanInterval(cfg.NetworkScan.ScanInterval)
logger.Info("network scanning enabled", "interval", cfg.NetworkScan.ScanInterval.String())
}
// Start scheduler // Start scheduler
logger.Info("starting scheduler") logger.Info("starting scheduler")
@@ -276,6 +300,7 @@ func main() {
metricsHandler, metricsHandler,
healthHandler, healthHandler,
discoveryHandler, discoveryHandler,
networkScanHandler,
) )
logger.Info("registered all API handlers") logger.Info("registered all API handlers")
+42 -29
View File
@@ -25,12 +25,12 @@ flowchart TB
API["REST API\n(Go net/http, :8443)"] API["REST API\n(Go net/http, :8443)"]
SVC["Service Layer"] SVC["Service Layer"]
REPO["Repository Layer\n(database/sql + lib/pq)"] REPO["Repository Layer\n(database/sql + lib/pq)"]
SCHED["Background Scheduler\n5 loops"] SCHED["Background Scheduler\n6 loops"]
DASH["Web Dashboard\n(React SPA)"] DASH["Web Dashboard\n(React SPA)"]
end end
subgraph "Data Store" subgraph "Data Store"
PG[("PostgreSQL 16\n18 tables\nTEXT primary keys")] PG[("PostgreSQL 16\n19 tables\nTEXT primary keys")]
end end
subgraph "Agent Fleet" subgraph "Agent Fleet"
@@ -374,7 +374,7 @@ Short-lived certificates (those with profile TTL < 1 hour) return "good" from OC
### 4. Automatic Renewal ### 4. Automatic Renewal
The control plane runs a scheduler with five background loops: The control plane runs a scheduler with six background loops:
```mermaid ```mermaid
flowchart LR flowchart LR
@@ -384,6 +384,7 @@ flowchart LR
H["Agent Health\n⏱ every 2m"] H["Agent Health\n⏱ every 2m"]
N["Notification Processor\n⏱ every 1m"] N["Notification Processor\n⏱ every 1m"]
SL["Short-Lived Expiry\n⏱ every 30s"] SL["Short-Lived Expiry\n⏱ every 30s"]
NS["Network Scanner\n⏱ every 6h"]
end end
R -->|"Find expiring certs\nCreate renewal jobs"| DB[("PostgreSQL")] R -->|"Find expiring certs\nCreate renewal jobs"| DB[("PostgreSQL")]
@@ -391,6 +392,7 @@ flowchart LR
H -->|"Check heartbeat staleness\nMark agents offline"| DB H -->|"Check heartbeat staleness\nMark agents offline"| DB
N -->|"Send pending notifications\nEmail / Webhook / Slack"| DB N -->|"Send pending notifications\nEmail / Webhook / Slack"| DB
SL -->|"Expire short-lived certs\nMark as Expired"| DB SL -->|"Expire short-lived certs\nMark as Expired"| DB
NS -->|"Probe TLS endpoints\nStore discovered certs"| DB
``` ```
| Loop | Interval | Timeout | Purpose | | Loop | Interval | Timeout | Purpose |
@@ -400,6 +402,7 @@ flowchart LR
| Agent health check | 2 minutes | 1 minute | Marks agents as offline if heartbeat is stale | | Agent health check | 2 minutes | 1 minute | Marks agents as offline if heartbeat is stale |
| Notification processor | 1 minute | 1 minute | Sends pending notifications via configured channels | | Notification processor | 1 minute | 1 minute | Sends pending notifications via configured channels |
| Short-lived expiry | 30 seconds | 30 seconds | Marks expired short-lived certificates (profile TTL < 1 hour) | | Short-lived expiry | 30 seconds | 30 seconds | Marks expired short-lived certificates (profile TTL < 1 hour) |
| Network scanner | 6 hours | 30 minutes | Probes TLS endpoints on configured CIDR ranges, stores discovered certs (M21, opt-in via `CERTCTL_NETWORK_SCAN_ENABLED`) |
Each operation has a context timeout to prevent indefinite hangs if external services become unresponsive. Each operation has a context timeout to prevent indefinite hangs if external services become unresponsive.
@@ -605,7 +608,7 @@ All endpoints are under `/api/v1/` and follow consistent patterns:
Resources: certificates, issuers, targets, agents, jobs, policies, profiles, teams, owners, agent-groups, audit, notifications. Resources: certificates, issuers, targets, agents, jobs, policies, profiles, teams, owners, agent-groups, audit, notifications.
The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 78 documented operations (including health, readiness, and auth endpoints; 7 discovery endpoints from M18b pending spec update), all request/response schemas, and pagination conventions. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation. The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 91 endpoints across 19 resource domains (including health, readiness, auth, 7 discovery endpoints from M18b, 6 network scan endpoints from M21, and Prometheus metrics from M22), all request/response schemas, and pagination conventions. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation.
Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST /api/v1/jobs/{id}/approve`, `POST /api/v1/jobs/{id}/reject`. Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST /api/v1/jobs/{id}/approve`, `POST /api/v1/jobs/{id}/reject`.
@@ -703,54 +706,64 @@ flowchart TB
For production, you would also add an ingress controller, TLS termination for the certctl API itself, and external PostgreSQL (RDS, Cloud SQL, etc.). For production, you would also add an ingress controller, TLS termination for the certctl API itself, and external PostgreSQL (RDS, Cloud SQL, etc.).
## Discovery Data Flow (M18b) ## Discovery Data Flow (M18b + M21)
Certificate discovery enables operators to build a complete inventory of existing certificates before managing them with certctl. Here's how data flows through the system: Certificate discovery enables operators to build a complete inventory of existing certificates before managing them with certctl. There are two discovery modes that feed into the same pipeline:
```mermaid ```mermaid
flowchart TB flowchart TB
AGENT["certctl-agent\n(on infrastructure)"] subgraph "Discovery Sources"
SCAN["Filesystem Scanner\n(CERTCTL_DISCOVERY_DIRS)"] AGENT["certctl-agent\n(filesystem discovery)"]
SCAN["Filesystem Scanner\n(CERTCTL_DISCOVERY_DIRS)"]
SERVER["certctl-server\n(network discovery)"]
NETSCAN["TLS Scanner\n(CIDR ranges + ports)"]
end
EXTRACT["Extract Metadata\n(CN, SANs, serial, issuer, expiry, fingerprint)"] EXTRACT["Extract Metadata\n(CN, SANs, serial, issuer, expiry, fingerprint)"]
REPORT["POST /api/v1/agents/{id}/discoveries\n(submit scan results)"]
HANDLER["Discovery Handler\n(parse request)"]
SERVICE["Discovery Service\n(ProcessDiscoveryReport)"] SERVICE["Discovery Service\n(ProcessDiscoveryReport)"]
REPO["Discovery Repository\n(upsert with fingerprint dedup)"] REPO["Discovery Repository\n(upsert with fingerprint dedup)"]
DB["PostgreSQL\ndiscovered_certificates\ndiscovery_scans tables"] DB["PostgreSQL\ndiscovered_certificates\ndiscovery_scans tables"]
AUDIT["Audit Service\n(RecordDiscoveryScanCompleted)"] AUDIT["Audit Service\n(RecordDiscoveryScanCompleted)"]
API_LIST["GET /api/v1/discovered-certificates\n(list for triage)"] API_LIST["GET /api/v1/discovered-certificates\n(list for triage)"]
API_CLAIM["POST /discovered-certificates/{id}/claim\n(operator claims cert)"] API_CLAIM["POST /discovered-certificates/{id}/claim"]
API_DISMISS["POST /discovered-certificates/{id}/dismiss\n(operator dismisses)"] API_DISMISS["POST /discovered-certificates/{id}/dismiss"]
UPDATE_STATUS["Update Status\n(Unmanaged → Managed/Dismissed)"]
AGENT -->|"Scan loop\n(startup + 6h)"| SCAN AGENT -->|"Scan loop\n(startup + 6h)"| SCAN
SCAN --> EXTRACT SCAN --> EXTRACT
EXTRACT --> REPORT SERVER -->|"Scheduler loop\n(every 6h)"| NETSCAN
REPORT --> HANDLER NETSCAN -->|"crypto/tls.Dial\n50 goroutines"| EXTRACT
HANDLER --> SERVICE EXTRACT --> SERVICE
SERVICE --> REPO SERVICE --> REPO
REPO -->|"Dedup by fingerprint\n+ agent + path"| DB REPO -->|"Dedup by fingerprint\n+ agent_id + source_path"| DB
SERVICE --> AUDIT SERVICE --> AUDIT
AUDIT -->|"discovery_scan_completed"| DB
DB -->|"query unmanaged"| API_LIST
API_LIST -->|"operator reviews"| API_CLAIM
API_LIST -->|"operator reviews"| API_DISMISS
API_CLAIM --> UPDATE_STATUS
API_DISMISS --> UPDATE_STATUS
UPDATE_STATUS -->|"RecordDiscoveryCertClaimed\nRecordDiscoveryCertDismissed"| AUDIT
AUDIT --> DB AUDIT --> DB
DB --> API_LIST
API_LIST --> API_CLAIM
API_LIST --> API_DISMISS
``` ```
**Key steps:** **Filesystem Discovery (M18b):**
1. **Agent-side discovery** — Agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, walking directories recursively and parsing PEM/DER files 1. **Agent-side discovery** — Agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, walking directories recursively and parsing PEM/DER files
2. **Metadata extraction** — For each certificate found, extract: common name, SANs, serial number, issuer DN, subject DN, expiration date, key algorithm, key size, is_ca flag, SHA-256 fingerprint (used as dedup key) 2. **Metadata extraction** — For each certificate found, extract: common name, SANs, serial number, issuer DN, subject DN, expiration date, key algorithm, key size, is_ca flag, SHA-256 fingerprint (used as dedup key)
3. **Server submission** — Agent POSTs scan results as `DiscoveryReport` to `POST /api/v1/agents/{id}/discoveries` 3. **Server submission** — Agent POSTs scan results as `DiscoveryReport` to `POST /api/v1/agents/{id}/discoveries`
4. **Deduplication** — Server uses fingerprint + agent ID + filesystem path as unique key; prevents duplicate records of the same cert on the same agent 4. **Deduplication** — Server uses fingerprint + agent ID + filesystem path as unique key; prevents duplicate records of the same cert on the same agent
5. **Storage** — Records stored in `discovered_certificates` table with status = "Unmanaged"
6. **Audit** — `discovery_scan_completed` event logged with agent ID, cert count, scan timestamp **Network Discovery (M21):**
7. **Operator triage** — Operator queries `GET /api/v1/discovered-certificates?status=Unmanaged` to see new findings
8. **Claim or dismiss** — For each unmanaged cert, operator either: 1. **Target configuration** — Operator creates network scan targets via `POST /api/v1/network-scan-targets` with CIDR ranges, ports, and scan interval
2. **CIDR expansion** — Ranges expanded to individual IPs with /20 safety cap (4096 IPs max)
3. **TLS probing** — Server uses `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` to connect to each endpoint; 50 concurrent goroutines with configurable timeout
4. **Certificate extraction** — Full X.509 metadata extracted from TLS handshake peer certificates
5. **Sentinel agent** — Results submitted using `server-scanner` as virtual agent ID, with `source_path` set to `ip:port` and `source_format` set to `network`
6. **Same pipeline** — Feeds into the same `DiscoveryService.ProcessDiscoveryReport()` as filesystem discovery — same dedup, same audit trail, same triage workflow
**Common triage workflow (both sources):**
1. **Storage** — Records stored in `discovered_certificates` table with status = "Unmanaged"
2. **Audit** — `discovery_scan_completed` event logged with agent ID, cert count, scan timestamp
3. **Operator triage** — Operator queries `GET /api/v1/discovered-certificates?status=Unmanaged` to see new findings
4. **Claim or dismiss** — For each unmanaged cert, operator either:
- **Claims it** via `POST /discovered-certificates/{id}/claim` — links to existing managed cert or creates new enrollment - **Claims it** via `POST /discovered-certificates/{id}/claim` — links to existing managed cert or creates new enrollment
- **Dismisses it** via `POST /discovered-certificates/{id}/dismiss` — removes from triage, marked as "Dismissed" - **Dismisses it** via `POST /discovered-certificates/{id}/dismiss` — removes from triage, marked as "Dismissed"
5. **Status tracking** — `discovery_cert_claimed` and `discovery_cert_dismissed` events audit the operator's decision
+9 -6
View File
@@ -160,17 +160,20 @@ Each section includes:
- **Health Endpoint** — `GET /health` returns 200 OK with service status. Consumed by Docker health checks and Kubernetes probes. - **Health Endpoint** — `GET /health` returns 200 OK with service status. Consumed by Docker health checks and Kubernetes probes.
- **Readiness Endpoint** — `GET /ready` returns 200 OK when the database is connected and migrations are applied. - **Readiness Endpoint** — `GET /ready` returns 200 OK when the database is connected and migrations are applied.
- **Background Scheduler Monitoring** — 5 background loops run on a fixed schedule: - **Background Scheduler Monitoring** — 6 background loops run on a fixed schedule:
- Renewal loop: every 1 hour, scans for certificates approaching renewal threshold - Renewal loop: every 1 hour, scans for certificates approaching renewal threshold
- Job processor loop: every 30 seconds, picks up pending/waiting jobs and advances their state - Job processor loop: every 30 seconds, picks up pending/waiting jobs and advances their state
- Health check loop: every 2 minutes, pings agents to detect downtime - Health check loop: every 2 minutes, pings agents to detect downtime
- Notification dispatcher loop: every 1 minute, sends queued alerts - Notification dispatcher loop: every 1 minute, sends queued alerts
- Short-lived cert expiry loop: every 30 seconds, marks expired short-lived credentials - Short-lived cert expiry loop: every 30 seconds, marks expired short-lived credentials
- Network scanner loop: every 6 hours, probes TLS endpoints in enabled network scan targets for certificate discovery (runs only when `CERTCTL_NETWORK_SCAN_ENABLED=true`)
Each loop includes error handling and logs failures via structured slog. Each loop includes error handling and logs failures via structured slog.
- **JSON Metrics Endpoint** — `GET /api/v1/metrics` returns JSON object with: - **Metrics Endpoints** — Two formats for monitoring integration:
- **Gauges** — `certificates_total`, `certificates_active`, `certificates_expiring_soon`, `agents_total`, `agents_healthy`, `pending_jobs`, `failed_jobs` - `GET /api/v1/metrics` — JSON object with gauges, counters, and uptime for custom dashboards
- **Counters** — `certs_issued_total`, `certs_renewed_total`, `certs_revoked_total`, `deployments_completed_total`, `deployments_failed_total` - `GET /api/v1/metrics/prometheus` — Prometheus exposition format (`text/plain; version=0.0.4`) for native scraping by Prometheus, Grafana Agent, Datadog, and other Prometheus-compatible collectors
- **Uptime** — `uptime_seconds` (seconds since server start) - **Gauges** — `certctl_certificate_total`, `certctl_certificate_active`, `certctl_certificate_expiring`, `certctl_certificate_expired`, `certctl_certificate_revoked`, `certctl_agent_total`, `certctl_agent_active`, `certctl_job_pending`
- **Counters** — `certctl_job_completed_total`, `certctl_job_failed_total`
- **Uptime** — `certctl_uptime_seconds` (seconds since server start)
All values are point-in-time snapshots computed from database tables. All values are point-in-time snapshots computed from database tables.
- **Structured Logging** — All scheduler operations, API calls, and connector actions log via `slog` (Go's structured logger). Logs include timestamp, level (DEBUG/INFO/WARN/ERROR), structured fields (e.g., `actor`, `resource_id`, `latency_ms`), and request IDs for tracing. - **Structured Logging** — All scheduler operations, API calls, and connector actions log via `slog` (Go's structured logger). Logs include timestamp, level (DEBUG/INFO/WARN/ERROR), structured fields (e.g., `actor`, `resource_id`, `latency_ms`), and request IDs for tracing.
- **Request ID Propagation** — Each HTTP request gets a unique ID (`X-Request-ID` header). The ID is included in all correlated logs, making it easy to trace a single request through multiple service layers. - **Request ID Propagation** — Each HTTP request gets a unique ID (`X-Request-ID` header). The ID is included in all correlated logs, making it easy to trace a single request through multiple service layers.
@@ -426,7 +429,7 @@ Each section includes:
| | Metrics JSON Endpoint | `GET /api/v1/metrics` (gauges, counters, uptime) | ✅ | ✅ | Set thresholds, configure alerting | | | Metrics JSON Endpoint | `GET /api/v1/metrics` (gauges, counters, uptime) | ✅ | ✅ | Set thresholds, configure alerting |
| | Stats API (time-series) | `GET /api/v1/stats/*` (summary, status, expiration, jobs, issuance) | ✅ | ✅ | Integrate into dashboards, SLO tracking | | | Stats API (time-series) | `GET /api/v1/stats/*` (summary, status, expiration, jobs, issuance) | ✅ | ✅ | Integrate into dashboards, SLO tracking |
| | Structured Logging | `slog` middleware with request IDs | ✅ | ✅ | Aggregate logs to SIEM, define retention policy | | | Structured Logging | `slog` middleware with request IDs | ✅ | ✅ | Aggregate logs to SIEM, define retention policy |
| | Background Scheduler | 5 loops (renewal 1h, jobs 30s, health 2m, notifications 1m, short-lived 30s) | ✅ | ✅ | Alert on scheduler loop failures | | | Background Scheduler | 6 loops (renewal 1h, jobs 30s, health 2m, notifications 1m, short-lived 30s, network scan 6h) | ✅ | ✅ | Alert on scheduler loop failures |
| **CC7.2** Anomaly Detection | Immutable API Audit Trail | `internal/api/middleware/audit.go`, `GET /api/v1/audit` | ✅ | Enhanced (SIEM export) | Integrate into SIEM, search for anomalies, archive long-term | | **CC7.2** Anomaly Detection | Immutable API Audit Trail | `internal/api/middleware/audit.go`, `GET /api/v1/audit` | ✅ | Enhanced (SIEM export) | Integrate into SIEM, search for anomalies, archive long-term |
| | Expiration Threshold Alerting | Configurable per-policy (default 30/14/7/0 days) | ✅ | ✅ | Configure thresholds, integrate notifications | | | Expiration Threshold Alerting | Configurable per-policy (default 30/14/7/0 days) | ✅ | ✅ | Configure thresholds, integrate notifications |
| | Status Auto-Transitions | Active → Expiring (30d) → Expired (0d) | ✅ | ✅ | Monitor status changes in audit trail | | | Status Auto-Transitions | Active → Expiring (30d) → Expired (0d) | ✅ | ✅ | Monitor status changes in audit trail |
+2 -2
View File
@@ -194,7 +194,7 @@ The MCP server is a separate binary (`cmd/mcp-server/`) that communicates via st
Certificate discovery is the process of automatically finding existing certificates in your infrastructure — certificates you didn't issue through certctl, possibly issued by other CAs or tools. This is essential for building a complete inventory before you can manage everything. Certificate discovery is the process of automatically finding existing certificates in your infrastructure — certificates you didn't issue through certctl, possibly issued by other CAs or tools. This is essential for building a complete inventory before you can manage everything.
**How it works:** Agents can scan configured directories (configured via `CERTCTL_DISCOVERY_DIRS`) for certificate files. On startup and every 6 hours, the agent walks these directories recursively, parses PEM and DER files, extracts metadata (common name, SANs, expiration, issuer, key algorithm), and reports all findings to the control plane. The server deduplicates by fingerprint (prevents duplicate reports of the same cert) and stores them with a status: **Unmanaged** (discovered but not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage it). **How it works:** There are two discovery modes. *Filesystem discovery* — agents scan configured directories (configured via `CERTCTL_DISCOVERY_DIRS`) for certificate files. On startup and every 6 hours, the agent walks directories recursively, parses PEM and DER files, extracts metadata, and reports findings to the control plane. *Network discovery* — the control plane itself probes TLS endpoints across configured CIDR ranges and ports (enabled via `CERTCTL_NETWORK_SCAN_ENABLED=true`). It connects to each endpoint, extracts certificates from the TLS handshake, and feeds results into the same discovery pipeline. This finds certificates on services you may not have agents on. In both cases, the server deduplicates by fingerprint and stores discovered certs with a status: **Unmanaged** (discovered but not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage it).
This gives you a three-step triage workflow: This gives you a three-step triage workflow:
1. **Discover** — Agents find all existing certs on your infrastructure 1. **Discover** — Agents find all existing certs on your infrastructure
@@ -205,7 +205,7 @@ This is a prerequisite for multi-CA migration, compliance audits, and building c
### Observability ### Observability
certctl exposes a JSON metrics endpoint at `GET /api/v1/metrics` with gauges (certificate totals by status, agent counts, pending jobs), counters (completed/failed jobs), and uptime. Five stats endpoints power the dashboard charts: summary statistics, certificates by status, expiration timeline, job trends, and issuance rate. certctl exposes metrics in two formats: a JSON endpoint at `GET /api/v1/metrics` and a Prometheus exposition format at `GET /api/v1/metrics/prometheus` (compatible with Prometheus, Grafana Agent, Datadog Agent, and Victoria Metrics). Both provide gauges (certificate totals by status, agent counts, pending jobs), counters (completed/failed jobs), and uptime. Five stats endpoints power the dashboard charts: summary statistics, certificates by status, expiration timeline, job trends, and issuance rate.
The agent fleet overview page groups agents by OS, architecture, and version, showing distribution charts that help ops teams track fleet health and identify outdated agents. All API requests are logged via structured `slog` middleware with request IDs for correlation. The agent fleet overview page groups agents by OS, architecture, and version, showing distribution charts that help ops teams track fleet health and identify outdated agents. All API requests are logged via structured `slog` middleware with request IDs for correlation.
+78
View File
@@ -639,6 +639,84 @@ curl -s http://localhost:8443/api/v1/discovery-summary | jq .
- **Compliance** — Detect rogue/unauthorized certificates in monitored directories - **Compliance** — Detect rogue/unauthorized certificates in monitored directories
- **Integration** — Pull certificate data from systems that pre-generate certs (e.g., Kubernetes CertManager) - **Integration** — Pull certificate data from systems that pre-generate certs (e.g., Kubernetes CertManager)
## Network Certificate Scanner (M21)
The control plane includes a built-in active TLS scanner that probes network endpoints and discovers certificates without requiring agent deployment. This complements the agent-based filesystem discovery with network-level visibility.
### Configuration
Enable network scanning on the server:
```bash
export CERTCTL_NETWORK_SCAN_ENABLED=true
export CERTCTL_NETWORK_SCAN_INTERVAL=6h # default
```
### Creating Scan Targets
Network scan targets define which CIDR ranges and ports to probe:
```bash
# Create a scan target for your internal network
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
-H "Content-Type: application/json" \
-d '{
"name": "Production Web Servers",
"cidrs": ["10.0.1.0/24", "10.0.2.0/24"],
"ports": [443, 8443, 6443],
"enabled": true,
"scan_interval_hours": 6,
"timeout_ms": 5000
}' | jq .
```
### How It Works
1. **Expand**: CIDR ranges are expanded to individual IPs (safety cap at /20 = 4096 IPs)
2. **Probe**: Concurrent TLS connections (50 goroutines) with configurable timeout per endpoint
3. **Extract**: Certificate metadata extracted from TLS handshake (CN, SANs, serial, issuer, key info, fingerprint)
4. **Pipeline**: Results fed into the same `DiscoveryService.ProcessDiscoveryReport()` as filesystem discovery
5. **Deduplicate**: Sentinel agent ID (`server-scanner`) with source_path as `ip:port` ensures proper dedup
6. **Triage**: Discovered certs appear in `GET /api/v1/discovered-certificates` with `agent_id=server-scanner`
### API Endpoints
```bash
# List all scan targets
curl -s http://localhost:8443/api/v1/network-scan-targets | jq .
# Create a scan target
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
-H "Content-Type: application/json" \
-d '{"name": "DMZ", "cidrs": ["172.16.0.0/24"], "ports": [443]}' | jq .
# Get a specific target (includes last_scan_at, last_scan_certs_found)
curl -s http://localhost:8443/api/v1/network-scan-targets/nst-dmz | jq .
# Trigger an immediate scan (doesn't wait for scheduler)
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-dmz/scan | jq .
# Update scan configuration
curl -s -X PUT http://localhost:8443/api/v1/network-scan-targets/nst-dmz \
-H "Content-Type: application/json" \
-d '{"ports": [443, 8443, 9443], "timeout_ms": 3000}' | jq .
# Delete a scan target
curl -s -X DELETE http://localhost:8443/api/v1/network-scan-targets/nst-dmz
```
### Scheduler Integration
When `CERTCTL_NETWORK_SCAN_ENABLED=true`, the server runs a 6th scheduler loop (alongside renewal, jobs, health, notifications, and short-lived expiry). It scans all enabled targets at the configured interval (default 6h). Each target tracks `last_scan_at`, `last_scan_duration_ms`, and `last_scan_certs_found` for monitoring scan health.
### Use Cases
- **Network inventory** — "What TLS certs are deployed across my network?" without deploying agents
- **Shadow certificate detection** — Find certificates on services you didn't know were running TLS
- **Compliance scanning** — Prove to auditors that all TLS endpoints are inventoried
- **Migration assessment** — Scan a network range before onboarding to certctl management
- **Expiration monitoring** — Discover soon-to-expire certs on network endpoints before they cause outages
## What's Next ## What's Next
- [Architecture Guide](architecture.md) — Understanding the full system design - [Architecture Guide](architecture.md) — Understanding the full system design
+46 -12
View File
@@ -695,11 +695,14 @@ curl -s "$API/api/v1/stats/job-trends?days=30" | jq .
# Issuance rate — new certificates per day over 30 days # Issuance rate — new certificates per day over 30 days
curl -s "$API/api/v1/stats/issuance-rate?days=30" | jq . curl -s "$API/api/v1/stats/issuance-rate?days=30" | jq .
# System metrics — gauges, counters, uptime # System metrics — gauges, counters, uptime (JSON)
curl -s $API/api/v1/metrics | jq . curl -s $API/api/v1/metrics | jq .
# System metrics — Prometheus exposition format (for Prometheus/Grafana/Datadog scraping)
curl -s $API/api/v1/metrics/prometheus
``` ```
**How it works:** The `StatsService` computes aggregations in Go from existing repository List methods — no additional SQL queries or materialized views. This keeps the database schema simple while providing real-time dashboard data. The metrics endpoint returns gauges (cert totals by status, agent counts, pending jobs), counters (completed/failed jobs), and server uptime. **How it works:** The `StatsService` computes aggregations in Go from existing repository List methods — no additional SQL queries or materialized views. This keeps the database schema simple while providing real-time dashboard data. The JSON metrics endpoint returns gauges (cert totals by status, agent counts, pending jobs), counters (completed/failed jobs), and server uptime. The Prometheus endpoint (`/api/v1/metrics/prometheus`) exposes the same data in Prometheus exposition format (`text/plain; version=0.0.4`) with `certctl_` prefixed metric names — ready for scraping by Prometheus, Grafana Agent, Datadog Agent, or Victoria Metrics.
**In the dashboard**, these stats power four interactive charts: an expiration heatmap, renewal success rate trends, certificate status distribution, and issuance rate. The agent fleet overview page uses agent metadata to group by OS, architecture, and version. **In the dashboard**, these stats power four interactive charts: an expiration heatmap, renewal success rate trends, certificate status distribution, and issuance rate. The agent fleet overview page uses agent metadata to group by OS, architecture, and version.
@@ -916,11 +919,13 @@ The MCP server is perfect for:
--- ---
## Part 16: Certificate Discovery (M18b) ## Part 16: Certificate Discovery (M18b + M21)
Agents can automatically discover existing certificates already deployed in your infrastructure. This is useful for building a baseline inventory before you start managing everything with certctl. certctl discovers existing certificates in two ways: **filesystem scanning** (agents scan local directories) and **network scanning** (the server probes TLS endpoints). Both feed into the same triage pipeline.
First, configure the demo agent to scan for certificates. In the Docker Compose setup, agents have a `/tmp/certs` directory (created by the seed script). Restart the agent with discovery enabled: ### Filesystem Discovery (Agent-Side)
Configure the demo agent to scan for certificates. In the Docker Compose setup, agents have a `/tmp/certs` directory (created by the seed script). Restart the agent with discovery enabled:
```bash ```bash
# Stop the existing agent # Stop the existing agent
@@ -936,17 +941,46 @@ Or with the CLI flag:
certctl-agent --agent-id a-demo-1 --key-dir /tmp/keys --discovery-dirs /tmp/certs --server http://localhost:8443 --api-key test-key-123 certctl-agent --agent-id a-demo-1 --key-dir /tmp/keys --discovery-dirs /tmp/certs --server http://localhost:8443 --api-key test-key-123
``` ```
Now check what the agent discovered: ### Network Discovery (Server-Side)
The server can also discover certificates by actively probing TLS endpoints — no agent required. Create a scan target and trigger a scan:
```bash ```bash
# List discovered certificates (should show unmanaged certs found on the agent) # Create a network scan target
curl -s -X POST $API/api/v1/network-scan-targets \
-H "Content-Type: application/json" \
-d '{
"name": "Demo Local Scan",
"cidrs": ["127.0.0.1/32"],
"ports": [8443],
"enabled": true,
"scan_interval_hours": 6,
"timeout_ms": 5000
}' | jq .
# Trigger an immediate scan (otherwise runs every 6 hours)
NST_ID=$(curl -s $API/api/v1/network-scan-targets | jq -r '.data[0].id')
curl -s -X POST "$API/api/v1/network-scan-targets/$NST_ID/scan" | jq .
# List scan targets and their results
curl -s $API/api/v1/network-scan-targets | jq .
```
Network-discovered certificates appear in the same discovery pipeline as filesystem-discovered ones, with `agent_id=server-scanner` and `source_format=network`.
### Triage Discovered Certificates
Both discovery sources feed into the same triage workflow. Check what was found:
```bash
# List discovered certificates (should show unmanaged certs found by agents and network scans)
curl -s "$API/api/v1/discovered-certificates?status=Unmanaged" | jq '.data[] | {id, common_name, expires_at, issuer_dn, status}' curl -s "$API/api/v1/discovered-certificates?status=Unmanaged" | jq '.data[] | {id, common_name, expires_at, issuer_dn, status}'
# Get a summary of all discoveries # Get a summary of all discoveries
curl -s $API/api/v1/discovery-summary | jq . curl -s $API/api/v1/discovery-summary | jq .
``` ```
If the agent found certificates, you'll see entries with `status: "Unmanaged"`. Now triage them — claim the ones you want to manage or dismiss the ones you don't: If certificates were found, you'll see entries with `status: "Unmanaged"`. Triage them — claim the ones you want to manage or dismiss the ones you don't:
```bash ```bash
# Claim a certificate (link it to a managed cert, or create new enrollment) # Claim a certificate (link it to a managed cert, or create new enrollment)
@@ -961,9 +995,9 @@ curl -s -X POST "$API/api/v1/discovered-certificates/$DISCOVERED_ID/dismiss" \
-d '{"reason": "Self-signed test cert, not production"}' | jq . -d '{"reason": "Self-signed test cert, not production"}' | jq .
``` ```
**How it works:** The agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, extracts metadata (common name, SANs, issuer, expiration, key type, fingerprint) from all PEM and DER files, and POSTs the findings to `POST /api/v1/agents/{id}/discoveries`. The server deduplicates by fingerprint (prevents duplicate records) and stores results with a status: **Unmanaged** (discovered, not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage). This gives you a triage workflow: discover → review → claim or dismiss. **How it works:** Filesystem discovery: the agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, extracts metadata (common name, SANs, issuer, expiration, key type, fingerprint) from all PEM and DER files, and POSTs findings to `POST /api/v1/agents/{id}/discoveries`. Network discovery: the server expands CIDR ranges (capped at /20 = 4096 IPs), connects to each IP:port via TLS, extracts the peer certificate chain, and stores results using `server-scanner` as a sentinel agent ID. Both sources deduplicate by fingerprint and store results with a status: **Unmanaged** (discovered, not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage). This gives you a triage workflow: discover → review → claim or dismiss.
**In the dashboard**, the Discovery page (coming in future V2.x) will provide a visual triage interface for claiming and dismissing discovered certificates. **In the dashboard**, click "Discovered Certificates" in the sidebar to see what agents and network scans found — claim unmanaged certs to bring them under certctl's management, or dismiss them.
--- ---
@@ -989,12 +1023,12 @@ flowchart TB
API["REST API\nGo net/http"] API["REST API\nGo net/http"]
SVC["Service Layer\nBusiness Logic"] SVC["Service Layer\nBusiness Logic"]
REPO["Repository Layer\ndatabase/sql + lib/pq"] REPO["Repository Layer\ndatabase/sql + lib/pq"]
SCHED["Scheduler\n5 background loops"] SCHED["Scheduler\n6 background loops"]
CONN["Connector Registry\nIssuer + Target + Notifier"] CONN["Connector Registry\nIssuer + Target + Notifier"]
end end
subgraph "Data Store" subgraph "Data Store"
PG["PostgreSQL 16\n18 tables, TEXT PKs"] PG["PostgreSQL 16\n19 tables, TEXT PKs"]
end end
subgraph "Agent (certctl-agent)" subgraph "Agent (certctl-agent)"
+7 -3
View File
@@ -70,11 +70,11 @@ On the Certificates page, select multiple certificates using the checkboxes. A b
Click any certificate, then scroll to the deployment timeline. A visual 4-step timeline shows the lifecycle: Requested → Issued → Deploying → Active. Previous versions show a rollback button. Click any certificate, then scroll to the deployment timeline. A visual 4-step timeline shows the lifecycle: Requested → Issued → Deploying → Active. Previous versions show a rollback button.
**11. "What about certificates already running in production?"** **11. "What about certificates already running in production?"**
Enable discovery on agents by setting `CERTCTL_DISCOVERY_DIRS` to directories containing certificates (e.g., `/etc/nginx/certs`). Agents scan on startup and every 6 hours, report findings to the control plane. Click "Discovered Certificates" to see what agents found — claim unmanaged certs to bring them under certctl's management, or dismiss them. Enable discovery on agents by setting `CERTCTL_DISCOVERY_DIRS` to directories containing certificates (e.g., `/etc/nginx/certs`). Agents scan on startup and every 6 hours, report findings to the control plane. For network-based discovery without agents, enable `CERTCTL_NETWORK_SCAN_ENABLED=true` and configure scan targets via the API — the server probes TLS endpoints on configured CIDR ranges and ports. Click "Discovered Certificates" to see what agents and network scans found — claim unmanaged certs to bring them under certctl's management, or dismiss them.
## REST API Walkthrough ## REST API Walkthrough
The dashboard is backed by a real REST API (84 endpoints). Try these while the demo is running: The dashboard is backed by a real REST API (91 endpoints). Try these while the demo is running:
```bash ```bash
# List all certificates # List all certificates
@@ -114,6 +114,7 @@ curl -s http://localhost:8443/api/v1/stats/expiration-timeline | jq .
curl -s http://localhost:8443/api/v1/stats/job-trends | jq . curl -s http://localhost:8443/api/v1/stats/job-trends | jq .
curl -s http://localhost:8443/api/v1/stats/issuance-rate | jq . curl -s http://localhost:8443/api/v1/stats/issuance-rate | jq .
curl -s http://localhost:8443/api/v1/metrics | jq . curl -s http://localhost:8443/api/v1/metrics | jq .
curl -s http://localhost:8443/api/v1/metrics/prometheus # Prometheus format
# Certificate profiles # Certificate profiles
curl -s http://localhost:8443/api/v1/profiles | jq . curl -s http://localhost:8443/api/v1/profiles | jq .
@@ -135,6 +136,9 @@ curl -s http://localhost:8443/api/v1/discovered-certificates | jq .
# Discovery summary (counts by status) # Discovery summary (counts by status)
curl -s http://localhost:8443/api/v1/discovery-summary | jq . curl -s http://localhost:8443/api/v1/discovery-summary | jq .
# Network scan targets (active TLS scanning)
curl -s http://localhost:8443/api/v1/network-scan-targets | jq .
``` ```
## CLI Tool ## CLI Tool
@@ -236,7 +240,7 @@ If you're demoing to a team or customer, here's a suggested flow:
7. **Show profiles** — "Certificate profiles enforce crypto constraints — key types, max TTL, compliance requirements" 7. **Show profiles** — "Certificate profiles enforce crypto constraints — key types, max TTL, compliance requirements"
8. **Show policies** — "Guardrails prevent teams from going outside approved scope" 8. **Show policies** — "Guardrails prevent teams from going outside approved scope"
9. **Show bulk operations** — "Select multiple certs, trigger renewal or revoke in bulk with progress tracking" 9. **Show bulk operations** — "Select multiple certs, trigger renewal or revoke in bulk with progress tracking"
10. **Show certificate discovery** — "Agents scan your infrastructure for existing certificates you're not managing yet. We automatically deduplicate by fingerprint, show you what we found, and let you claim them or dismiss them" 10. **Show certificate discovery** — "We discover certificates two ways: agents scan local filesystems, and the server actively probes TLS endpoints on your network. We deduplicate by fingerprint, show you what we found, and let you claim them or dismiss them"
11. **Show the immutable audit trail** — "Every action in the system is recorded: who did it, what they did, when, what changed. Export to CSV/JSON for compliance" 11. **Show the immutable audit trail** — "Every action in the system is recorded: who did it, what they did, when, what changed. Export to CSV/JSON for compliance"
12. **Show advanced query features** — "Sort by any field, filter by date range, paginate efficiently with cursor-based pagination, select just the fields you need" 12. **Show advanced query features** — "Sort by any field, filter by date range, paginate efficiently with cursor-based pagination, select just the fields you need"
13. **Show the CLI and MCP server** — "Terminal users get `certctl-cli` with 10 subcommands. AI assistants get MCP integration with 76 tools. Everything is API-first" 13. **Show the CLI and MCP server** — "Terminal users get `certctl-cli` with 10 subcommands. AI assistants get MCP integration with 76 tools. Everything is API-first"
+71 -5
View File
@@ -7,7 +7,7 @@ Complete reference of all features shipped in the V2 release (as of March 2026).
## API Surface ## API Surface
### Overview ### Overview
- **84 endpoints** across 17 resource domains under `/api/v1/` - **91 endpoints** across 19 resource domains under `/api/v1/`
- REST API with HTTP semantics (GET, POST, PUT, DELETE) - REST API with HTTP semantics (GET, POST, PUT, DELETE)
- All endpoints require authentication by default (configurable) - All endpoints require authentication by default (configurable)
- OpenAPI 3.1 spec with full schema documentation - OpenAPI 3.1 spec with full schema documentation
@@ -55,10 +55,11 @@ Complete reference of all features shipped in the V2 release (as of March 2026).
| **Owners** | 5 | List, create, get, update, delete | | **Owners** | 5 | List, create, get, update, delete |
| **Agent Groups** | 6 | List, create, get, update, delete, list agents in group | | **Agent Groups** | 6 | List, create, get, update, delete, list agents in group |
| **Discovery** | 7 | Submit scan results, list discovered certs, get detail, claim, dismiss, list scans, summary stats | | **Discovery** | 7 | Submit scan results, list discovered certs, get detail, claim, dismiss, list scans, summary stats |
| **Network Scan** | 6 | List targets, create, get, update, delete, trigger scan |
| **Audit** | 3 | List events, list by resource, export (CSV/JSON) | | **Audit** | 3 | List events, list by resource, export (CSV/JSON) |
| **Notifications** | 3 | List, get, mark as read | | **Notifications** | 3 | List, get, mark as read |
| **Stats** | 5 | Dashboard summary, certificates by status, expiration timeline, job trends, issuance rate | | **Stats** | 5 | Dashboard summary, certificates by status, expiration timeline, job trends, issuance rate |
| **Metrics** | 1 | JSON metrics (gauges, counters, uptime) | | **Metrics** | 2 | JSON metrics (gauges, counters, uptime), Prometheus exposition format |
| **Health** | 4 | Health check, readiness check, auth info, auth check | | **Health** | 4 | Health check, readiness check, auth info, auth check |
--- ---
@@ -411,6 +412,60 @@ Each discovered certificate is parsed and its metadata extracted:
--- ---
## Network Certificate Discovery (M21)
### Overview
Server-side active TLS scanning probes network endpoints across CIDR ranges, extracts certificate metadata from TLS handshakes, and feeds results into the existing filesystem discovery pipeline. No agent deployment required — the control plane scans directly.
### Configuration
- **Enable** — `CERTCTL_NETWORK_SCAN_ENABLED=true` (disabled by default)
- **Scan Interval** — `CERTCTL_NETWORK_SCAN_INTERVAL=6h` (default 6 hours, configurable)
### Network Scan Targets
Scan targets define what CIDR ranges and ports to probe.
| Field | Details | Example |
|-------|---------|---------|
| **ID** | Prefixed text PK (nst-xxx) | nst-datacenter-east |
| **Name** | Human-readable target name | Datacenter East Production |
| **CIDRs** | Array of CIDR ranges | ["10.0.1.0/24", "10.0.2.0/24"] |
| **Ports** | Array of TCP ports | [443, 8443, 6443] |
| **Enabled** | Toggle scanning on/off | true |
| **Scan Interval Hours** | Per-target scan frequency | 6 |
| **Timeout Ms** | Per-connection timeout | 5000 |
### Scanning Behavior
- **CIDR Expansion** — Ranges expanded to individual IPs; safety cap at /20 (4096 IPs) prevents accidental large scans
- **Concurrent Probing** — 50 goroutines (semaphore-based), configurable timeout per TLS connection
- **TLS Extraction** — `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` discovers all certs including self-signed, expired, and internal CA certs
- **Sentinel Agent Pattern** — Uses `server-scanner` as virtual agent ID, reusing the existing `discovered_certificates` dedup constraint without schema changes
- **Discovery Pipeline** — Scan results feed into `DiscoveryService.ProcessDiscoveryReport()` for fingerprint dedup, audit trail, and triage workflow
### Network Scan API Endpoints (M21)
| Endpoint | Method | Purpose |
|----------|--------|---------|
| `/api/v1/network-scan-targets` | GET | List all scan targets with metrics |
| `/api/v1/network-scan-targets` | POST | Create a new scan target |
| `/api/v1/network-scan-targets/{id}` | GET | Get scan target details |
| `/api/v1/network-scan-targets/{id}` | PUT | Update scan target configuration |
| `/api/v1/network-scan-targets/{id}` | DELETE | Delete a scan target |
| `/api/v1/network-scan-targets/{id}/scan` | POST | Trigger an immediate scan |
### Scheduler Integration
- **6th scheduler loop** — runs at configured interval (default 6h) alongside renewal (1h), jobs (30s), health (2m), notifications (1m), short-lived expiry (30s)
- **Conditional** — only starts if `CERTCTL_NETWORK_SCAN_ENABLED=true` and network scan service is initialized
- **Scan Metrics** — each target tracks `last_scan_at`, `last_scan_duration_ms`, `last_scan_certs_found`
### Use Cases
- **Network Inventory** — "What TLS certs are deployed across my network?" without deploying agents
- **Shadow Certificate Detection** — Find certificates on services you didn't know were running TLS
- **Compliance Scanning** — Prove to auditors that all TLS endpoints are inventoried
- **Migration Assessment** — Scan a network range before onboarding to certctl management
- **Expiration Monitoring** — Discover soon-to-expire certs on network endpoints before they cause outages
---
## Ownership & Accountability ## Ownership & Accountability
### Teams ### Teams
@@ -451,13 +506,23 @@ Live aggregated views of certificate and job metrics.
| **Certificate Status Distribution** | Donut | Pie breakdown: Active, Expiring, Expired, Failed, Revoked, etc. | | **Certificate Status Distribution** | Donut | Pie breakdown: Active, Expiring, Expired, Failed, Revoked, etc. |
| **Issuance Rate** | Bar (30-day) | Certs issued per day; trend line | | **Issuance Rate** | Bar (30-day) | Certs issued per day; trend line |
#### Metrics Endpoint #### Metrics Endpoints
**JSON Format**
- **URL**`GET /api/v1/metrics` - **URL**`GET /api/v1/metrics`
- **Format** — JSON with timestamp - **Format** — JSON with timestamp
- **Gauges** — Certificate counts by status, agent count (online/offline), pending job count - **Gauges** — Certificate counts by status, agent count (online/offline), pending job count
- **Counters** — Total jobs completed, total jobs failed, total renewals, total issuances - **Counters** — Total jobs completed, total jobs failed, total renewals, total issuances
- **Uptime** — Server uptime in seconds - **Uptime** — Server uptime in seconds
**Prometheus Exposition Format (M22)**
- **URL** — `GET /api/v1/metrics/prometheus`
- **Content-Type** — `text/plain; version=0.0.4; charset=utf-8`
- **Compatible with** — Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics, OpenMetrics scrapers
- **Naming** — `certctl_` prefix, snake_case (e.g., `certctl_certificate_total`, `certctl_agent_online`)
- **11 Metrics** — 8 gauges (cert total/active/expiring/expired/revoked, agent total/online, job pending), 2 counters (job completed/failed totals), 1 gauge (uptime seconds)
- **Scrape Config** — Add to `prometheus.yml`: `scrape_configs: [{job_name: certctl, static_configs: [{targets: ['localhost:8443']}], metrics_path: /api/v1/metrics/prometheus}]`
#### Stats API (M14) #### Stats API (M14)
Five parameterized endpoints for dashboard data. Five parameterized endpoints for dashboard data.
@@ -541,7 +606,7 @@ Every API call recorded to immutable `audit_events` table.
3. **Approve**`POST /api/v1/jobs/{id}/approve` → Job → `Running` 3. **Approve**`POST /api/v1/jobs/{id}/approve` → Job → `Running`
4. **Reject**`POST /api/v1/jobs/{id}/reject` + reason → Job → `Cancelled` 4. **Reject**`POST /api/v1/jobs/{id}/reject` + reason → Job → `Cancelled`
### Background Scheduler (5 loops) ### Background Scheduler (6 loops)
| Loop | Interval | Task | | Loop | Interval | Task |
|------|----------|------| |------|----------|------|
| **Renewal Checker** | 1 hour | Scan policies; trigger renewals if cert expires soon | | **Renewal Checker** | 1 hour | Scan policies; trigger renewals if cert expires soon |
@@ -549,6 +614,7 @@ Every API call recorded to immutable `audit_events` table.
| **Health Checker** | 2 minutes | Check agent heartbeat; mark offline if >3 missed | | **Health Checker** | 2 minutes | Check agent heartbeat; mark offline if >3 missed |
| **Notification Processor** | 1 minute | Send queued notifications (email, Slack, webhook, etc.) | | **Notification Processor** | 1 minute | Send queued notifications (email, Slack, webhook, etc.) |
| **Short-Lived Cleanup** | 30 seconds | Audit short-lived credential expirations | | **Short-Lived Cleanup** | 30 seconds | Audit short-lived credential expirations |
| **Network Scanner** | 6 hours | Scan enabled network targets; discover TLS certificates |
All loops have configurable intervals via environment variables (`CERTCTL_SCHEDULER_*_INTERVAL`). All loops have configurable intervals via environment variables (`CERTCTL_SCHEDULER_*_INTERVAL`).
@@ -898,7 +964,7 @@ Each guide includes an evidence summary table mapping specific criteria to certc
| Revocation (RFC 5280, CRL, OCSP) | ✓ | ✓ | Shipped | | Revocation (RFC 5280, CRL, OCSP) | ✓ | ✓ | Shipped |
| Dashboard + 19 pages | ✓ | ✓ | Shipped | | Dashboard + 19 pages | ✓ | ✓ | Shipped |
| Observability (charts, metrics, stats) | ✓ | ✓ | Shipped | | Observability (charts, metrics, stats) | ✓ | ✓ | Shipped |
| REST API (84 endpoints) | ✓ | ✓ | Shipped | | REST API (91 endpoints) | ✓ | ✓ | Shipped |
| MCP server (76 tools) | ✓ | ✓ | Shipped v2.1 | | MCP server (76 tools) | ✓ | ✓ | Shipped v2.1 |
| CLI tool (10 subcommands) | ✓ | ✓ | Shipped | | CLI tool (10 subcommands) | ✓ | ✓ | Shipped |
| Compliance mapping docs (SOC 2, PCI-DSS, NIST) | ✓ | ✓ | Shipped | | Compliance mapping docs (SOC 2, PCI-DSS, NIST) | ✓ | ✓ | Shipped |
+33 -1
View File
@@ -295,8 +295,11 @@ curl -s "http://localhost:8443/api/v1/stats/expiration-timeline?days=90" | jq .
# Job trends (last 30 days) # Job trends (last 30 days)
curl -s "http://localhost:8443/api/v1/stats/job-trends?days=30" | jq . curl -s "http://localhost:8443/api/v1/stats/job-trends?days=30" | jq .
# System metrics # System metrics (JSON)
curl -s http://localhost:8443/api/v1/metrics | jq . curl -s http://localhost:8443/api/v1/metrics | jq .
# System metrics (Prometheus format — for scraping by Prometheus, Grafana Agent, Datadog)
curl -s http://localhost:8443/api/v1/metrics/prometheus
``` ```
### Certificate profiles ### Certificate profiles
@@ -364,6 +367,35 @@ curl -s -X POST "http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_
-d '{"managed_certificate_id": "mc-api-prod"}' | jq . -d '{"managed_certificate_id": "mc-api-prod"}' | jq .
``` ```
### Network Certificate Discovery
The server can also discover certificates by scanning TLS endpoints directly — no agent required:
```bash
# Enable network scanning (set in environment or docker-compose)
export CERTCTL_NETWORK_SCAN_ENABLED=true
# Create a scan target (e.g., scan your internal network on port 443)
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
-H "Content-Type: application/json" \
-d '{
"name": "Internal Network",
"cidrs": ["10.0.1.0/24"],
"ports": [443, 8443],
"enabled": true,
"scan_interval_hours": 6,
"timeout_ms": 5000
}' | jq .
# Trigger an immediate scan
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-internal-network/scan | jq .
# List scan targets with results
curl -s http://localhost:8443/api/v1/network-scan-targets | jq .
```
Discovered network certificates appear in the same `GET /api/v1/discovered-certificates` list as filesystem-discovered certs, with `agent_id=server-scanner` and `source_format=network`.
## What's Next ## What's Next
- **[Advanced Demo](demo-advanced.md)** — Issue a real certificate via the Local CA and watch it appear in the dashboard - **[Advanced Demo](demo-advanced.md)** — Issue a real certificate via the Local CA and watch it appear in the dashboard
+92 -3
View File
@@ -3,6 +3,7 @@ package handler
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt"
"net/http" "net/http"
"time" "time"
@@ -14,9 +15,9 @@ type MetricsService interface {
GetDashboardSummary(ctx context.Context) (interface{}, error) GetDashboardSummary(ctx context.Context) (interface{}, error)
} }
// MetricsHandler handles HTTP requests for Prometheus-style metrics. // MetricsHandler handles HTTP requests for metrics.
// In V2, returns JSON metrics (not Prometheus format). // Supports both JSON format (GET /api/v1/metrics) and Prometheus exposition format
// Prometheus format can be added in V3 when observability becomes a paid feature. // (GET /api/v1/metrics/prometheus) for integration with Prometheus, Grafana, Datadog, etc.
type MetricsHandler struct { type MetricsHandler struct {
svc MetricsService svc MetricsService
serverStarted time.Time serverStarted time.Time
@@ -117,6 +118,94 @@ func (h MetricsHandler) GetMetrics(w http.ResponseWriter, r *http.Request) {
JSON(w, http.StatusOK, metricsResp) JSON(w, http.StatusOK, metricsResp)
} }
// GetPrometheusMetrics serves the dashboard summary in the Prometheus text
// exposition format (text/plain; version=0.0.4).
// GET /api/v1/metrics/prometheus
//
// Only GET is accepted; any other method is answered with 405. A failure to
// collect, marshal, or decode the summary is answered with 500 and the request
// ID. On success the response carries eleven samples, all prefixed certctl_:
// eight gauges for certificate, agent, and pending-job counts, two job
// counters, and an uptime gauge measured from h.serverStarted.
func (h MetricsHandler) GetPrometheusMetrics(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	requestID := middleware.GetRequestID(r.Context())

	summary, err := h.svc.GetDashboardSummary(r.Context())
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to collect metrics", requestID)
		return
	}

	// The service hands back an opaque value, so it is re-encoded and decoded
	// into the handler-local DashboardSummary instead of being type-asserted.
	encoded, err := json.Marshal(summary)
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to marshal metrics data", requestID)
		return
	}

	var snap DashboardSummary
	if err = json.Unmarshal(encoded, &snap); err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Invalid metrics data", requestID)
		return
	}

	// Derived values: "active" is whatever is left after removing expiring,
	// expired, and revoked certificates from the total.
	active := snap.TotalCertificates - snap.ExpiringCertificates - snap.ExpiredCertificates - snap.RevokedCertificates
	uptimeSeconds := int64(time.Since(h.serverStarted).Seconds())

	// One row per exposed metric, in the order they appear in the response.
	type sample struct {
		name  string
		kind  string
		help  string
		value interface{}
	}
	samples := []sample{
		// Gauges — point-in-time values
		{"certctl_certificate_total", "gauge", "Total number of managed certificates.", snap.TotalCertificates},
		{"certctl_certificate_active", "gauge", "Number of active (non-expiring, non-expired, non-revoked) certificates.", active},
		{"certctl_certificate_expiring_soon", "gauge", "Number of certificates expiring within 30 days.", snap.ExpiringCertificates},
		{"certctl_certificate_expired", "gauge", "Number of expired certificates.", snap.ExpiredCertificates},
		{"certctl_certificate_revoked", "gauge", "Number of revoked certificates.", snap.RevokedCertificates},
		{"certctl_agent_total", "gauge", "Total number of registered agents.", snap.TotalAgents},
		{"certctl_agent_online", "gauge", "Number of agents currently online.", snap.ActiveAgents},
		{"certctl_job_pending", "gauge", "Number of jobs currently pending.", snap.PendingJobs},
		// Counters — cumulative values
		{"certctl_job_completed_total", "counter", "Total number of completed jobs.", snap.CompleteJobs},
		{"certctl_job_failed_total", "counter", "Total number of failed jobs.", snap.FailedJobs},
		// Info — server uptime
		{"certctl_uptime_seconds", "gauge", "Server uptime in seconds.", uptimeSeconds},
	}

	// See: https://prometheus.io/docs/instrumenting/exposition_formats/
	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	w.WriteHeader(http.StatusOK)

	for i, s := range samples {
		// A blank line separates consecutive metric families; none follows the last.
		if i > 0 {
			fmt.Fprint(w, "\n")
		}
		fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s %s\n%s %d\n", s.name, s.help, s.name, s.kind, s.name, s.value)
	}
}
// DashboardSummary mirrors the service.DashboardSummary for JSON unmarshaling. // DashboardSummary mirrors the service.DashboardSummary for JSON unmarshaling.
// JSON tags must match the service-layer struct exactly. // JSON tags must match the service-layer struct exactly.
type DashboardSummary struct { type DashboardSummary struct {
+179
View File
@@ -0,0 +1,179 @@
package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"github.com/shankar0123/certctl/internal/domain"
)
// NetworkScanService defines the interface used by the network scan handler.
// NetworkScanHandler calls these methods and translates their results and
// errors into HTTP responses.
type NetworkScanService interface {
// ListTargets returns the scan targets. The list handler substitutes an
// empty slice when the returned slice is nil.
ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error)
// GetTarget looks up one target by ID. The handler reports any error as 404.
GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error)
// CreateTarget stores a new target and returns the created record. The
// handler reports any error as 400.
CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error)
// UpdateTarget applies target to the record identified by id and returns
// the updated record. The handler reports any error as 500.
UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error)
// DeleteTarget removes the target identified by id. The handler reports any
// error as 404.
DeleteTarget(ctx context.Context, id string) error
// TriggerScan runs a scan for targetID. The handler treats a nil scan with a
// nil error as "completed, no certificates found".
TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error)
}
// NetworkScanHandler handles HTTP requests for network scan targets.
// Each endpoint method delegates the actual work to svc.
type NetworkScanHandler struct {
// svc is the service the endpoint methods call for target CRUD and scans.
svc NetworkScanService
}
// NewNetworkScanHandler returns a NetworkScanHandler (by value) whose endpoints
// are served by svc.
func NewNetworkScanHandler(svc NetworkScanService) NetworkScanHandler {
	var handler NetworkScanHandler
	handler.svc = svc
	return handler
}
// ListNetworkScanTargets handles GET /api/v1/network-scan-targets.
//
// It answers 405 for any method other than GET and 500 when the service fails.
// On success every target is returned in one PagedResponse (page 1, page size
// equal to the number of targets).
func (h NetworkScanHandler) ListNetworkScanTargets(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	found, err := h.svc.ListTargets(r.Context())
	switch {
	case err != nil:
		Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to list network scan targets: %v", err))
		return
	case found == nil:
		// Swap a nil slice for an empty one before it goes into the response.
		found = []*domain.NetworkScanTarget{}
	}

	count := len(found)
	page := PagedResponse{
		Data:    found,
		Total:   int64(count),
		Page:    1,
		PerPage: count,
	}
	JSON(w, http.StatusOK, page)
}
// GetNetworkScanTarget handles GET /api/v1/network-scan-targets/{id}.
//
// It answers 405 for non-GET methods, 400 when the {id} path value is empty,
// 404 when the service returns an error, and 200 with the target otherwise.
func (h NetworkScanHandler) GetNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	targetID := r.PathValue("id")
	if targetID == "" {
		Error(w, http.StatusBadRequest, "network scan target ID is required")
		return
	}

	ctx := r.Context()
	found, lookupErr := h.svc.GetTarget(ctx, targetID)
	if lookupErr != nil {
		// Every service error is surfaced to the client as "not found".
		msg := fmt.Sprintf("network scan target not found: %v", lookupErr)
		Error(w, http.StatusNotFound, msg)
		return
	}

	JSON(w, http.StatusOK, found)
}
// CreateNetworkScanTarget handles POST /api/v1/network-scan-targets.
//
// The request body is decoded as JSON into a domain.NetworkScanTarget and
// passed to the service. It answers 405 for non-POST methods, 400 when the
// body cannot be decoded or the service returns an error, and 201 with the
// created target on success.
func (h NetworkScanHandler) CreateNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	payload := new(domain.NetworkScanTarget)
	if decodeErr := json.NewDecoder(r.Body).Decode(payload); decodeErr != nil {
		Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", decodeErr))
		return
	}

	result, createErr := h.svc.CreateTarget(r.Context(), payload)
	if createErr != nil {
		// Service failures on create are reported as a client error (400).
		msg := fmt.Sprintf("failed to create network scan target: %v", createErr)
		Error(w, http.StatusBadRequest, msg)
		return
	}

	JSON(w, http.StatusCreated, result)
}
// UpdateNetworkScanTarget handles PUT /api/v1/network-scan-targets/{id}.
//
// It answers 405 for non-PUT methods, 400 when the {id} path value is empty or
// the JSON body cannot be decoded, 500 when the service returns an error, and
// 200 with the updated target on success.
func (h NetworkScanHandler) UpdateNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPut {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	targetID := r.PathValue("id")
	if targetID == "" {
		Error(w, http.StatusBadRequest, "network scan target ID is required")
		return
	}

	payload := new(domain.NetworkScanTarget)
	if decodeErr := json.NewDecoder(r.Body).Decode(payload); decodeErr != nil {
		Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", decodeErr))
		return
	}

	result, updateErr := h.svc.UpdateTarget(r.Context(), targetID, payload)
	if updateErr != nil {
		// Unlike create, service failures on update are reported as 500.
		msg := fmt.Sprintf("failed to update network scan target: %v", updateErr)
		Error(w, http.StatusInternalServerError, msg)
		return
	}

	JSON(w, http.StatusOK, result)
}
// DeleteNetworkScanTarget handles DELETE /api/v1/network-scan-targets/{id}.
//
// It answers 405 for non-DELETE methods, 400 when the {id} path value is
// empty, 404 when the service returns an error, and 204 on success.
func (h NetworkScanHandler) DeleteNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodDelete {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	targetID := r.PathValue("id")
	if targetID == "" {
		Error(w, http.StatusBadRequest, "network scan target ID is required")
		return
	}

	deleteErr := h.svc.DeleteTarget(r.Context(), targetID)
	if deleteErr != nil {
		// Every service error is surfaced to the client as 404.
		msg := fmt.Sprintf("failed to delete network scan target: %v", deleteErr)
		Error(w, http.StatusNotFound, msg)
		return
	}

	// NOTE(review): a 204 response must not carry a body; confirm the JSON
	// helper does not try to encode the nil payload for this status.
	JSON(w, http.StatusNoContent, nil)
}
// TriggerNetworkScan handles POST /api/v1/network-scan-targets/{id}/scan.
//
// It asks the service to scan the target named in the path. A service error
// replies 500. When the service returns no scan record (no certificates were
// found) the reply is 200 with a short status message; otherwise the scan
// record is returned with 202.
func (h NetworkScanHandler) TriggerNetworkScan(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	targetID := r.PathValue("id")
	if targetID == "" {
		Error(w, http.StatusBadRequest, "network scan target ID is required")
		return
	}

	scan, scanErr := h.svc.TriggerScan(r.Context(), targetID)
	switch {
	case scanErr != nil:
		Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to trigger scan: %v", scanErr))
	case scan == nil:
		// No scan record means the scan finished without finding certificates.
		JSON(w, http.StatusOK, map[string]string{
			"status":  "completed",
			"message": "Scan completed, no certificates found",
		})
	default:
		JSON(w, http.StatusAccepted, scan)
	}
}
@@ -0,0 +1,220 @@
package handler
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/shankar0123/certctl/internal/domain"
)
// mockNetworkScanService is an in-memory NetworkScanService used by the
// handler tests. Its methods read and modify the targets slice directly.
type mockNetworkScanService struct {
	// targets is the backing store the mock's methods operate on.
	targets []*domain.NetworkScanTarget
}
// ListTargets returns the mock's stored targets as-is and never fails.
func (m *mockNetworkScanService) ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	stored := m.targets
	return stored, nil
}
// GetTarget returns the stored target whose ID equals id, or a "not found"
// error when none matches.
func (m *mockNetworkScanService) GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
	for idx := range m.targets {
		if candidate := m.targets[idx]; candidate.ID == id {
			return candidate, nil
		}
	}
	return nil, fmt.Errorf("not found: %s", id)
}
// CreateTarget rejects targets with an empty name; otherwise it assigns the
// fixed ID "nst-test-123", stores the target and returns it.
func (m *mockNetworkScanService) CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	if len(target.Name) == 0 {
		return nil, fmt.Errorf("name is required")
	}

	target.ID = "nst-test-123"
	m.targets = append(m.targets, target)

	return target, nil
}
// UpdateTarget finds the stored target with the given id and, when the
// supplied target has a non-empty name, copies that name onto it. The stored
// target is returned; an unknown id yields a "not found" error.
func (m *mockNetworkScanService) UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	for _, existing := range m.targets {
		if existing.ID != id {
			continue
		}
		if target.Name != "" {
			existing.Name = target.Name
		}
		return existing, nil
	}
	return nil, fmt.Errorf("not found: %s", id)
}
// DeleteTarget removes the first stored target whose ID equals id, or returns
// a "not found" error when none matches.
func (m *mockNetworkScanService) DeleteTarget(ctx context.Context, id string) error {
	for pos := range m.targets {
		if m.targets[pos].ID != id {
			continue
		}
		// Shift the tail left over the removed element and drop the last slot.
		copy(m.targets[pos:], m.targets[pos+1:])
		m.targets = m.targets[:len(m.targets)-1]
		return nil
	}
	return fmt.Errorf("not found: %s", id)
}
// TriggerScan returns a canned scan record (3 certificates found) when
// targetID matches a stored target, and a "not found" error otherwise.
func (m *mockNetworkScanService) TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error) {
	for _, existing := range m.targets {
		if existing.ID != targetID {
			continue
		}
		scan := new(domain.DiscoveryScan)
		scan.ID = "dscan-test"
		scan.AgentID = "server-scanner"
		scan.CertificatesFound = 3
		return scan, nil
	}
	return nil, fmt.Errorf("not found: %s", targetID)
}
// TestListNetworkScanTargets verifies that listing two seeded targets replies
// 200 with a PagedResponse whose Total is 2.
func TestListNetworkScanTargets(t *testing.T) {
	svc := &mockNetworkScanService{
		targets: []*domain.NetworkScanTarget{
			{ID: "nst-1", Name: "target1", CIDRs: []string{"10.0.0.0/24"}, Ports: []int{443}},
			{ID: "nst-2", Name: "target2", CIDRs: []string{"192.168.0.0/16"}, Ports: []int{443, 8443}},
		},
	}
	h := NewNetworkScanHandler(svc)

	req := httptest.NewRequest(http.MethodGet, "/api/v1/network-scan-targets", nil)
	w := httptest.NewRecorder()
	h.ListNetworkScanTargets(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d", w.Code)
	}

	var resp PagedResponse
	// Fail loudly on an undecodable body instead of reporting a misleading
	// "total" mismatch against a zero-valued response.
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatalf("decoding response body: %v", err)
	}
	if resp.Total != 2 {
		t.Errorf("expected total 2, got %d", resp.Total)
	}
}
func TestListNetworkScanTargets_Empty(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodGet, "/api/v1/network-scan-targets", nil)
w := httptest.NewRecorder()
h.ListNetworkScanTargets(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
func TestCreateNetworkScanTarget(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
body, _ := json.Marshal(map[string]interface{}{
"name": "Production",
"cidrs": []string{"10.0.0.0/24"},
"ports": []int{443},
})
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader(body))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusCreated {
t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
}
}
func TestCreateNetworkScanTarget_InvalidJSON(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader([]byte("not json")))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d", w.Code)
}
}
func TestCreateNetworkScanTarget_MissingName(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
body, _ := json.Marshal(map[string]interface{}{
"cidrs": []string{"10.0.0.0/24"},
})
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader(body))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d", w.Code)
}
}
func TestDeleteNetworkScanTarget_NotFound(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodDelete, "/api/v1/network-scan-targets/nst-nonexistent", nil)
req.SetPathValue("id", "nst-nonexistent")
w := httptest.NewRecorder()
h.DeleteNetworkScanTarget(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("expected 404, got %d", w.Code)
}
}
func TestTriggerNetworkScan(t *testing.T) {
svc := &mockNetworkScanService{
targets: []*domain.NetworkScanTarget{
{ID: "nst-1", Name: "target1"},
},
}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets/nst-1/scan", nil)
req.SetPathValue("id", "nst-1")
w := httptest.NewRecorder()
h.TriggerNetworkScan(w, req)
if w.Code != http.StatusAccepted {
t.Errorf("expected 202, got %d: %s", w.Code, w.Body.String())
}
}
func TestTriggerNetworkScan_NotFound(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets/nst-nonexistent/scan", nil)
req.SetPathValue("id", "nst-nonexistent")
w := httptest.NewRecorder()
h.TriggerNetworkScan(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
func TestListNetworkScanTargets_MethodNotAllowed(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", nil)
w := httptest.NewRecorder()
h.ListNetworkScanTargets(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
+114
View File
@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"strings"
"testing" "testing"
"time" "time"
) )
@@ -202,3 +203,116 @@ func TestGetMetrics_ServiceError(t *testing.T) {
t.Errorf("expected 500, got %d", w.Code) t.Errorf("expected 500, got %d", w.Code)
} }
} }
// --- Prometheus metrics endpoint tests ---
func TestGetPrometheusMetrics_Success(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return &DashboardSummary{
TotalCertificates: 25,
ExpiringCertificates: 3,
ExpiredCertificates: 2,
RevokedCertificates: 1,
ActiveAgents: 4,
TotalAgents: 6,
PendingJobs: 2,
FailedJobs: 1,
CompleteJobs: 15,
}, nil
},
}
h := NewMetricsHandler(mock, time.Now().Add(-1*time.Hour))
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
contentType := w.Header().Get("Content-Type")
if contentType != "text/plain; version=0.0.4; charset=utf-8" {
t.Errorf("expected Prometheus content type, got %q", contentType)
}
body := w.Body.String()
// Check metric lines are present
expected := []string{
"certctl_certificate_total 25",
"certctl_certificate_active 19",
"certctl_certificate_expiring_soon 3",
"certctl_certificate_expired 2",
"certctl_certificate_revoked 1",
"certctl_agent_total 6",
"certctl_agent_online 4",
"certctl_job_pending 2",
"certctl_job_completed_total 15",
"certctl_job_failed_total 1",
"# TYPE certctl_certificate_total gauge",
"# TYPE certctl_job_completed_total counter",
"# HELP certctl_uptime_seconds",
"# TYPE certctl_uptime_seconds gauge",
}
for _, exp := range expected {
if !containsLine(body, exp) {
t.Errorf("expected body to contain %q", exp)
}
}
}
func TestGetPrometheusMetrics_MethodNotAllowed(t *testing.T) {
mock := &MockStatsService{}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
func TestGetPrometheusMetrics_ServiceError(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return nil, fmt.Errorf("db error")
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
func TestGetPrometheusMetrics_ZeroValues(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return &DashboardSummary{}, nil
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
body := w.Body.String()
if !containsLine(body, "certctl_certificate_total 0") {
t.Error("expected zero value for certificate_total")
}
if !containsLine(body, "certctl_job_pending 0") {
t.Error("expected zero value for job_pending")
}
}
// containsLine reports whether substr occurs anywhere in text. Despite the
// name it is a plain substring match and is not anchored to line boundaries.
func containsLine(text, substr string) bool {
	_, _, found := strings.Cut(text, substr)
	return found
}
+10
View File
@@ -61,6 +61,7 @@ func (r *Router) RegisterHandlers(
metrics handler.MetricsHandler, metrics handler.MetricsHandler,
health handler.HealthHandler, health handler.HealthHandler,
discovery handler.DiscoveryHandler, discovery handler.DiscoveryHandler,
networkScan handler.NetworkScanHandler,
) { ) {
// Health endpoints (no auth middleware — must always be accessible) // Health endpoints (no auth middleware — must always be accessible)
r.mux.Handle("GET /health", middleware.Chain( r.mux.Handle("GET /health", middleware.Chain(
@@ -188,6 +189,7 @@ func (r *Router) RegisterHandlers(
// Metrics routes: /api/v1/metrics // Metrics routes: /api/v1/metrics
r.Register("GET /api/v1/metrics", http.HandlerFunc(metrics.GetMetrics)) r.Register("GET /api/v1/metrics", http.HandlerFunc(metrics.GetMetrics))
r.Register("GET /api/v1/metrics/prometheus", http.HandlerFunc(metrics.GetPrometheusMetrics))
// Discovery routes: /api/v1/discovered-certificates, /api/v1/discovery-scans // Discovery routes: /api/v1/discovered-certificates, /api/v1/discovery-scans
r.Register("POST /api/v1/agents/{id}/discoveries", http.HandlerFunc(discovery.SubmitDiscoveryReport)) r.Register("POST /api/v1/agents/{id}/discoveries", http.HandlerFunc(discovery.SubmitDiscoveryReport))
@@ -197,6 +199,14 @@ func (r *Router) RegisterHandlers(
r.Register("POST /api/v1/discovered-certificates/{id}/dismiss", http.HandlerFunc(discovery.DismissDiscovered)) r.Register("POST /api/v1/discovered-certificates/{id}/dismiss", http.HandlerFunc(discovery.DismissDiscovered))
r.Register("GET /api/v1/discovery-scans", http.HandlerFunc(discovery.ListScans)) r.Register("GET /api/v1/discovery-scans", http.HandlerFunc(discovery.ListScans))
r.Register("GET /api/v1/discovery-summary", http.HandlerFunc(discovery.GetDiscoverySummary)) r.Register("GET /api/v1/discovery-summary", http.HandlerFunc(discovery.GetDiscoverySummary))
// Network scan routes: /api/v1/network-scan-targets
r.Register("GET /api/v1/network-scan-targets", http.HandlerFunc(networkScan.ListNetworkScanTargets))
r.Register("POST /api/v1/network-scan-targets", http.HandlerFunc(networkScan.CreateNetworkScanTarget))
r.Register("GET /api/v1/network-scan-targets/{id}", http.HandlerFunc(networkScan.GetNetworkScanTarget))
r.Register("PUT /api/v1/network-scan-targets/{id}", http.HandlerFunc(networkScan.UpdateNetworkScanTarget))
r.Register("DELETE /api/v1/network-scan-targets/{id}", http.HandlerFunc(networkScan.DeleteNetworkScanTarget))
r.Register("POST /api/v1/network-scan-targets/{id}/scan", http.HandlerFunc(networkScan.TriggerNetworkScan))
} }
// GetMux returns the underlying http.ServeMux for direct access if needed. // GetMux returns the underlying http.ServeMux for direct access if needed.
+21 -10
View File
@@ -11,16 +11,17 @@ import (
// Config represents the complete application configuration. // Config represents the complete application configuration.
// All configuration values are read from environment variables with CERTCTL_ prefix. // All configuration values are read from environment variables with CERTCTL_ prefix.
type Config struct { type Config struct {
Server ServerConfig Server ServerConfig
Database DatabaseConfig Database DatabaseConfig
Scheduler SchedulerConfig Scheduler SchedulerConfig
Log LogConfig Log LogConfig
Auth AuthConfig Auth AuthConfig
RateLimit RateLimitConfig RateLimit RateLimitConfig
CORS CORSConfig CORS CORSConfig
Keygen KeygenConfig Keygen KeygenConfig
CA CAConfig CA CAConfig
Notifiers NotifierConfig Notifiers NotifierConfig
NetworkScan NetworkScanConfig
} }
// NotifierConfig contains configuration for notification connectors. // NotifierConfig contains configuration for notification connectors.
@@ -80,6 +81,12 @@ type OpenSSLConfig struct {
TimeoutSeconds int TimeoutSeconds int
} }
// NetworkScanConfig holds the settings for the server-side active TLS scanner.
type NetworkScanConfig struct {
	// Enabled turns network scanning on (default false).
	Enabled bool
	// ScanInterval is how often network scans run (default 6h).
	ScanInterval time.Duration
}
// ServerConfig contains HTTP server configuration. // ServerConfig contains HTTP server configuration.
type ServerConfig struct { type ServerConfig struct {
Host string Host string
@@ -178,6 +185,10 @@ func Load() (*Config, error) {
OpsGenieAPIKey: getEnv("CERTCTL_OPSGENIE_API_KEY", ""), OpsGenieAPIKey: getEnv("CERTCTL_OPSGENIE_API_KEY", ""),
OpsGeniePriority: getEnv("CERTCTL_OPSGENIE_PRIORITY", "P3"), OpsGeniePriority: getEnv("CERTCTL_OPSGENIE_PRIORITY", "P3"),
}, },
NetworkScan: NetworkScanConfig{
Enabled: getEnvBool("CERTCTL_NETWORK_SCAN_ENABLED", false),
ScanInterval: getEnvDuration("CERTCTL_NETWORK_SCAN_INTERVAL", 6*time.Hour),
},
} }
if err := cfg.Validate(); err != nil { if err := cfg.Validate(); err != nil {
+27
View File
@@ -0,0 +1,27 @@
package domain
import "time"
// NetworkScanTarget describes a set of network ranges and ports to scan for
// TLS certificates, together with the outcome of the most recent scan.
type NetworkScanTarget struct {
	// Identity.
	ID   string `json:"id"`
	Name string `json:"name"`

	// What to scan and how.
	CIDRs             []string `json:"cidrs"`
	Ports             []int    `json:"ports"`
	Enabled           bool     `json:"enabled"`
	ScanIntervalHours int      `json:"scan_interval_hours"`
	TimeoutMs         int      `json:"timeout_ms"`

	// Last-scan results; nil (and omitted from JSON) until set.
	LastScanAt         *time.Time `json:"last_scan_at,omitempty"`
	LastScanDurationMs *int       `json:"last_scan_duration_ms,omitempty"`
	LastScanCertsFound *int       `json:"last_scan_certs_found,omitempty"`

	// Record timestamps.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
// NetworkScanResult is the outcome of probing one endpoint.
type NetworkScanResult struct {
	// Address is the probed endpoint in "ip:port" form.
	Address string
	// Certs lists the certificate entries recorded for the endpoint.
	Certs []DiscoveredCertEntry
	// Error carries an error message as text.
	Error string
	// LatencyMs is a latency measurement in milliseconds.
	LatencyMs int
}
+67
View File
@@ -0,0 +1,67 @@
package domain
import (
"testing"
"time"
)
// TestNetworkScanTarget_Defaults builds a target without last-scan data and
// checks its ID, its single CIDR, and that LastScanAt stays nil.
func TestNetworkScanTarget_Defaults(t *testing.T) {
	fresh := NetworkScanTarget{
		ID:                "nst-test",
		Name:              "Test Target",
		CIDRs:             []string{"10.0.0.0/24"},
		Ports:             []int{443},
		Enabled:           true,
		ScanIntervalHours: 6,
		TimeoutMs:         5000,
	}

	if got := fresh.ID; got != "nst-test" {
		t.Errorf("expected ID nst-test, got %s", got)
	}
	if cidrs := fresh.CIDRs; len(cidrs) != 1 || cidrs[0] != "10.0.0.0/24" {
		t.Errorf("unexpected CIDRs: %v", cidrs)
	}
	if fresh.LastScanAt != nil {
		t.Error("expected nil LastScanAt for new target")
	}
}
// TestNetworkScanTarget_WithScanResults builds a target carrying last-scan
// data and checks the port count and the certificates-found value.
func TestNetworkScanTarget_WithScanResults(t *testing.T) {
	var (
		scannedAt  = time.Now()
		elapsedMs  = 1500
		certsFound = 12
	)

	scanned := NetworkScanTarget{
		ID:                 "nst-prod",
		Name:               "Production Network",
		CIDRs:              []string{"192.168.1.0/24", "10.0.0.0/16"},
		Ports:              []int{443, 8443, 636},
		Enabled:            true,
		ScanIntervalHours:  1,
		TimeoutMs:          3000,
		LastScanAt:         &scannedAt,
		LastScanDurationMs: &elapsedMs,
		LastScanCertsFound: &certsFound,
	}

	if n := len(scanned.Ports); n != 3 {
		t.Errorf("expected 3 ports, got %d", n)
	}
	if got := *scanned.LastScanCertsFound; got != 12 {
		t.Errorf("expected 12 certs found, got %d", got)
	}
}
// TestNetworkScanResult_Fields checks that Address and LatencyMs hold the
// values they were constructed with.
func TestNetworkScanResult_Fields(t *testing.T) {
	probe := NetworkScanResult{
		Address:   "192.168.1.1:443",
		Error:     "",
		LatencyMs: 45,
	}

	if got := probe.Address; got != "192.168.1.1:443" {
		t.Errorf("expected address 192.168.1.1:443, got %s", got)
	}
	if got := probe.LatencyMs; got != 45 {
		t.Errorf("expected latency 45ms, got %d", got)
	}
}
+29
View File
@@ -80,6 +80,7 @@ func TestCertificateLifecycle(t *testing.T) {
metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now()) metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now())
healthHandler := handler.NewHealthHandler("none") healthHandler := handler.NewHealthHandler("none")
discoveryHandler := handler.NewDiscoveryHandler(&mockDiscoveryService{}) discoveryHandler := handler.NewDiscoveryHandler(&mockDiscoveryService{})
networkScanHandler := handler.NewNetworkScanHandler(&mockNetworkScanService{})
// Create router and register handlers // Create router and register handlers
r := router.New() r := router.New()
@@ -100,6 +101,7 @@ func TestCertificateLifecycle(t *testing.T) {
metricsHandler, metricsHandler,
healthHandler, healthHandler,
discoveryHandler, discoveryHandler,
networkScanHandler,
) )
// Create test server // Create test server
@@ -1174,3 +1176,30 @@ func (m *mockDiscoveryService) GetScan(ctx context.Context, id string) (*domain.
func (m *mockDiscoveryService) GetDiscoverySummary(ctx context.Context) (map[string]int, error) { func (m *mockDiscoveryService) GetDiscoverySummary(ctx context.Context) (map[string]int, error) {
return map[string]int{}, nil return map[string]int{}, nil
} }
// mockNetworkScanService is a stateless stand-in for
// handler.NetworkScanService, used to wire the network scan handler into the
// integration test router.
type mockNetworkScanService struct{}

// ListTargets reports no targets and no error.
func (m *mockNetworkScanService) ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	var none []*domain.NetworkScanTarget
	return none, nil
}

// GetTarget always fails with "not found".
func (m *mockNetworkScanService) GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
	notFound := fmt.Errorf("not found")
	return nil, notFound
}

// CreateTarget echoes the supplied target back unchanged.
func (m *mockNetworkScanService) CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	echoed := target
	return echoed, nil
}

// UpdateTarget echoes the supplied target back unchanged; id is ignored.
func (m *mockNetworkScanService) UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	echoed := target
	return echoed, nil
}

// DeleteTarget always succeeds.
func (m *mockNetworkScanService) DeleteTarget(ctx context.Context, id string) error {
	var noErr error
	return noErr
}

// TriggerScan reports no scan record and no error.
func (m *mockNetworkScanService) TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error) {
	var noScan *domain.DiscoveryScan
	return noScan, nil
}
+4
View File
@@ -73,6 +73,7 @@ func setupTestServer(t *testing.T) (*httptest.Server, *mockCertificateRepository
metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now()) metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now())
healthHandler := handler.NewHealthHandler("none") healthHandler := handler.NewHealthHandler("none")
discoveryHandler := handler.NewDiscoveryHandler(&mockDiscoveryService{}) discoveryHandler := handler.NewDiscoveryHandler(&mockDiscoveryService{})
networkScanHandler := handler.NewNetworkScanHandler(&mockNetworkScanService{})
r := router.New() r := router.New()
r.RegisterHandlers( r.RegisterHandlers(
@@ -92,6 +93,7 @@ func setupTestServer(t *testing.T) (*httptest.Server, *mockCertificateRepository
metricsHandler, metricsHandler,
healthHandler, healthHandler,
discoveryHandler, discoveryHandler,
networkScanHandler,
) )
server := httptest.NewServer(r) server := httptest.NewServer(r)
@@ -796,3 +798,5 @@ func TestRevocationEndpoints(t *testing.T) {
} }
}) })
} }
// mockNetworkScanService is defined in lifecycle_test.go (same package)
+18
View File
@@ -238,6 +238,24 @@ type DiscoveryFilter struct {
PerPage int PerPage int
} }
// NetworkScanRepository is the persistence contract for network scan targets.
type NetworkScanRepository interface {
	// Read operations.

	// List returns every network scan target.
	List(ctx context.Context) ([]*domain.NetworkScanTarget, error)
	// ListEnabled returns only the targets that are enabled.
	ListEnabled(ctx context.Context) ([]*domain.NetworkScanTarget, error)
	// Get looks up a single target by its ID.
	Get(ctx context.Context, id string) (*domain.NetworkScanTarget, error)

	// Write operations.

	// Create persists a new target.
	Create(ctx context.Context, target *domain.NetworkScanTarget) error
	// Update changes an existing target.
	Update(ctx context.Context, target *domain.NetworkScanTarget) error
	// Delete removes the target with the given ID.
	Delete(ctx context.Context, id string) error
	// UpdateScanResults stores the time, duration and certificate count of a
	// target's most recent scan.
	UpdateScanResults(ctx context.Context, id string, scanAt time.Time, durationMs int, certsFound int) error
}
// OwnerRepository defines operations for managing certificate owners. // OwnerRepository defines operations for managing certificate owners.
type OwnerRepository interface { type OwnerRepository interface {
// List returns all owners. // List returns all owners.
@@ -0,0 +1,181 @@
package postgres
import (
"context"
"database/sql"
"fmt"
"time"
"github.com/lib/pq"
"github.com/shankar0123/certctl/internal/domain"
)
// NetworkScanRepository is the PostgreSQL implementation of
// repository.NetworkScanRepository.
type NetworkScanRepository struct {
	// db is the database handle every query in this repository runs against.
	db *sql.DB
}
// NewNetworkScanRepository returns a network scan repository that runs its
// queries against db.
func NewNetworkScanRepository(db *sql.DB) *NetworkScanRepository {
	repo := new(NetworkScanRepository)
	repo.db = db
	return repo
}
// List returns every network scan target, newest first (by created_at).
func (r *NetworkScanRepository) List(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	const query = `
SELECT id, name, cidrs, ports, enabled, scan_interval_hours, timeout_ms,
last_scan_at, last_scan_duration_ms, last_scan_certs_found,
created_at, updated_at
FROM network_scan_targets
ORDER BY created_at DESC`

	cursor, queryErr := r.db.QueryContext(ctx, query)
	if queryErr != nil {
		return nil, fmt.Errorf("list network scan targets: %w", queryErr)
	}
	defer cursor.Close()

	return r.scanRows(cursor)
}
// ListEnabled returns the network scan targets whose enabled flag is TRUE,
// newest first (by created_at).
func (r *NetworkScanRepository) ListEnabled(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	const query = `
SELECT id, name, cidrs, ports, enabled, scan_interval_hours, timeout_ms,
last_scan_at, last_scan_duration_ms, last_scan_certs_found,
created_at, updated_at
FROM network_scan_targets
WHERE enabled = TRUE
ORDER BY created_at DESC`

	cursor, queryErr := r.db.QueryContext(ctx, query)
	if queryErr != nil {
		return nil, fmt.Errorf("list enabled network scan targets: %w", queryErr)
	}
	defer cursor.Close()

	return r.scanRows(cursor)
}
// Get loads the network scan target with the given ID. It returns a
// "not found" error when no row matches. Nullable last-scan columns are
// mapped to nil pointers when NULL.
func (r *NetworkScanRepository) Get(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
	const query = `
SELECT id, name, cidrs, ports, enabled, scan_interval_hours, timeout_ms,
last_scan_at, last_scan_duration_ms, last_scan_certs_found,
created_at, updated_at
FROM network_scan_targets
WHERE id = $1`

	var (
		found      = &domain.NetworkScanTarget{}
		scannedAt  sql.NullTime
		elapsedMs  sql.NullInt64
		certsFound sql.NullInt64
	)

	scanErr := r.db.QueryRowContext(ctx, query, id).Scan(
		&found.ID, &found.Name, pq.Array(&found.CIDRs), pq.Array(&found.Ports),
		&found.Enabled, &found.ScanIntervalHours, &found.TimeoutMs,
		&scannedAt, &elapsedMs, &certsFound,
		&found.CreatedAt, &found.UpdatedAt,
	)
	switch {
	case scanErr == sql.ErrNoRows:
		return nil, fmt.Errorf("network scan target not found: %s", id)
	case scanErr != nil:
		return nil, fmt.Errorf("get network scan target: %w", scanErr)
	}

	// Copy only the non-NULL optional columns onto the target.
	if scannedAt.Valid {
		found.LastScanAt = &scannedAt.Time
	}
	if elapsedMs.Valid {
		ms := int(elapsedMs.Int64)
		found.LastScanDurationMs = &ms
	}
	if certsFound.Valid {
		count := int(certsFound.Int64)
		found.LastScanCertsFound = &count
	}

	return found, nil
}
// Create inserts target as a new row. The ID and both timestamps are written
// exactly as set on target; the last-scan columns are not part of the insert.
func (r *NetworkScanRepository) Create(ctx context.Context, target *domain.NetworkScanTarget) error {
	const stmt = `
INSERT INTO network_scan_targets (id, name, cidrs, ports, enabled, scan_interval_hours, timeout_ms, created_at, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`

	if _, execErr := r.db.ExecContext(ctx, stmt,
		target.ID, target.Name, pq.Array(target.CIDRs), pq.Array(target.Ports),
		target.Enabled, target.ScanIntervalHours, target.TimeoutMs,
		target.CreatedAt, target.UpdatedAt,
	); execErr != nil {
		return fmt.Errorf("create network scan target: %w", execErr)
	}
	return nil
}
// Update overwrites the editable columns (name, cidrs, ports, enabled,
// scan_interval_hours, timeout_ms) of the row identified by target.ID and
// sets updated_at to the current time.
//
// It returns an error when the statement fails, when the affected-row count
// cannot be read, or when no row matched target.ID ("not found").
func (r *NetworkScanRepository) Update(ctx context.Context, target *domain.NetworkScanTarget) error {
	result, err := r.db.ExecContext(ctx, `
UPDATE network_scan_targets
SET name = $1, cidrs = $2, ports = $3, enabled = $4, scan_interval_hours = $5, timeout_ms = $6, updated_at = $7
WHERE id = $8`,
		target.Name, pq.Array(target.CIDRs), pq.Array(target.Ports),
		target.Enabled, target.ScanIntervalHours, target.TimeoutMs,
		time.Now(), target.ID,
	)
	if err != nil {
		return fmt.Errorf("update network scan target: %w", err)
	}

	// Surface a RowsAffected failure instead of treating it as zero rows,
	// which would be misreported as "not found".
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update network scan target: reading rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("network scan target not found: %s", target.ID)
	}
	return nil
}
// Delete removes the row with the given ID.
//
// It returns an error when the statement fails, when the affected-row count
// cannot be read, or when no row matched id ("not found").
func (r *NetworkScanRepository) Delete(ctx context.Context, id string) error {
	result, err := r.db.ExecContext(ctx, `DELETE FROM network_scan_targets WHERE id = $1`, id)
	if err != nil {
		return fmt.Errorf("delete network scan target: %w", err)
	}

	// Surface a RowsAffected failure instead of treating it as zero rows,
	// which would be misreported as "not found".
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete network scan target: reading rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("network scan target not found: %s", id)
	}
	return nil
}
// UpdateScanResults writes the time, duration and certificate count of the
// latest scan onto the row with the given ID and sets updated_at to the
// current time. The number of affected rows is not inspected.
func (r *NetworkScanRepository) UpdateScanResults(ctx context.Context, id string, scanAt time.Time, durationMs int, certsFound int) error {
	const stmt = `
UPDATE network_scan_targets
SET last_scan_at = $1, last_scan_duration_ms = $2, last_scan_certs_found = $3, updated_at = $4
WHERE id = $5`

	if _, execErr := r.db.ExecContext(ctx, stmt, scanAt, durationMs, certsFound, time.Now(), id); execErr != nil {
		return fmt.Errorf("update scan results: %w", execErr)
	}
	return nil
}
// scanRows drains rows into a slice of targets. Each row must carry the
// twelve columns selected by List/ListEnabled, in that order. NULL last-scan
// columns become nil pointers. The accumulated slice is returned together
// with rows.Err().
func (r *NetworkScanRepository) scanRows(rows *sql.Rows) ([]*domain.NetworkScanTarget, error) {
	var collected []*domain.NetworkScanTarget

	for rows.Next() {
		var (
			item       = new(domain.NetworkScanTarget)
			scannedAt  sql.NullTime
			elapsedMs  sql.NullInt64
			certsFound sql.NullInt64
		)

		scanErr := rows.Scan(
			&item.ID, &item.Name, pq.Array(&item.CIDRs), pq.Array(&item.Ports),
			&item.Enabled, &item.ScanIntervalHours, &item.TimeoutMs,
			&scannedAt, &elapsedMs, &certsFound,
			&item.CreatedAt, &item.UpdatedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan network scan target row: %w", scanErr)
		}

		// Copy only the non-NULL optional columns onto the target.
		if scannedAt.Valid {
			item.LastScanAt = &scannedAt.Time
		}
		if elapsedMs.Valid {
			ms := int(elapsedMs.Int64)
			item.LastScanDurationMs = &ms
		}
		if certsFound.Valid {
			count := int(certsFound.Int64)
			item.LastScanCertsFound = &count
		}

		collected = append(collected, item)
	}

	return collected, rows.Err()
}
+45
View File
@@ -16,6 +16,7 @@ type Scheduler struct {
jobService *service.JobService jobService *service.JobService
agentService *service.AgentService agentService *service.AgentService
notificationService *service.NotificationService notificationService *service.NotificationService
networkScanService *service.NetworkScanService
logger *slog.Logger logger *slog.Logger
// Configurable tick intervals // Configurable tick intervals
@@ -24,6 +25,7 @@ type Scheduler struct {
agentHealthCheckInterval time.Duration agentHealthCheckInterval time.Duration
notificationProcessInterval time.Duration notificationProcessInterval time.Duration
shortLivedExpiryCheckInterval time.Duration shortLivedExpiryCheckInterval time.Duration
networkScanInterval time.Duration
} }
// NewScheduler creates a new scheduler with configurable intervals. // NewScheduler creates a new scheduler with configurable intervals.
@@ -32,6 +34,7 @@ func NewScheduler(
jobService *service.JobService, jobService *service.JobService,
agentService *service.AgentService, agentService *service.AgentService,
notificationService *service.NotificationService, notificationService *service.NotificationService,
networkScanService *service.NetworkScanService,
logger *slog.Logger, logger *slog.Logger,
) *Scheduler { ) *Scheduler {
return &Scheduler{ return &Scheduler{
@@ -39,6 +42,7 @@ func NewScheduler(
jobService: jobService, jobService: jobService,
agentService: agentService, agentService: agentService,
notificationService: notificationService, notificationService: notificationService,
networkScanService: networkScanService,
logger: logger, logger: logger,
// Default intervals // Default intervals
@@ -47,6 +51,7 @@ func NewScheduler(
agentHealthCheckInterval: 2 * time.Minute, agentHealthCheckInterval: 2 * time.Minute,
notificationProcessInterval: 1 * time.Minute, notificationProcessInterval: 1 * time.Minute,
shortLivedExpiryCheckInterval: 30 * time.Second, shortLivedExpiryCheckInterval: 30 * time.Second,
networkScanInterval: 6 * time.Hour,
} }
} }
@@ -70,6 +75,11 @@ func (s *Scheduler) SetNotificationProcessInterval(d time.Duration) {
s.notificationProcessInterval = d s.notificationProcessInterval = d
} }
// SetNetworkScanInterval overrides how often the network scan loop runs.
// The value is stored as given; no validation is performed here.
func (s *Scheduler) SetNetworkScanInterval(d time.Duration) {
	interval := d
	s.networkScanInterval = interval
}
// Start initiates all background scheduler loops. It returns a channel that signals // Start initiates all background scheduler loops. It returns a channel that signals
// when the scheduler has started all loops. The scheduler runs until the context is cancelled. // when the scheduler has started all loops. The scheduler runs until the context is cancelled.
func (s *Scheduler) Start(ctx context.Context) <-chan struct{} { func (s *Scheduler) Start(ctx context.Context) <-chan struct{} {
@@ -90,6 +100,9 @@ func (s *Scheduler) Start(ctx context.Context) <-chan struct{} {
go s.agentHealthCheckLoop(ctx) go s.agentHealthCheckLoop(ctx)
go s.notificationProcessLoop(ctx) go s.notificationProcessLoop(ctx)
go s.shortLivedExpiryCheckLoop(ctx) go s.shortLivedExpiryCheckLoop(ctx)
if s.networkScanService != nil {
go s.networkScanLoop(ctx)
}
// Wait for context cancellation // Wait for context cancellation
<-ctx.Done() <-ctx.Done()
@@ -258,3 +271,35 @@ func (s *Scheduler) runShortLivedExpiryCheck(ctx context.Context) {
s.logger.Debug("short-lived expiry check completed") s.logger.Debug("short-lived expiry check completed")
} }
} }
// networkScanLoop performs one network scan immediately and then one per
// networkScanInterval until ctx is cancelled.
//
// time.NewTicker panics on a non-positive duration, so an interval that is
// zero or negative (for example from a misconfigured environment variable or
// a bad SetNetworkScanInterval call) is replaced by the 6h default, with a
// warning, instead of crashing the process.
func (s *Scheduler) networkScanLoop(ctx context.Context) {
	// Mirrors the default assigned in NewScheduler.
	const fallbackInterval = 6 * time.Hour

	interval := s.networkScanInterval
	if interval <= 0 {
		s.logger.Warn("invalid network scan interval, using default",
			"interval", interval.String(),
			"default", fallbackInterval.String())
		interval = fallbackInterval
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	// Run immediately on start rather than waiting for the first tick.
	s.runNetworkScan(ctx)

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			s.runNetworkScan(ctx)
		}
	}
}
// runNetworkScan performs one scan pass over the network scan service's
// targets. The pass is bounded by a 30-minute timeout derived from ctx, and a
// failure is logged rather than returned so the calling loop keeps ticking.
func (s *Scheduler) runNetworkScan(ctx context.Context) {
	scanCtx, stop := context.WithTimeout(ctx, 30*time.Minute)
	defer stop()

	err := s.networkScanService.ScanAllTargets(scanCtx)
	if err == nil {
		s.logger.Debug("network scan completed")
		return
	}

	s.logger.Error("network scan failed",
		"error", err,
		"interval", s.networkScanInterval.String())
}
+436
View File
@@ -0,0 +1,436 @@
package service
import (
"context"
"crypto/ecdsa"
"crypto/rsa"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/pem"
"fmt"
"log/slog"
"net"
"sync"
"time"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// SentinelAgentID is the agent ID used for network-discovered certificates.
// Discovery reports built from server-side scans are submitted under this ID,
// which allows the existing discovery dedup constraint
// (fingerprint, agent_id, source_path) to work without schema changes.
const SentinelAgentID = "server-scanner"
// NetworkScanService manages active TLS scanning of network endpoints.
// It provides the CRUD operations for scan targets and runs the scans
// themselves, handing any certificates it finds to the discovery service.
type NetworkScanService struct {
	// networkScanRepo persists scan targets and their last-scan statistics.
	networkScanRepo repository.NetworkScanRepository
	// discoveryService receives the discovery reports built from scan results.
	discoveryService *DiscoveryService
	// auditService records audit events for target changes.
	auditService *AuditService
	// logger is optional; the scan methods check it for nil before logging.
	logger *slog.Logger
	// concurrency is the maximum number of TLS probes in flight at once.
	concurrency int
}
// NewNetworkScanService builds a NetworkScanService from its collaborators.
// The probe concurrency is fixed at 50 simultaneous connections.
func NewNetworkScanService(
	networkScanRepo repository.NetworkScanRepository,
	discoveryService *DiscoveryService,
	auditService *AuditService,
	logger *slog.Logger,
) *NetworkScanService {
	svc := &NetworkScanService{concurrency: 50}
	svc.networkScanRepo = networkScanRepo
	svc.discoveryService = discoveryService
	svc.auditService = auditService
	svc.logger = logger
	return svc
}
// ListTargets fetches every network scan target from the repository,
// passing the repository's result and error through unchanged.
func (s *NetworkScanService) ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	targets, err := s.networkScanRepo.List(ctx)
	return targets, err
}
// GetTarget looks up a single network scan target by its ID, passing the
// repository's result and error through unchanged.
func (s *NetworkScanService) GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
	target, err := s.networkScanRepo.Get(ctx, id)
	return target, err
}
// CreateTarget validates and persists a new network scan target.
//
// Validation: the name and at least one CIDR (or plain IP) are required, every
// port must be in 1-65535, and the timeout and scan interval must not be
// negative. Defaults are applied for omitted fields: port 443, a 6-hour scan
// interval and a 5000 ms timeout.
//
// The target passed in is mutated: it receives a generated ID, is always
// marked enabled, and gets identical CreatedAt/UpdatedAt timestamps. On
// success an audit event is recorded and the same pointer is returned.
func (s *NetworkScanService) CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	if target == nil {
		return nil, fmt.Errorf("target is required")
	}
	if target.Name == "" {
		return nil, fmt.Errorf("name is required")
	}
	if len(target.CIDRs) == 0 {
		return nil, fmt.Errorf("at least one CIDR is required")
	}

	// Validate CIDRs
	for _, cidr := range target.CIDRs {
		if _, _, err := net.ParseCIDR(cidr); err != nil {
			// Try parsing as plain IP
			if ip := net.ParseIP(cidr); ip == nil {
				return nil, fmt.Errorf("invalid CIDR or IP: %s", cidr)
			}
		}
	}

	// Reject ports that can never be dialed instead of storing them and
	// failing on every scan.
	for _, port := range target.Ports {
		if port < 1 || port > 65535 {
			return nil, fmt.Errorf("invalid port: %d (must be 1-65535)", port)
		}
	}
	// A negative dial timeout expires immediately, so every probe would fail.
	if target.TimeoutMs < 0 {
		return nil, fmt.Errorf("timeout_ms must not be negative")
	}
	if target.ScanIntervalHours < 0 {
		return nil, fmt.Errorf("scan_interval_hours must not be negative")
	}

	if len(target.Ports) == 0 {
		target.Ports = []int{443}
	}
	if target.ScanIntervalHours == 0 {
		target.ScanIntervalHours = 6
	}
	if target.TimeoutMs == 0 {
		target.TimeoutMs = 5000
	}

	// Use a single timestamp so CreatedAt and UpdatedAt are equal on creation.
	now := time.Now()
	target.ID = generateID("nst")
	target.Enabled = true
	target.CreatedAt = now
	target.UpdatedAt = now

	if err := s.networkScanRepo.Create(ctx, target); err != nil {
		return nil, err
	}

	s.auditService.RecordEvent(ctx, "operator", domain.ActorTypeUser,
		"network_scan_target_created", "network_scan_target", target.ID,
		map[string]interface{}{
			"name":  target.Name,
			"cidrs": target.CIDRs,
			"ports": target.Ports,
		})

	return target, nil
}
// UpdateTarget applies a partial update to the scan target identified by id.
//
// Name, CIDRs, Ports, ScanIntervalHours and TimeoutMs are only overwritten
// when the corresponding field of target is non-empty / positive. New CIDRs
// must parse as a CIDR or plain IP and new ports must be in 1-65535.
//
// NOTE: Enabled is always copied from target, so a caller that omits it
// disables the target.
//
// UpdatedAt is refreshed and, mirroring create and delete, an audit event is
// recorded on success. Returns the stored target after the update.
func (s *NetworkScanService) UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
	if target == nil {
		return nil, fmt.Errorf("target is required")
	}

	existing, err := s.networkScanRepo.Get(ctx, id)
	if err != nil {
		return nil, err
	}

	if target.Name != "" {
		existing.Name = target.Name
	}
	if len(target.CIDRs) > 0 {
		// Validate new CIDRs
		for _, cidr := range target.CIDRs {
			if _, _, err := net.ParseCIDR(cidr); err != nil {
				if ip := net.ParseIP(cidr); ip == nil {
					return nil, fmt.Errorf("invalid CIDR or IP: %s", cidr)
				}
			}
		}
		existing.CIDRs = target.CIDRs
	}
	if len(target.Ports) > 0 {
		// Reject ports that can never be dialed.
		for _, port := range target.Ports {
			if port < 1 || port > 65535 {
				return nil, fmt.Errorf("invalid port: %d (must be 1-65535)", port)
			}
		}
		existing.Ports = target.Ports
	}
	if target.ScanIntervalHours > 0 {
		existing.ScanIntervalHours = target.ScanIntervalHours
	}
	if target.TimeoutMs > 0 {
		existing.TimeoutMs = target.TimeoutMs
	}
	// Always update enabled field (it's a boolean, so 0-value is meaningful)
	existing.Enabled = target.Enabled
	existing.UpdatedAt = time.Now()

	if err := s.networkScanRepo.Update(ctx, existing); err != nil {
		return nil, err
	}

	// Guarded because this call is new: do not introduce a panic for callers
	// that construct the service without an audit service.
	if s.auditService != nil {
		s.auditService.RecordEvent(ctx, "operator", domain.ActorTypeUser,
			"network_scan_target_updated", "network_scan_target", existing.ID,
			map[string]interface{}{
				"name":    existing.Name,
				"cidrs":   existing.CIDRs,
				"ports":   existing.Ports,
				"enabled": existing.Enabled,
			})
	}

	return existing, nil
}
// DeleteTarget deletes the scan target with the given ID and, once the
// repository has confirmed the delete, records an audit event for it.
// A repository error is returned as-is and no audit event is written.
func (s *NetworkScanService) DeleteTarget(ctx context.Context, id string) error {
	deleteErr := s.networkScanRepo.Delete(ctx, id)
	if deleteErr != nil {
		return deleteErr
	}

	s.auditService.RecordEvent(
		ctx,
		"operator",
		domain.ActorTypeUser,
		"network_scan_target_deleted",
		"network_scan_target",
		id,
		nil,
	)
	return nil
}
// ScanAllTargets scans every enabled target, one after another.
// The scheduler invokes it on the configured interval.
//
// It returns an error when the enabled targets cannot be listed, or the
// context's error when ctx is done before the next target starts. Per-target
// scan results are not reported to the caller.
func (s *NetworkScanService) ScanAllTargets(ctx context.Context) error {
	enabled, err := s.networkScanRepo.ListEnabled(ctx)
	if err != nil {
		return fmt.Errorf("list enabled targets: %w", err)
	}

	canLog := s.logger != nil

	if len(enabled) == 0 {
		if canLog {
			s.logger.Debug("no enabled network scan targets")
		}
		return nil
	}

	if canLog {
		s.logger.Info("starting network scan", "targets", len(enabled))
	}

	for i := range enabled {
		// Stop between targets once the caller has cancelled or timed out.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		s.scanTarget(ctx, enabled[i])
	}
	return nil
}
// TriggerScan looks up the target with the given ID and scans it right away.
// A lookup failure is returned as the error; otherwise the error is nil and
// the result is whatever scanTarget returned, which may itself be nil.
func (s *NetworkScanService) TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error) {
	target, lookupErr := s.networkScanRepo.Get(ctx, targetID)
	if lookupErr != nil {
		return nil, lookupErr
	}

	scan := s.scanTarget(ctx, target)
	return scan, nil
}
// scanTarget scans a single network target and feeds results into the discovery pipeline.
//
// It expands the target's CIDRs and ports into endpoints, probes them
// concurrently, stores the scan statistics on the target and, when at least
// one certificate was found, submits a discovery report under SentinelAgentID.
//
// Returns the discovery scan record, or nil when no certificates were found
// or the report could not be processed (that failure is logged).
func (s *NetworkScanService) scanTarget(ctx context.Context, target *domain.NetworkScanTarget) *domain.DiscoveryScan {
	startTime := time.Now()

	if s.logger != nil {
		s.logger.Info("scanning network target",
			"target_id", target.ID,
			"name", target.Name,
			"cidrs", target.CIDRs,
			"ports", target.Ports)
	}

	// Expand CIDRs to individual IPs
	endpoints := s.expandEndpoints(target.CIDRs, target.Ports)
	if s.logger != nil {
		s.logger.Debug("expanded endpoints", "count", len(endpoints))
	}

	// Scan endpoints concurrently. A non-positive timeout would leave the
	// dial unbounded (or expire it instantly), so fall back to the same
	// 5000 ms default that CreateTarget applies.
	timeout := time.Duration(target.TimeoutMs) * time.Millisecond
	if timeout <= 0 {
		timeout = 5000 * time.Millisecond
	}
	results := s.scanEndpoints(ctx, endpoints, timeout)

	// Collect discovered cert entries
	var entries []domain.DiscoveredCertEntry
	// scanErrors deliberately stays empty: most probed hosts do not speak
	// TLS, so per-endpoint failures are counted and debug-logged instead of
	// being copied into the discovery report.
	var scanErrors []string
	failedEndpoints := 0

	for _, result := range results {
		if result.Error != "" {
			failedEndpoints++
			// Only log connection errors at debug level (many hosts won't have TLS)
			if s.logger != nil {
				s.logger.Debug("scan endpoint error",
					"address", result.Address,
					"error", result.Error)
			}
			continue
		}
		entries = append(entries, result.Certs...)
	}

	scanDuration := time.Since(startTime)

	if s.logger != nil {
		s.logger.Info("network target scan completed",
			"target_id", target.ID,
			"endpoints_scanned", len(endpoints),
			"certificates_found", len(entries),
			"errors", failedEndpoints,
			"duration_ms", scanDuration.Milliseconds())
	}

	// Update scan results on target. A failure here must not discard the
	// certificates that were found, so it is logged and the scan continues.
	if err := s.networkScanRepo.UpdateScanResults(ctx, target.ID, time.Now(),
		int(scanDuration.Milliseconds()), len(entries)); err != nil && s.logger != nil {
		s.logger.Warn("failed to record network scan results",
			"target_id", target.ID,
			"error", err)
	}

	// Feed into discovery pipeline if we found certs
	if len(entries) == 0 {
		return nil
	}

	// Build directories list from CIDRs for the scan record
	dirs := make([]string, len(target.CIDRs))
	copy(dirs, target.CIDRs)

	report := &domain.DiscoveryReport{
		AgentID:        SentinelAgentID,
		Directories:    dirs,
		Certificates:   entries,
		Errors:         scanErrors,
		ScanDurationMs: int(scanDuration.Milliseconds()),
	}

	scan, err := s.discoveryService.ProcessDiscoveryReport(ctx, report)
	if err != nil {
		if s.logger != nil {
			s.logger.Error("failed to process network scan report",
				"target_id", target.ID,
				"error", err)
		}
		return nil
	}

	return scan
}
// expandEndpoints converts CIDR ranges and ports into a list of dialable
// "host:port" endpoints, ordered by CIDR, then address, then port.
//
// Addresses are combined with net.JoinHostPort so IPv6 hosts are bracketed
// ("[2001:db8::1]:443"); plain "ip:port" concatenation is not a valid dial
// address for IPv6. IPv4 output is unchanged ("192.168.1.1:443").
func (s *NetworkScanService) expandEndpoints(cidrs []string, ports []int) []string {
	// Format each port once instead of once per address.
	portStrs := make([]string, len(ports))
	for i, port := range ports {
		portStrs[i] = fmt.Sprint(port)
	}

	var endpoints []string
	for _, cidr := range cidrs {
		for _, ip := range expandCIDR(cidr) {
			for _, port := range portStrs {
				endpoints = append(endpoints, net.JoinHostPort(ip, port))
			}
		}
	}
	return endpoints
}
// expandCIDR expands a CIDR notation or single IP into a list of IPs.
//
// A plain IP yields just that IP. Unparseable input yields nil. Networks with
// more than 12 host bits (larger than an IPv4 /20, i.e. more than 4096
// addresses) are skipped and also yield nil, to prevent accidental huge scans.
//
// For IPv4 networks with more than two addresses (/30 and larger) the network
// and broadcast addresses are dropped. IPv6 has no broadcast address, so IPv6
// ranges are returned in full.
func expandCIDR(cidr string) []string {
	// Try as CIDR first
	_, ipNet, err := net.ParseCIDR(cidr)
	if err != nil {
		// Try as single IP
		if singleIP := net.ParseIP(cidr); singleIP != nil {
			return []string{singleIP.String()}
		}
		return nil
	}

	// Count network size and cap at 12 host bits
	ones, bits := ipNet.Mask.Size()
	hostBits := bits - ones
	if hostBits > 12 { // More than 4096 addresses
		return nil // Skip overly large networks
	}

	// Walk a private copy of the network address; net.IP is a mutable slice
	// and incrementIP modifies it in place.
	cur := make(net.IP, len(ipNet.IP))
	copy(cur, ipNet.IP)

	ips := make([]string, 0, 1<<uint(hostBits))
	for ; ipNet.Contains(cur); incrementIP(cur) {
		ips = append(ips, cur.String())
	}

	// Remove network and broadcast addresses (IPv4 only, /30 and larger).
	if bits == 8*net.IPv4len && len(ips) > 2 {
		ips = ips[1 : len(ips)-1]
	}

	return ips
}

// incrementIP increments an IP address by one, in place.
// Carries propagate from the last byte towards the first; incrementing the
// all-ones address wraps around to all zeros.
func incrementIP(ip net.IP) {
	for j := len(ip) - 1; j >= 0; j-- {
		ip[j]++
		if ip[j] > 0 {
			// No overflow in this byte, so there is nothing left to carry.
			break
		}
	}
}
// scanEndpoints probes TLS endpoints concurrently and returns results.
//
// The returned slice has one entry per endpoint, in the same order as
// endpoints. At most s.concurrency probes run at once; a non-positive
// concurrency (for example on a zero-value service) falls back to 50 rather
// than creating an unbuffered semaphore that would block forever.
//
// When ctx is cancelled, no further probes are started and the endpoints that
// were never probed are returned with the context error, so they are not
// mistaken for successful probes that found no certificates.
func (s *NetworkScanService) scanEndpoints(ctx context.Context, endpoints []string, timeout time.Duration) []domain.NetworkScanResult {
	results := make([]domain.NetworkScanResult, len(endpoints))

	limit := s.concurrency
	if limit <= 0 {
		limit = 50
	}
	sem := make(chan struct{}, limit)
	var wg sync.WaitGroup

	launched := 0
dispatch:
	for ; launched < len(endpoints); launched++ {
		// Checked first so cancellation wins even when a slot is free.
		if ctx.Err() != nil {
			break
		}
		// Wait for a free slot, but give up if the scan is cancelled meanwhile.
		select {
		case <-ctx.Done():
			break dispatch
		case sem <- struct{}{}:
		}

		wg.Add(1)
		go func(idx int, addr string) {
			defer wg.Done()
			defer func() { <-sem }()
			// Each goroutine writes only its own slot, so no lock is needed.
			results[idx] = s.probeTLS(ctx, addr, timeout)
		}(launched, endpoints[launched])
	}

	wg.Wait()

	// The loop only stops early because of cancellation.
	if err := ctx.Err(); err != nil {
		for i := launched; i < len(endpoints); i++ {
			results[i] = domain.NetworkScanResult{Address: endpoints[i], Error: err.Error()}
		}
	}

	return results
}
// probeTLS connects to an endpoint, performs a TLS handshake, and extracts certificates.
//
// The connection and handshake are bounded by timeout and also abort when ctx
// is cancelled. The previous tls.DialWithDialer call could not observe ctx, so
// in-flight probes outlived a cancelled scan.
//
// The result always carries the address and the elapsed time in milliseconds.
// On failure Error is set and Certs is empty; on success Certs holds one entry
// per certificate the peer presented, in the order presented.
func (s *NetworkScanService) probeTLS(ctx context.Context, address string, timeout time.Duration) domain.NetworkScanResult {
	startTime := time.Now()
	result := domain.NetworkScanResult{Address: address}

	dialer := &tls.Dialer{
		NetDialer: &net.Dialer{Timeout: timeout},
		Config: &tls.Config{
			InsecureSkipVerify: true, // We want to discover ALL certs, including self-signed
		},
	}
	conn, err := dialer.DialContext(ctx, "tcp", address)
	if err != nil {
		result.Error = err.Error()
		result.LatencyMs = int(time.Since(startTime).Milliseconds())
		return result
	}
	defer conn.Close()

	result.LatencyMs = int(time.Since(startTime).Milliseconds())

	// tls.Dialer documents that the returned connection is always a
	// *tls.Conn; the check only guards against a panic.
	tlsConn, ok := conn.(*tls.Conn)
	if !ok {
		result.Error = "unexpected connection type from TLS dialer"
		return result
	}

	// Extract certificates from TLS connection state
	for _, cert := range tlsConn.ConnectionState().PeerCertificates {
		result.Certs = append(result.Certs, tlsCertToEntry(cert, address))
	}

	return result
}
// tlsCertToEntry builds the discovery entry for a certificate seen during a
// TLS handshake. The fingerprint is the lowercase hex SHA-256 of the DER
// bytes, validity times are rendered as UTC RFC 3339, the serial number is
// hex, and the probed address is stored as the source path with the source
// format "network". Only DNS names are copied into SANs.
func tlsCertToEntry(cert *x509.Certificate, address string) domain.DiscoveredCertEntry {
	digest := sha256.Sum256(cert.Raw)
	algorithm, bits := tlsCertKeyInfo(cert)
	encoded := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw})

	entry := domain.DiscoveredCertEntry{
		SourcePath:   address,
		SourceFormat: "network",
	}

	// Identity
	entry.FingerprintSHA256 = fmt.Sprintf("%x", digest)
	entry.CommonName = cert.Subject.CommonName
	entry.SANs = cert.DNSNames
	entry.SerialNumber = cert.SerialNumber.Text(16)
	entry.IssuerDN = cert.Issuer.String()
	entry.SubjectDN = cert.Subject.String()

	// Validity window
	entry.NotBefore = cert.NotBefore.UTC().Format(time.RFC3339)
	entry.NotAfter = cert.NotAfter.UTC().Format(time.RFC3339)

	// Key material and raw certificate
	entry.KeyAlgorithm = algorithm
	entry.KeySize = bits
	entry.IsCA = cert.IsCA
	entry.PEMData = string(encoded)

	return entry
}
// tlsCertKeyInfo reports the public key algorithm of a certificate and its
// size in bits: the modulus length for RSA, the curve size for ECDSA and a
// fixed 256 for Ed25519. Any other algorithm is reported by its x509 name
// with a size of 0.
func tlsCertKeyInfo(cert *x509.Certificate) (string, int) {
	if rsaKey, ok := cert.PublicKey.(*rsa.PublicKey); ok {
		return "RSA", rsaKey.N.BitLen()
	}
	if ecKey, ok := cert.PublicKey.(*ecdsa.PublicKey); ok {
		return "ECDSA", ecKey.Curve.Params().BitSize
	}
	// No concrete key type matched; fall back to the declared algorithm.
	if cert.PublicKeyAlgorithm == x509.Ed25519 {
		return "Ed25519", 256
	}
	return cert.PublicKeyAlgorithm.String(), 0
}
+244
View File
@@ -0,0 +1,244 @@
package service
import (
	"context"
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/shankar0123/certctl/internal/domain"
)
// mockNetworkScanRepo is an in-memory network scan repository for testing.
type mockNetworkScanRepo struct {
	// targets is the backing store that the mock's methods read and mutate.
	targets []*domain.NetworkScanTarget
}
// List returns the mock's backing slice of targets and never fails.
func (m *mockNetworkScanRepo) List(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	all := m.targets
	return all, nil
}
// ListEnabled returns the stored targets whose Enabled flag is set, in
// storage order. The result is nil when none are enabled.
func (m *mockNetworkScanRepo) ListEnabled(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
	var active []*domain.NetworkScanTarget
	for idx := range m.targets {
		candidate := m.targets[idx]
		if !candidate.Enabled {
			continue
		}
		active = append(active, candidate)
	}
	return active, nil
}
// Get returns the first stored target with the given ID, or a "not found"
// error when there is none.
func (m *mockNetworkScanRepo) Get(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
	for idx := range m.targets {
		if candidate := m.targets[idx]; candidate.ID == id {
			return candidate, nil
		}
	}
	return nil, fmt.Errorf("not found: %s", id)
}
// Create appends the target to the backing slice and never fails.
func (m *mockNetworkScanRepo) Create(ctx context.Context, target *domain.NetworkScanTarget) error {
	stored := append(m.targets, target)
	m.targets = stored
	return nil
}
// Update replaces the first stored target that has the same ID as target,
// or returns a "not found" error when no such target exists.
func (m *mockNetworkScanRepo) Update(ctx context.Context, target *domain.NetworkScanTarget) error {
	for idx := range m.targets {
		if m.targets[idx].ID != target.ID {
			continue
		}
		m.targets[idx] = target
		return nil
	}
	return fmt.Errorf("not found: %s", target.ID)
}
// Delete removes the first stored target with the given ID, or returns a
// "not found" error when no such target exists.
func (m *mockNetworkScanRepo) Delete(ctx context.Context, id string) error {
	for idx := range m.targets {
		if m.targets[idx].ID != id {
			continue
		}
		// Splice the element out, keeping the order of the remaining targets.
		m.targets = append(m.targets[:idx], m.targets[idx+1:]...)
		return nil
	}
	return fmt.Errorf("not found: %s", id)
}
// UpdateScanResults stores the last-scan time, duration and certificate count
// on the first target with the given ID, or returns a "not found" error when
// no such target exists.
func (m *mockNetworkScanRepo) UpdateScanResults(ctx context.Context, id string, scanAt time.Time, durationMs int, certsFound int) error {
	for idx := range m.targets {
		entry := m.targets[idx]
		if entry.ID != id {
			continue
		}
		// Local copies give each pointer field its own backing variable.
		duration, found := durationMs, certsFound
		entry.LastScanAt = &scanAt
		entry.LastScanDurationMs = &duration
		entry.LastScanCertsFound = &found
		return nil
	}
	return fmt.Errorf("not found: %s", id)
}
// TestExpandCIDR_SingleIP checks that a bare IP address expands to itself.
func TestExpandCIDR_SingleIP(t *testing.T) {
	got := expandCIDR("192.168.1.1")
	if len(got) == 1 && got[0] == "192.168.1.1" {
		return
	}
	t.Errorf("expected [192.168.1.1], got %v", got)
}
// TestExpandCIDR_Slash30 checks that a /30 (4 addresses in total) yields the
// 2 usable hosts once network and broadcast are removed.
func TestExpandCIDR_Slash30(t *testing.T) {
	const wantCount = 2
	got := expandCIDR("10.0.0.0/30")
	if count := len(got); count != wantCount {
		t.Errorf("expected 2 usable IPs for /30, got %d: %v", count, got)
	}
}
// TestExpandCIDR_Slash24 checks that a /24 yields its 254 usable hosts.
func TestExpandCIDR_Slash24(t *testing.T) {
	const wantCount = 254
	count := len(expandCIDR("10.0.0.0/24"))
	if count != wantCount {
		t.Errorf("expected 254 usable IPs for /24, got %d", count)
	}
}
// TestExpandCIDR_TooLarge checks that a /16 (65536 addresses) exceeds the
// /20 expansion cap and therefore yields nothing.
func TestExpandCIDR_TooLarge(t *testing.T) {
	count := len(expandCIDR("10.0.0.0/16"))
	if count == 0 {
		return
	}
	t.Errorf("expected empty for /16 (too large), got %d", count)
}
// TestExpandCIDR_InvalidInput checks that input which is neither a CIDR nor
// an IP address yields nothing.
func TestExpandCIDR_InvalidInput(t *testing.T) {
	got := expandCIDR("not-a-cidr")
	if len(got) == 0 {
		return
	}
	t.Errorf("expected empty for invalid input, got %v", got)
}
func TestNetworkScanService_CreateTarget(t *testing.T) {
repo := &mockNetworkScanRepo{}
auditRepo := newMockAuditRepository()
auditService := NewAuditService(auditRepo)
svc := NewNetworkScanService(repo, nil, auditService, nil)
target, err := svc.CreateTarget(context.Background(), &domain.NetworkScanTarget{
Name: "Test Network",
CIDRs: []string{"10.0.0.0/24"},
Ports: []int{443, 8443},
})
if err != nil {
t.Fatalf("CreateTarget failed: %v", err)
}
if target.ID == "" {
t.Error("expected non-empty ID")
}
if !target.Enabled {
t.Error("expected target to be enabled by default")
}
if target.ScanIntervalHours != 6 {
t.Errorf("expected default interval 6h, got %d", target.ScanIntervalHours)
}
if target.TimeoutMs != 5000 {
t.Errorf("expected default timeout 5000ms, got %d", target.TimeoutMs)
}
}
func TestNetworkScanService_CreateTarget_ValidationErrors(t *testing.T) {
repo := &mockNetworkScanRepo{}
auditRepo := newMockAuditRepository()
auditService := NewAuditService(auditRepo)
svc := NewNetworkScanService(repo, nil, auditService, nil)
tests := []struct {
name string
target *domain.NetworkScanTarget
errMsg string
}{
{
name: "missing name",
target: &domain.NetworkScanTarget{CIDRs: []string{"10.0.0.0/24"}},
errMsg: "name is required",
},
{
name: "missing cidrs",
target: &domain.NetworkScanTarget{Name: "test"},
errMsg: "at least one CIDR is required",
},
{
name: "invalid cidr",
target: &domain.NetworkScanTarget{Name: "test", CIDRs: []string{"not-valid"}},
errMsg: "invalid CIDR or IP",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := svc.CreateTarget(context.Background(), tt.target)
if err == nil {
t.Fatal("expected error")
}
if !containsSubstring(err.Error(), tt.errMsg) {
t.Errorf("expected error containing %q, got %q", tt.errMsg, err.Error())
}
})
}
}
// TestNetworkScanService_DeleteTarget checks that deleting the only stored
// target succeeds and leaves the repository empty.
func TestNetworkScanService_DeleteTarget(t *testing.T) {
	repo := &mockNetworkScanRepo{}
	repo.targets = []*domain.NetworkScanTarget{
		{ID: "nst-1", Name: "test"},
	}
	svc := NewNetworkScanService(repo, nil, NewAuditService(newMockAuditRepository()), nil)

	err := svc.DeleteTarget(context.Background(), "nst-1")
	if err != nil {
		t.Fatalf("DeleteTarget failed: %v", err)
	}

	if remaining := len(repo.targets); remaining != 0 {
		t.Error("expected target to be deleted")
	}
}
func TestNetworkScanService_ListTargets(t *testing.T) {
repo := &mockNetworkScanRepo{
targets: []*domain.NetworkScanTarget{
{ID: "nst-1", Name: "target1"},
{ID: "nst-2", Name: "target2"},
},
}
svc := NewNetworkScanService(repo, nil, nil, nil)
targets, err := svc.ListTargets(context.Background())
if err != nil {
t.Fatalf("ListTargets failed: %v", err)
}
if len(targets) != 2 {
t.Errorf("expected 2 targets, got %d", len(targets))
}
}
// TestExpandEndpoints checks that one IP combined with two ports yields two
// "ip:port" endpoints, in port order.
func TestExpandEndpoints(t *testing.T) {
	var svc NetworkScanService
	cidrs := []string{"192.168.1.1"}
	ports := []int{443, 8443}

	got := (&svc).expandEndpoints(cidrs, ports)

	if count := len(got); count != 2 {
		t.Errorf("expected 2 endpoints, got %d: %v", count, got)
	}
	if first := got[0]; first != "192.168.1.1:443" {
		t.Errorf("expected 192.168.1.1:443, got %s", first)
	}
	if second := got[1]; second != "192.168.1.1:8443" {
		t.Errorf("expected 192.168.1.1:8443, got %s", second)
	}
}
// containsSubstring reports whether substr occurs anywhere within s.
// An empty substr is contained in every string, including the empty string.
// It is a thin test helper around strings.Contains, which replaces the
// previous hand-rolled scan with identical results.
func containsSubstring(s, substr string) bool {
	return strings.Contains(s, substr)
}
@@ -0,0 +1 @@
-- Drops the network_scan_targets table that the network discovery migration
-- creates. IF EXISTS makes the statement a no-op when the table is absent.
DROP TABLE IF EXISTS network_scan_targets;
@@ -0,0 +1,21 @@
-- Migration 000007: Network Discovery (Active TLS Scanning)
-- The control plane actively scans network endpoints for TLS certificates.
-- Results feed into the existing discovery pipeline (discovered_certificates table).
-- Network scan targets define CIDR ranges and ports to probe for TLS certificates
CREATE TABLE IF NOT EXISTS network_scan_targets (
    -- Application-generated identifier.
    id TEXT PRIMARY KEY,
    -- Human-readable target name.
    name TEXT NOT NULL,
    -- CIDR ranges (or plain IPs) to scan.
    cidrs TEXT[] NOT NULL DEFAULT '{}',
    -- TCP ports probed on every address; defaults to 443.
    ports INTEGER[] NOT NULL DEFAULT '{443}',
    -- Whether the target is enabled for scanning.
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    -- Scan interval for this target, in hours.
    scan_interval_hours INTEGER NOT NULL DEFAULT 6,
    -- Per-endpoint connection timeout, in milliseconds.
    timeout_ms INTEGER NOT NULL DEFAULT 5000,
    -- Statistics of the most recent scan; nullable, presumably NULL until the
    -- first scan has completed.
    last_scan_at TIMESTAMPTZ,
    last_scan_duration_ms INTEGER,
    last_scan_certs_found INTEGER,
    -- Row bookkeeping timestamps.
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Partial index: only rows with enabled = TRUE are indexed.
CREATE INDEX IF NOT EXISTS idx_network_scan_targets_enabled ON network_scan_targets(enabled) WHERE enabled = TRUE;