mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 17:12:04 +00:00
21aeed4f4e
Phase 0 closure (Path B2, post-rewrite):
addlicense sweep — adds the canonical certctl LLC copyright + BUSL-1.1
SPDX header to every production Go file. Template:
// Copyright 2026 certctl LLC. All rights reserved.
// SPDX-License-Identifier: BUSL-1.1
Coverage: 338 / 338 production Go files (cmd/ + internal/, excluding
*_test.go and **/testdata/**). Pre-sweep coverage was 22 / 338 (6.5%);
post-sweep is 338 / 338 (100%).
Normalized 22 pre-existing legacy headers (`// Copyright (c) certctl`
+ `// SPDX-License-Identifier: BSL-1.1`) and 1 file using a
`Certctl Contributors` attribution. The legacy SPDX ID `BSL-1.1`
is non-standard; the official SPDX identifier for Business Source
License 1.1 is `BUSL-1.1` (capital U). All 338 files now share the
canonical form.
Generated via:
addlicense -c "certctl LLC" -y 2026 \
-f cowork/legal/copyright-header.tpl \
-ignore '**/testdata/**' -ignore '**/*_test.go' \
cmd/ internal/
Verification:
find cmd internal -name '*.go' -not -name '*_test.go' \
-not -path '*/testdata/*' \
-exec grep -L '^// Copyright 2026 certctl LLC' {} \; | wc -l
Returns: 0
gofmt clean. Header additions are comments only, no compile impact.
Closes: cowork/certctl-architecture-diligence-audit.html#fix-RED-4
534 lines
17 KiB
Go
534 lines
17 KiB
Go
// Copyright 2026 certctl LLC. All rights reserved.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package service
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"encoding/pem"
|
|
"fmt"
|
|
"log/slog"
|
|
"net"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/certctl-io/certctl/internal/domain"
|
|
"github.com/certctl-io/certctl/internal/repository"
|
|
"github.com/certctl-io/certctl/internal/tlsprobe"
|
|
"github.com/certctl-io/certctl/internal/validation"
|
|
)
|
|
|
|
// SentinelAgentID is the agent ID used for network-discovered certificates.
|
|
// This allows the existing discovery dedup constraint (fingerprint, agent_id, source_path)
|
|
// to work without schema changes.
|
|
const SentinelAgentID = "server-scanner"
|
|
|
|
// NetworkScanService manages active TLS scanning of network endpoints.
|
|
type NetworkScanService struct {
|
|
networkScanRepo repository.NetworkScanRepository
|
|
discoveryService *DiscoveryService
|
|
auditService *AuditService
|
|
logger *slog.Logger
|
|
concurrency int
|
|
|
|
// SCEP RFC 8894 + Intune master bundle Phase 11.5 — SCEP probe
|
|
// state. Optional: nil-safe so deploys that don't enable the probe
|
|
// surface (no scep_probe_results table populated) still work.
|
|
scepProbeRepo repository.SCEPProbeResultRepository
|
|
scepHTTPClient *http.Client // built from SafeHTTPDialContext for SSRF defense
|
|
scepValidateURL func(string) error // defaults to validation.ValidateSafeURL; tests inject permissive
|
|
scepIDFn func() string
|
|
nowFn func() time.Time
|
|
}
|
|
|
|
// NewNetworkScanService creates a new network scan service.
|
|
func NewNetworkScanService(
|
|
networkScanRepo repository.NetworkScanRepository,
|
|
discoveryService *DiscoveryService,
|
|
auditService *AuditService,
|
|
logger *slog.Logger,
|
|
) *NetworkScanService {
|
|
return &NetworkScanService{
|
|
networkScanRepo: networkScanRepo,
|
|
discoveryService: discoveryService,
|
|
auditService: auditService,
|
|
logger: logger,
|
|
concurrency: 50,
|
|
nowFn: time.Now,
|
|
}
|
|
}
|
|
|
|
// SetSCEPProbeRepo wires the SCEP probe persistence repository onto the
|
|
// service. Called from cmd/server/main.go at startup. Nil-safe — calling
|
|
// ProbeSCEP without a repo just skips the persist step (the probe still
|
|
// runs and returns its result synchronously).
|
|
//
|
|
// SCEP RFC 8894 + Intune master bundle Phase 11.5.
|
|
func (s *NetworkScanService) SetSCEPProbeRepo(repo repository.SCEPProbeResultRepository) {
|
|
s.scepProbeRepo = repo
|
|
}
|
|
|
|
// ListTargets returns all network scan targets.
|
|
func (s *NetworkScanService) ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
|
|
return s.networkScanRepo.List(ctx)
|
|
}
|
|
|
|
// GetTarget retrieves a network scan target by ID.
|
|
func (s *NetworkScanService) GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
|
|
return s.networkScanRepo.Get(ctx, id)
|
|
}
|
|
|
|
// maxCIDRHostBits is the maximum number of host bits allowed in a CIDR range.
|
|
// A /20 network has 12 host bits = 4096 IPs max. This prevents operators from
|
|
// accidentally creating scan targets that would exhaust server resources.
|
|
const maxCIDRHostBits = 12
|
|
|
|
// validateCIDRs validates a list of CIDRs for syntax correctness and size limits.
|
|
// Each CIDR must be a valid CIDR notation or plain IP address, and no single CIDR
|
|
// may be larger than /20 (4096 IPs). This validation runs at API request time so
|
|
// operators get an immediate 400 error instead of a silent truncation at scan time.
|
|
func validateCIDRs(cidrs []string) error {
|
|
for _, cidr := range cidrs {
|
|
_, ipNet, err := net.ParseCIDR(cidr)
|
|
if err != nil {
|
|
// Try parsing as plain IP (single host)
|
|
if ip := net.ParseIP(cidr); ip == nil {
|
|
return fmt.Errorf("invalid CIDR or IP: %s", cidr)
|
|
}
|
|
continue // Single IPs are always valid size
|
|
}
|
|
// Enforce /20 size cap at API level
|
|
ones, bits := ipNet.Mask.Size()
|
|
hostBits := bits - ones
|
|
if hostBits > maxCIDRHostBits {
|
|
return fmt.Errorf("CIDR %s is too large (/%d has %d host bits, max /%d with %d host bits = 4096 IPs)",
|
|
cidr, ones, hostBits, bits-maxCIDRHostBits, maxCIDRHostBits)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CreateTarget creates a new network scan target.
|
|
func (s *NetworkScanService) CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
|
|
if target.Name == "" {
|
|
return nil, fmt.Errorf("name is required")
|
|
}
|
|
if len(target.CIDRs) == 0 {
|
|
return nil, fmt.Errorf("at least one CIDR is required")
|
|
}
|
|
// Validate CIDRs (syntax + /20 size cap)
|
|
if err := validateCIDRs(target.CIDRs); err != nil {
|
|
return nil, err
|
|
}
|
|
if len(target.Ports) == 0 {
|
|
target.Ports = []int64{443}
|
|
}
|
|
if target.ScanIntervalHours == 0 {
|
|
target.ScanIntervalHours = 6
|
|
}
|
|
if target.TimeoutMs == 0 {
|
|
target.TimeoutMs = 5000
|
|
}
|
|
target.ID = generateID("nst")
|
|
target.Enabled = true
|
|
target.CreatedAt = time.Now()
|
|
target.UpdatedAt = time.Now()
|
|
|
|
if err := s.networkScanRepo.Create(ctx, target); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.auditService.RecordEvent(ctx, "operator", domain.ActorTypeUser,
|
|
"network_scan_target_created", "network_scan_target", target.ID,
|
|
map[string]interface{}{
|
|
"name": target.Name,
|
|
"cidrs": target.CIDRs,
|
|
"ports": target.Ports,
|
|
})
|
|
|
|
return target, nil
|
|
}
|
|
|
|
// UpdateTarget updates an existing network scan target.
|
|
func (s *NetworkScanService) UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
|
|
existing, err := s.networkScanRepo.Get(ctx, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if target.Name != "" {
|
|
existing.Name = target.Name
|
|
}
|
|
if len(target.CIDRs) > 0 {
|
|
// Validate new CIDRs (syntax + /20 size cap)
|
|
if err := validateCIDRs(target.CIDRs); err != nil {
|
|
return nil, err
|
|
}
|
|
existing.CIDRs = target.CIDRs
|
|
}
|
|
if len(target.Ports) > 0 {
|
|
existing.Ports = target.Ports
|
|
}
|
|
if target.ScanIntervalHours > 0 {
|
|
existing.ScanIntervalHours = target.ScanIntervalHours
|
|
}
|
|
if target.TimeoutMs > 0 {
|
|
existing.TimeoutMs = target.TimeoutMs
|
|
}
|
|
// Always update enabled field (it's a boolean, so 0-value is meaningful)
|
|
existing.Enabled = target.Enabled
|
|
|
|
if err := s.networkScanRepo.Update(ctx, existing); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return existing, nil
|
|
}
|
|
|
|
// DeleteTarget removes a network scan target.
|
|
func (s *NetworkScanService) DeleteTarget(ctx context.Context, id string) error {
|
|
if err := s.networkScanRepo.Delete(ctx, id); err != nil {
|
|
return fmt.Errorf("failed to delete network scan target: %w", err)
|
|
}
|
|
|
|
s.auditService.RecordEvent(ctx, "operator", domain.ActorTypeUser,
|
|
"network_scan_target_deleted", "network_scan_target", id, nil)
|
|
|
|
return nil
|
|
}
|
|
|
|
// ScanAllTargets runs the active TLS scan for all enabled targets.
|
|
// This is called by the scheduler on the configured interval.
|
|
func (s *NetworkScanService) ScanAllTargets(ctx context.Context) error {
|
|
targets, err := s.networkScanRepo.ListEnabled(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("list enabled targets: %w", err)
|
|
}
|
|
|
|
if len(targets) == 0 {
|
|
if s.logger != nil {
|
|
s.logger.Debug("no enabled network scan targets")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
if s.logger != nil {
|
|
s.logger.Info("starting network scan", "targets", len(targets))
|
|
}
|
|
|
|
for _, target := range targets {
|
|
if ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
s.scanTarget(ctx, target)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// TriggerScan runs an immediate scan for a specific target.
|
|
func (s *NetworkScanService) TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error) {
|
|
target, err := s.networkScanRepo.Get(ctx, targetID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return s.scanTarget(ctx, target), nil
|
|
}
|
|
|
|
// scanTarget scans a single network target and feeds results into the discovery pipeline.
|
|
func (s *NetworkScanService) scanTarget(ctx context.Context, target *domain.NetworkScanTarget) *domain.DiscoveryScan {
|
|
startTime := time.Now()
|
|
if s.logger != nil {
|
|
s.logger.Info("scanning network target",
|
|
"target_id", target.ID,
|
|
"name", target.Name,
|
|
"cidrs", target.CIDRs,
|
|
"ports", target.Ports)
|
|
}
|
|
|
|
// Expand CIDRs to individual IPs
|
|
endpoints := s.expandEndpoints(target.CIDRs, target.Ports)
|
|
if s.logger != nil {
|
|
s.logger.Debug("expanded endpoints", "count", len(endpoints))
|
|
}
|
|
|
|
// Scan endpoints concurrently
|
|
timeout := time.Duration(target.TimeoutMs) * time.Millisecond
|
|
results := s.scanEndpoints(ctx, endpoints, timeout)
|
|
|
|
// Collect discovered cert entries and per-endpoint errors.
|
|
//
|
|
// M-9 (operator-observability): before this fix, scanErrors was declared
|
|
// but never appended to, so the "errors" count in the summary Info log
|
|
// and the Errors field on the DiscoveryReport were always zero/nil —
|
|
// silently hiding per-endpoint failures from operators and from the
|
|
// downstream scan history record. Per-endpoint failures are still logged
|
|
// at Debug (sweep scans generate high connection-refused noise by design
|
|
// — most hosts in a CIDR won't have TLS on the probed port), but the
|
|
// aggregate count and the report's Errors field now reflect reality so
|
|
// operators can see, via the scan summary and the stored scan record,
|
|
// how many endpoints failed without having to enable Debug logging.
|
|
entries, scanErrors := s.collectScanResults(results)
|
|
|
|
scanDuration := time.Since(startTime)
|
|
if s.logger != nil {
|
|
s.logger.Info("network target scan completed",
|
|
"target_id", target.ID,
|
|
"endpoints_scanned", len(endpoints),
|
|
"certificates_found", len(entries),
|
|
"errors", len(scanErrors),
|
|
"duration_ms", scanDuration.Milliseconds())
|
|
}
|
|
|
|
// Update scan results on target
|
|
s.networkScanRepo.UpdateScanResults(ctx, target.ID, time.Now(),
|
|
int(scanDuration.Milliseconds()), len(entries))
|
|
|
|
// Feed into discovery pipeline if we found certs
|
|
if len(entries) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Build directories list from CIDRs for the scan record
|
|
dirs := make([]string, len(target.CIDRs))
|
|
copy(dirs, target.CIDRs)
|
|
|
|
report := &domain.DiscoveryReport{
|
|
AgentID: SentinelAgentID,
|
|
Directories: dirs,
|
|
Certificates: entries,
|
|
Errors: scanErrors,
|
|
ScanDurationMs: int(scanDuration.Milliseconds()),
|
|
}
|
|
|
|
scan, err := s.discoveryService.ProcessDiscoveryReport(ctx, report)
|
|
if err != nil {
|
|
if s.logger != nil {
|
|
s.logger.Error("failed to process network scan report",
|
|
"target_id", target.ID,
|
|
"error", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
return scan
|
|
}
|
|
|
|
// expandEndpoints converts CIDR ranges and ports into a list of "ip:port" endpoints.
|
|
// Filters out reserved IP ranges and logs warnings.
|
|
func (s *NetworkScanService) expandEndpoints(cidrs []string, ports []int64) []string {
|
|
var endpoints []string
|
|
|
|
for _, cidr := range cidrs {
|
|
ips := expandCIDR(cidr)
|
|
if ips == nil || len(ips) == 0 {
|
|
if s.logger != nil {
|
|
s.logger.Warn("CIDR range filtered (reserved or too large)",
|
|
"cidr", cidr)
|
|
}
|
|
continue
|
|
}
|
|
for _, ip := range ips {
|
|
for _, port := range ports {
|
|
endpoints = append(endpoints, fmt.Sprintf("%s:%d", ip, port))
|
|
}
|
|
}
|
|
}
|
|
|
|
return endpoints
|
|
}
|
|
|
|
// The reserved-IP filter used by expandCIDR previously lived here as an
|
|
// unexported isReservedIP helper. It has been moved to
|
|
// internal/validation.IsReservedIP so the webhook notifier can share a single
|
|
// authoritative implementation (H-4, CWE-918). The behaviour is
|
|
// byte-identical with the previous helper — RFC 1918 is intentionally NOT
|
|
// filtered, matching certctl's self-hosted design. If you change the
|
|
// validation package's IsReservedIP, you are changing the network-scanner's
|
|
// behaviour; audit both code paths together.
|
|
|
|
// expandCIDR expands a CIDR notation or single IP into a list of IPs.
|
|
// Limits expansion to /20 (4096 IPs) to prevent accidental huge scans.
|
|
// Filters out reserved IP ranges (via validation.IsReservedIP) to prevent
|
|
// SSRF amplification via network-scan targets pointed at cloud metadata or
|
|
// loopback.
|
|
func expandCIDR(cidr string) []string {
|
|
// Try as CIDR first
|
|
ip, ipNet, err := net.ParseCIDR(cidr)
|
|
if err != nil {
|
|
// Try as single IP
|
|
if singleIP := net.ParseIP(cidr); singleIP != nil {
|
|
if validation.IsReservedIP(singleIP) {
|
|
return nil
|
|
}
|
|
return []string{singleIP.String()}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Count network size and cap at /20
|
|
ones, bits := ipNet.Mask.Size()
|
|
hostBits := bits - ones
|
|
if hostBits > 12 { // More than 4096 hosts
|
|
return nil // Skip overly large networks
|
|
}
|
|
|
|
var ips []string
|
|
for ip := ip.Mask(ipNet.Mask); ipNet.Contains(ip); incrementIP(ip) {
|
|
// Skip reserved IPs
|
|
if validation.IsReservedIP(ip) {
|
|
continue
|
|
}
|
|
|
|
// Copy IP before appending (net.IP is a mutable slice)
|
|
ipCopy := make(net.IP, len(ip))
|
|
copy(ipCopy, ip)
|
|
ips = append(ips, ipCopy.String())
|
|
}
|
|
|
|
// Remove network and broadcast for IPv4 /31 and larger
|
|
if len(ips) > 2 {
|
|
ips = ips[1 : len(ips)-1]
|
|
}
|
|
|
|
return ips
|
|
}
|
|
|
|
// incrementIP increments an IP address by one.
|
|
func incrementIP(ip net.IP) {
|
|
for j := len(ip) - 1; j >= 0; j-- {
|
|
ip[j]++
|
|
if ip[j] > 0 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// collectScanResults partitions per-endpoint scan results into discovered
|
|
// certificate entries and a list of per-endpoint error strings.
|
|
//
|
|
// M-9 (operator-observability): the summary Info log and the DiscoveryReport
|
|
// both report the count of endpoints that failed to probe. Before this helper
|
|
// existed, the caller accumulated entries but never populated the errors
|
|
// slice, so the aggregate error count was always zero and the scan record's
|
|
// Errors field was always nil — silently hiding per-endpoint failures.
|
|
//
|
|
// Per-endpoint errors remain logged at Debug (sweep scans generate high
|
|
// connection-refused noise by design — most hosts in a CIDR won't have TLS
|
|
// on the probed port). Aggregation surfaces the count at Info, preserving
|
|
// Debug-level detail for operators who want it without creating log spam
|
|
// at default verbosity.
|
|
func (s *NetworkScanService) collectScanResults(results []domain.NetworkScanResult) ([]domain.DiscoveredCertEntry, []string) {
|
|
var entries []domain.DiscoveredCertEntry
|
|
var scanErrors []string
|
|
for _, result := range results {
|
|
if result.Error != "" {
|
|
// Debug-level is intentional: a sweep scan of a /24 typically
|
|
// produces 200+ connection-refused results, and logging each
|
|
// at Warn would create log spam at default verbosity. The
|
|
// aggregate count in the Info-level scan-completed log surfaces
|
|
// the failure volume to operators; Debug provides the detail
|
|
// when diagnosing a specific endpoint.
|
|
if s.logger != nil {
|
|
s.logger.Debug("scan endpoint error",
|
|
"address", result.Address,
|
|
"error", result.Error)
|
|
}
|
|
scanErrors = append(scanErrors, fmt.Sprintf("%s: %s", result.Address, result.Error))
|
|
continue
|
|
}
|
|
entries = append(entries, result.Certs...)
|
|
}
|
|
return entries, scanErrors
|
|
}
|
|
|
|
// scanEndpoints probes TLS endpoints concurrently and returns results.
|
|
func (s *NetworkScanService) scanEndpoints(ctx context.Context, endpoints []string, timeout time.Duration) []domain.NetworkScanResult {
|
|
results := make([]domain.NetworkScanResult, len(endpoints))
|
|
sem := make(chan struct{}, s.concurrency)
|
|
var wg sync.WaitGroup
|
|
|
|
for i, endpoint := range endpoints {
|
|
if ctx.Err() != nil {
|
|
break
|
|
}
|
|
wg.Add(1)
|
|
sem <- struct{}{}
|
|
go func(idx int, addr string) {
|
|
defer wg.Done()
|
|
defer func() { <-sem }()
|
|
results[idx] = s.probeTLS(ctx, addr, timeout)
|
|
}(i, endpoint)
|
|
}
|
|
wg.Wait()
|
|
return results
|
|
}
|
|
|
|
// probeTLS connects to an endpoint, performs a TLS handshake, and extracts certificates.
|
|
func (s *NetworkScanService) probeTLS(ctx context.Context, address string, timeout time.Duration) domain.NetworkScanResult {
|
|
startTime := time.Now()
|
|
result := domain.NetworkScanResult{Address: address}
|
|
|
|
dialer := &net.Dialer{Timeout: timeout}
|
|
conn, err := tls.DialWithDialer(dialer, "tcp", address, &tls.Config{
|
|
// SECURITY NOTE: InsecureSkipVerify is intentionally set to true here.
|
|
// The network scanner must discover ALL certificates including self-signed,
|
|
// expired, and internal CA certificates. This setting is scoped to discovery
|
|
// probing only — it is NEVER used for control-plane API calls, issuer
|
|
// connector communication, or any operation that trusts the certificate.
|
|
// The endpoint's certificate chain is extracted and analyzed, not validated.
|
|
// See TICKET-016 for full security audit rationale.
|
|
InsecureSkipVerify: true, //nolint:gosec // discovery probe; documented above + docs/tls.md L-001 table
|
|
})
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
result.LatencyMs = int(time.Since(startTime).Milliseconds())
|
|
return result
|
|
}
|
|
defer conn.Close()
|
|
|
|
result.LatencyMs = int(time.Since(startTime).Milliseconds())
|
|
|
|
// Extract certificates from TLS connection state
|
|
state := conn.ConnectionState()
|
|
for _, cert := range state.PeerCertificates {
|
|
entry := tlsCertToEntry(cert, address)
|
|
result.Certs = append(result.Certs, entry)
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// tlsCertToEntry converts an x509.Certificate from a TLS handshake into a DiscoveredCertEntry.
|
|
func tlsCertToEntry(cert *x509.Certificate, address string) domain.DiscoveredCertEntry {
|
|
// Compute SHA-256 fingerprint using shared tlsprobe package
|
|
fingerprint := tlsprobe.CertFingerprint(cert)
|
|
|
|
// Encode as PEM
|
|
pemBlock := &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}
|
|
pemData := string(pem.EncodeToMemory(pemBlock))
|
|
|
|
// Key algorithm and size using shared tlsprobe package
|
|
keyAlg, keySize := tlsprobe.CertKeyInfo(cert)
|
|
|
|
return domain.DiscoveredCertEntry{
|
|
FingerprintSHA256: fingerprint,
|
|
CommonName: cert.Subject.CommonName,
|
|
SANs: cert.DNSNames,
|
|
SerialNumber: cert.SerialNumber.Text(16),
|
|
IssuerDN: cert.Issuer.String(),
|
|
SubjectDN: cert.Subject.String(),
|
|
NotBefore: cert.NotBefore.UTC().Format(time.RFC3339),
|
|
NotAfter: cert.NotAfter.UTC().Format(time.RFC3339),
|
|
KeyAlgorithm: keyAlg,
|
|
KeySize: keySize,
|
|
IsCA: cert.IsCA,
|
|
PEMData: pemData,
|
|
SourcePath: address,
|
|
SourceFormat: "network",
|
|
}
|
|
}
|