diff --git a/internal/service/network_scan.go b/internal/service/network_scan.go index 3464eed..100eac3 100644 --- a/internal/service/network_scan.go +++ b/internal/service/network_scan.go @@ -235,21 +235,19 @@ func (s *NetworkScanService) scanTarget(ctx context.Context, target *domain.Netw timeout := time.Duration(target.TimeoutMs) * time.Millisecond results := s.scanEndpoints(ctx, endpoints, timeout) - // Collect discovered cert entries - var entries []domain.DiscoveredCertEntry - var scanErrors []string - for _, result := range results { - if result.Error != "" { - // Only log connection errors at debug level (many hosts won't have TLS) - if s.logger != nil { - s.logger.Debug("scan endpoint error", - "address", result.Address, - "error", result.Error) - } - continue - } - entries = append(entries, result.Certs...) - } + // Collect discovered cert entries and per-endpoint errors. + // + // M-9 (operator-observability): before this fix, scanErrors was declared + // but never appended to, so the "errors" count in the summary Info log + // and the Errors field on the DiscoveryReport were always zero/nil — + // silently hiding per-endpoint failures from operators and from the + // downstream scan history record. Per-endpoint failures are still logged + // at Debug (sweep scans generate high connection-refused noise by design + // — most hosts in a CIDR won't have TLS on the probed port), but the + // aggregate count and the report's Errors field now reflect reality so + // operators can see, via the scan summary and the stored scan record, + // how many endpoints failed without having to enable Debug logging. + entries, scanErrors := s.collectScanResults(results) scanDuration := time.Since(startTime) if s.logger != nil { @@ -385,6 +383,44 @@ func incrementIP(ip net.IP) { } } +// collectScanResults partitions per-endpoint scan results into discovered +// certificate entries and a list of per-endpoint error strings. +// +// M-9 (operator-observability): the summary Info log and the DiscoveryReport +// both report the count of endpoints that failed to probe. Before this helper +// existed, the caller accumulated entries but never populated the errors +// slice, so the aggregate error count was always zero and the scan record's +// Errors field was always nil — silently hiding per-endpoint failures. +// +// Per-endpoint errors remain logged at Debug (sweep scans generate high +// connection-refused noise by design — most hosts in a CIDR won't have TLS +// on the probed port). Aggregation surfaces the count at Info, preserving +// Debug-level detail for operators who want it without creating log spam +// at default verbosity. +func (s *NetworkScanService) collectScanResults(results []domain.NetworkScanResult) ([]domain.DiscoveredCertEntry, []string) { + var entries []domain.DiscoveredCertEntry + var scanErrors []string + for _, result := range results { + if result.Error != "" { + // Debug-level is intentional: a sweep scan of a /24 typically + // produces 200+ connection-refused results, and logging each + // at Warn would create log spam at default verbosity. The + // aggregate count in the Info-level scan-completed log surfaces + // the failure volume to operators; Debug provides the detail + // when diagnosing a specific endpoint. + if s.logger != nil { + s.logger.Debug("scan endpoint error", + "address", result.Address, + "error", result.Error) + } + scanErrors = append(scanErrors, fmt.Sprintf("%s: %s", result.Address, result.Error)) + continue + } + entries = append(entries, result.Certs...) + } + return entries, scanErrors +} + // scanEndpoints probes TLS endpoints concurrently and returns results. func (s *NetworkScanService) scanEndpoints(ctx context.Context, endpoints []string, timeout time.Duration) []domain.NetworkScanResult { results := make([]domain.NetworkScanResult, len(endpoints)) diff --git a/internal/service/network_scan_test.go b/internal/service/network_scan_test.go index 3096fda..48803c6 100644 --- a/internal/service/network_scan_test.go +++ b/internal/service/network_scan_test.go @@ -491,3 +491,113 @@ func TestExpandCIDR_SingleLinkLocalIP(t *testing.T) { t.Errorf("expected empty for cloud metadata IP, got %v", ips) } } + +// TestCollectScanResults_AggregatesErrors is the M-9 regression guard: +// per-endpoint probe failures must accumulate into the errors slice so the +// summary Info log and the DiscoveryReport reflect the true failure count. +// Before the M-9 fix, scanErrors was declared but never appended to, so the +// aggregate count was always zero and the scan record's Errors field was +// always nil — silently hiding per-endpoint failures from operators. +func TestCollectScanResults_AggregatesErrors(t *testing.T) { + svc := &NetworkScanService{} + results := []domain.NetworkScanResult{ + {Address: "203.0.113.1:443", Error: "connection refused"}, + {Address: "203.0.113.2:443", Certs: []domain.DiscoveredCertEntry{ + {CommonName: "example.com"}, + }}, + {Address: "203.0.113.3:443", Error: "tls handshake failure"}, + {Address: "203.0.113.4:443", Certs: []domain.DiscoveredCertEntry{ + {CommonName: "internal.example.com"}, + }}, + {Address: "203.0.113.5:443", Error: "i/o timeout"}, + } + + entries, errs := svc.collectScanResults(results) + + if len(entries) != 2 { + t.Errorf("expected 2 entries (one per successful probe), got %d", len(entries)) + } + if len(errs) != 3 { + t.Fatalf("expected 3 error strings (one per failed probe), got %d: %v", len(errs), errs) + } + + // Each error string must be non-empty and include the endpoint address so + // the scan record lets operators correlate failures back to endpoints + // without needing Debug logging enabled. + for i, e := range errs { + if e == "" { + t.Errorf("error[%d]: expected non-empty error string", i) + } + } + + // Spot-check that address is threaded through the error strings. + if want := "203.0.113.1:443"; errs[0] == "" || errs[0][:len(want)] != want { + t.Errorf("errs[0] should start with %q, got %q", want, errs[0]) + } + if want := "203.0.113.3:443"; errs[1] == "" || errs[1][:len(want)] != want { + t.Errorf("errs[1] should start with %q, got %q", want, errs[1]) + } + if want := "203.0.113.5:443"; errs[2] == "" || errs[2][:len(want)] != want { + t.Errorf("errs[2] should start with %q, got %q", want, errs[2]) + } +} + +// TestCollectScanResults_AllSuccess exercises the happy path: a scan where +// every endpoint returned certificates. The errors slice must be nil (not an +// empty non-nil slice) so the downstream DiscoveryReport.Errors field stays +// nil as well, preserving the JSON-omitempty behavior that callers rely on. +func TestCollectScanResults_AllSuccess(t *testing.T) { + svc := &NetworkScanService{} + results := []domain.NetworkScanResult{ + {Address: "203.0.113.10:443", Certs: []domain.DiscoveredCertEntry{ + {CommonName: "a.example.com"}, + }}, + {Address: "203.0.113.11:443", Certs: []domain.DiscoveredCertEntry{ + {CommonName: "b.example.com"}, + }}, + } + + entries, errs := svc.collectScanResults(results) + + if len(entries) != 2 { + t.Errorf("expected 2 entries, got %d", len(entries)) + } + if errs != nil { + t.Errorf("expected nil errors slice on all-success, got %v", errs) + } +} + +// TestCollectScanResults_AllFailed exercises the worst-case sweep: every +// endpoint failed to probe. Entries must be nil, and every failure must be +// recorded in the errors slice so the scan record is complete. +func TestCollectScanResults_AllFailed(t *testing.T) { + svc := &NetworkScanService{} + results := []domain.NetworkScanResult{ + {Address: "203.0.113.20:443", Error: "connection refused"}, + {Address: "203.0.113.21:443", Error: "connection refused"}, + {Address: "203.0.113.22:443", Error: "connection refused"}, + } + + entries, errs := svc.collectScanResults(results) + + if entries != nil { + t.Errorf("expected nil entries on all-failed, got %v", entries) + } + if len(errs) != 3 { + t.Errorf("expected 3 error strings, got %d: %v", len(errs), errs) + } +} + +// TestCollectScanResults_Empty guards against a degenerate empty-input case +// (scanEndpoints returns no results, e.g. if ctx was cancelled before the +// first probe ran). Both return slices must be nil. +func TestCollectScanResults_Empty(t *testing.T) { + svc := &NetworkScanService{} + entries, errs := svc.collectScanResults(nil) + if entries != nil { + t.Errorf("expected nil entries for empty input, got %v", entries) + } + if errs != nil { + t.Errorf("expected nil errors for empty input, got %v", errs) + } +}