Merge branch 'fix/m9-sentinel-discovery-log-levels': M-9 sentinel discovery log-level fix

This commit is contained in:
shankar0123
2026-04-18 02:53:50 +00:00
2 changed files with 161 additions and 15 deletions
+51 -15
View File
@@ -235,21 +235,19 @@ func (s *NetworkScanService) scanTarget(ctx context.Context, target *domain.Netw
timeout := time.Duration(target.TimeoutMs) * time.Millisecond
results := s.scanEndpoints(ctx, endpoints, timeout)
// Collect discovered cert entries
var entries []domain.DiscoveredCertEntry
var scanErrors []string
for _, result := range results {
if result.Error != "" {
// Only log connection errors at debug level (many hosts won't have TLS)
if s.logger != nil {
s.logger.Debug("scan endpoint error",
"address", result.Address,
"error", result.Error)
}
continue
}
entries = append(entries, result.Certs...)
}
// Collect discovered cert entries and per-endpoint errors.
//
// M-9 (operator-observability): before this fix, scanErrors was declared
// but never appended to, so the "errors" count in the summary Info log
// and the Errors field on the DiscoveryReport were always zero/nil —
// silently hiding per-endpoint failures from operators and from the
// downstream scan history record. Per-endpoint failures are still logged
// at Debug (sweep scans generate high connection-refused noise by design
// — most hosts in a CIDR won't have TLS on the probed port), but the
// aggregate count and the report's Errors field now reflect reality so
// operators can see, via the scan summary and the stored scan record,
// how many endpoints failed without having to enable Debug logging.
entries, scanErrors := s.collectScanResults(results)
scanDuration := time.Since(startTime)
if s.logger != nil {
@@ -385,6 +383,44 @@ func incrementIP(ip net.IP) {
}
}
// collectScanResults partitions per-endpoint scan results into discovered
// certificate entries and a list of per-endpoint error strings.
//
// M-9 (operator-observability): the summary Info log and the DiscoveryReport
// both report the count of endpoints that failed to probe. Before this helper
// existed, the caller accumulated entries but never populated the errors
// slice, so the aggregate error count was always zero and the scan record's
// Errors field was always nil — silently hiding per-endpoint failures.
//
// Per-endpoint errors remain logged at Debug (sweep scans generate high
// connection-refused noise by design — most hosts in a CIDR won't have TLS
// on the probed port). Aggregation surfaces the count at Info, preserving
// Debug-level detail for operators who want it without creating log spam
// at default verbosity.
func (s *NetworkScanService) collectScanResults(results []domain.NetworkScanResult) ([]domain.DiscoveredCertEntry, []string) {
var entries []domain.DiscoveredCertEntry
var scanErrors []string
for _, result := range results {
if result.Error != "" {
// Debug-level is intentional: a sweep scan of a /24 typically
// produces 200+ connection-refused results, and logging each
// at Warn would create log spam at default verbosity. The
// aggregate count in the Info-level scan-completed log surfaces
// the failure volume to operators; Debug provides the detail
// when diagnosing a specific endpoint.
if s.logger != nil {
s.logger.Debug("scan endpoint error",
"address", result.Address,
"error", result.Error)
}
scanErrors = append(scanErrors, fmt.Sprintf("%s: %s", result.Address, result.Error))
continue
}
entries = append(entries, result.Certs...)
}
return entries, scanErrors
}
// scanEndpoints probes TLS endpoints concurrently and returns results.
func (s *NetworkScanService) scanEndpoints(ctx context.Context, endpoints []string, timeout time.Duration) []domain.NetworkScanResult {
results := make([]domain.NetworkScanResult, len(endpoints))
+110
View File
@@ -491,3 +491,113 @@ func TestExpandCIDR_SingleLinkLocalIP(t *testing.T) {
t.Errorf("expected empty for cloud metadata IP, got %v", ips)
}
}
// TestCollectScanResults_AggregatesErrors is the M-9 regression guard:
// per-endpoint probe failures must accumulate into the errors slice so the
// summary Info log and the DiscoveryReport reflect the true failure count.
// Before the M-9 fix, scanErrors was declared but never appended to, so the
// aggregate count was always zero and the scan record's Errors field was
// always nil — silently hiding per-endpoint failures from operators.
func TestCollectScanResults_AggregatesErrors(t *testing.T) {
svc := &NetworkScanService{}
results := []domain.NetworkScanResult{
{Address: "203.0.113.1:443", Error: "connection refused"},
{Address: "203.0.113.2:443", Certs: []domain.DiscoveredCertEntry{
{CommonName: "example.com"},
}},
{Address: "203.0.113.3:443", Error: "tls handshake failure"},
{Address: "203.0.113.4:443", Certs: []domain.DiscoveredCertEntry{
{CommonName: "internal.example.com"},
}},
{Address: "203.0.113.5:443", Error: "i/o timeout"},
}
entries, errs := svc.collectScanResults(results)
if len(entries) != 2 {
t.Errorf("expected 2 entries (one per successful probe), got %d", len(entries))
}
if len(errs) != 3 {
t.Fatalf("expected 3 error strings (one per failed probe), got %d: %v", len(errs), errs)
}
// Each error string must be non-empty and include the endpoint address so
// the scan record lets operators correlate failures back to endpoints
// without needing Debug logging enabled.
for i, e := range errs {
if e == "" {
t.Errorf("error[%d]: expected non-empty error string", i)
}
}
// Spot-check that address is threaded through the error strings.
if want := "203.0.113.1:443"; errs[0] == "" || errs[0][:len(want)] != want {
t.Errorf("errs[0] should start with %q, got %q", want, errs[0])
}
if want := "203.0.113.3:443"; errs[1] == "" || errs[1][:len(want)] != want {
t.Errorf("errs[1] should start with %q, got %q", want, errs[1])
}
if want := "203.0.113.5:443"; errs[2] == "" || errs[2][:len(want)] != want {
t.Errorf("errs[2] should start with %q, got %q", want, errs[2])
}
}
// TestCollectScanResults_AllSuccess exercises the happy path: a scan where
// every endpoint returned certificates. The errors slice must be nil (not an
// empty non-nil slice) so the downstream DiscoveryReport.Errors field stays
// nil as well, preserving the JSON-omitempty behavior that callers rely on.
func TestCollectScanResults_AllSuccess(t *testing.T) {
svc := &NetworkScanService{}
results := []domain.NetworkScanResult{
{Address: "203.0.113.10:443", Certs: []domain.DiscoveredCertEntry{
{CommonName: "a.example.com"},
}},
{Address: "203.0.113.11:443", Certs: []domain.DiscoveredCertEntry{
{CommonName: "b.example.com"},
}},
}
entries, errs := svc.collectScanResults(results)
if len(entries) != 2 {
t.Errorf("expected 2 entries, got %d", len(entries))
}
if errs != nil {
t.Errorf("expected nil errors slice on all-success, got %v", errs)
}
}
// TestCollectScanResults_AllFailed exercises the worst-case sweep: every
// endpoint failed to probe. Entries must be nil, and every failure must be
// recorded in the errors slice so the scan record is complete.
func TestCollectScanResults_AllFailed(t *testing.T) {
svc := &NetworkScanService{}
results := []domain.NetworkScanResult{
{Address: "203.0.113.20:443", Error: "connection refused"},
{Address: "203.0.113.21:443", Error: "connection refused"},
{Address: "203.0.113.22:443", Error: "connection refused"},
}
entries, errs := svc.collectScanResults(results)
if entries != nil {
t.Errorf("expected nil entries on all-failed, got %v", entries)
}
if len(errs) != 3 {
t.Errorf("expected 3 error strings, got %d: %v", len(errs), errs)
}
}
// TestCollectScanResults_Empty guards against a degenerate empty-input case
// (scanEndpoints returns no results, e.g. if ctx was cancelled before the
// first probe ran). Both return slices must be nil.
func TestCollectScanResults_Empty(t *testing.T) {
svc := &NetworkScanService{}
entries, errs := svc.collectScanResults(nil)
if entries != nil {
t.Errorf("expected nil entries for empty input, got %v", entries)
}
if errs != nil {
t.Errorf("expected nil errors for empty input, got %v", errs)
}
}