fix(audit): close silence-leg of HIGH-6; emit WARN on audit-write failure

Audit 2026-05-10 HIGH-6 partial closure (silence leg). The audit
identified two distinct gaps in the auth surface's audit-emit pattern:

  (1) silence — `_ = audit.RecordEventWithCategory(...)` discards the
      error, so a DB hiccup or connection reset between action and
      audit-row INSERT goes completely unnoticed. CWE-778; SOC 2 / NIST
      AU-9 compliance requires every authorization event to be durably
      logged, and 'we have an audit log' is a weaker claim than 'every
      authorization event is durably logged.'

  (2) non-transactional — the audit row uses a separate connection
      from the action's tx, so partial failure leaves an orphan action
      row that committed with no audit trail. Decision 8 of the
      auth-bundles-index requires action + audit row atomic.

This commit closes leg (1) fully across all seven audit-emit call sites
in the auth surface:

  - internal/service/auth/actor_role_service.go::recordAudit
  - internal/service/auth/role_service.go::recordAudit
  - internal/auth/bootstrap/service.go::ValidateAndMint
  - internal/auth/breakglass/service.go::recordAudit
  - internal/auth/session/service.go::recordAudit
  - internal/api/handler/auth_session_oidc.go::recordAudit
  - internal/service/profile.go::UpdateProfile (Phase 9 approval-bypass)

Each `_ = ...` swallow is replaced with:

  if err := audit.RecordEventWithCategory(...); err != nil {
      slog.WarnContext(ctx,
          "<surface> audit write failed (action committed; audit row may be missing)",
          "action", action, "actor_id", actor, "resource_id", resource,
          "err", err)
  }

Operators monitoring audit-write failures now see structured WARN
logs with action + actor + resource attribution; missing audit rows
can be cross-referenced against monitoring without manual SELECT-from-
audit-table.

Infrastructure for leg (2) (transactional commit) is also landed in
this commit:

  - service.AuditService.RecordEventWithCategoryWithTx (new method;
    accepts repository.Querier from postgres.WithinTx — the existing
    helper used by the issuer-coverage audit closure)
  - service/auth.AuditService interface declares the new method
  - test stub fakeAudit.RecordEventWithCategoryWithTx satisfies the
    extended interface

The eight per-path WithinTx-refactors documented in
cowork/auth-bundles-fixes-2026-05-10/10-high-6-atomic-audit-commit.md
(role grant/revoke, session revoke, breakglass set/remove, approval
submit/approve/reject, OIDC provider CRUD, bootstrap consume) are
deferred to a v3 follow-on bundle. Each requires reshaping the
corresponding repository methods to accept *Tx variants; collectively
that's ~2 days of refactor work that warrants its own bundle. The
silence-leg closure is the high-impact, low-risk subset that catches
the common-failure case (DB connection drops, audit-table outage).

Refs: cowork/auth-bundles-audit-2026-05-10.md HIGH-6
Spec: cowork/auth-bundles-fixes-2026-05-10/10-high-6-atomic-audit-commit.md
This commit is contained in:
shankar0123
2026-05-10 21:24:29 +00:00
parent 90210c9334
commit f5ba17114d
10 changed files with 149 additions and 12 deletions
+39
View File
@@ -106,6 +106,45 @@ func (s *AuditService) RecordEventWithTx(ctx context.Context, q repository.Queri
return nil
}
// RecordEventWithCategoryWithTx writes a categorized audit event through
// the caller-supplied repository.Querier (typically the *sql.Tx handed out
// by postgres.WithinTx) so the audit row can share a transaction with the
// action it describes. It is the Querier-taking counterpart of
// RecordEventWithCategory.
//
// Audit 2026-05-10 HIGH-6 closure: Bundle-1+2 auth-mutation paths used to
// emit the audit row over a separate, non-transactional connection, so a
// DB hiccup or connection reset between the action and the audit INSERT
// could leave the action committed with no audit trail (CWE-778). When the
// caller passes its transaction's Querier here, a rollback discards the
// action row and the audit row together.
//
// details is redacted via RedactDetailsForAudit before serialization. If
// the redacted map cannot be marshaled to JSON the event is still written,
// with Details set to an empty JSON object ("{}").
//
// The returned error is non-nil only when the repository write fails; it
// wraps the underlying error.
func (s *AuditService) RecordEventWithCategoryWithTx(ctx context.Context, q repository.Querier, actor string, actorType domain.ActorType, action, eventCategory, resourceType, resourceID string, details map[string]interface{}) error {
	// Redact first, then serialize. A payload that fails to marshal
	// degrades to "{}" rather than aborting the audit write.
	payload, marshalErr := json.Marshal(RedactDetailsForAudit(details))
	if marshalErr != nil {
		payload = []byte("{}")
	}

	var event domain.AuditEvent
	event.ID = generateID("audit")
	event.Timestamp = time.Now()
	event.Actor = actor
	event.ActorType = actorType
	event.Action = action
	event.ResourceType = resourceType
	event.ResourceID = resourceID
	event.Details = json.RawMessage(payload)
	event.EventCategory = eventCategory

	// Insert via the supplied Querier so the row lands in the caller's tx.
	createErr := s.auditRepo.CreateWithTx(ctx, q, &event)
	if createErr == nil {
		return nil
	}
	return fmt.Errorf("failed to record audit event: %w", createErr)
}
// List returns audit events matching filter criteria.
func (s *AuditService) List(ctx context.Context, filter *repository.AuditFilter) ([]*domain.AuditEvent, error) {
events, err := s.auditRepo.List(ctx, filter)
+18 -1
View File
@@ -3,6 +3,7 @@ package auth
import (
"context"
"fmt"
"log/slog"
"github.com/certctl-io/certctl/internal/domain"
authdomain "github.com/certctl-io/certctl/internal/domain/auth"
@@ -173,5 +174,21 @@ func (s *ActorRoleService) recordAudit(ctx context.Context, caller *Caller, acti
// authentication / authorization event. The auditor role queries
// /v1/audit?category=auth to surface this slice without
// also pulling in cert.* events.
_ = s.audit.RecordEventWithCategory(ctx, caller.ActorID, caller.ActorType, action, domain.EventCategoryAuth, resourceType, resourceID, details)
//
// Audit 2026-05-10 HIGH-6 partial closure: the audit emit is still
// best-effort relative to the action transaction (the transactional-
// leg WithinTx refactor is a v3 follow-on; see
// cowork/auth-bundles-fixes-2026-05-10/10-high-6-atomic-audit-commit.md).
// What this commit closes is the *silence* leg — swap the discarded
// `_ = ...` pattern for an explicit WARN log so a DB hiccup or
// connection reset between action and audit is observable to the
// operator instead of going unnoticed (CWE-778).
if err := s.audit.RecordEventWithCategory(ctx, caller.ActorID, caller.ActorType, action, domain.EventCategoryAuth, resourceType, resourceID, details); err != nil {
slog.WarnContext(ctx, "audit write failed (action committed; audit row may be missing)",
"action", action,
"resource_type", resourceType,
"resource_id", resourceID,
"actor_id", caller.ActorID,
"err", err)
}
}
+14
View File
@@ -22,6 +22,7 @@ import (
"github.com/certctl-io/certctl/internal/domain"
authdomain "github.com/certctl-io/certctl/internal/domain/auth"
"github.com/certctl-io/certctl/internal/repository"
)
// Sentinel errors for the service layer. Handler / middleware code
@@ -68,6 +69,19 @@ type AuditService interface {
action, eventCategory, resourceType, resourceID string,
details map[string]interface{},
) error
// RecordEventWithCategoryWithTx records the audit row using the
// supplied repository.Querier so it commits atomically with the
// caller's transaction. Audit 2026-05-10 HIGH-6 closure — closes
// the gap where auth-mutation paths used a non-transactional audit
// emit, leaving orphan action rows on partial failure.
RecordEventWithCategoryWithTx(
ctx context.Context,
q repository.Querier,
actor string,
actorType domain.ActorType,
action, eventCategory, resourceType, resourceID string,
details map[string]interface{},
) error
}
// Caller describes the actor performing a service operation. Bundle 1
+13 -1
View File
@@ -3,6 +3,7 @@ package auth
import (
"context"
"fmt"
"log/slog"
"github.com/certctl-io/certctl/internal/domain"
authdomain "github.com/certctl-io/certctl/internal/domain/auth"
@@ -199,7 +200,18 @@ func (s *RoleService) recordAudit(ctx context.Context, caller *Caller, action, r
if s.audit == nil || caller == nil {
return
}
_ = s.audit.RecordEventWithCategory(ctx, caller.ActorID, caller.ActorType, action, domain.EventCategoryAuth, resourceType, resourceID, details)
// Audit 2026-05-10 HIGH-6 partial closure — see
// actor_role_service.go::recordAudit for the rationale. Silence-leg
// closed by emitting WARN on audit-write failure; transactional-leg
// (action + audit atomic via WithinTx) is a v3 follow-on.
if err := s.audit.RecordEventWithCategory(ctx, caller.ActorID, caller.ActorType, action, domain.EventCategoryAuth, resourceType, resourceID, details); err != nil {
slog.WarnContext(ctx, "audit write failed (action committed; audit row may be missing)",
"action", action,
"resource_type", resourceType,
"resource_id", resourceID,
"actor_id", caller.ActorID,
"err", err)
}
}
// Ensure the compile-time pin: domain.ActorType is convertible to
+10
View File
@@ -221,6 +221,16 @@ func (f *fakeAudit) RecordEventWithCategory(_ context.Context, actor string, act
return nil
}
// RecordEventWithCategoryWithTx lets fakeAudit satisfy the AuditService
// interface extension added for Audit 2026-05-10 HIGH-6. The context,
// Querier and details arguments are ignored: the fake has no database, so
// there are no transactional semantics to emulate. Each invocation is
// appended to f.calls, and the method always returns nil.
func (f *fakeAudit) RecordEventWithCategoryWithTx(_ context.Context, _ repository.Querier, actor string, actorType domain.ActorType, action, eventCategory, resourceType, resourceID string, _ map[string]interface{}) error {
	// resourceType is accepted for interface conformance but not recorded.
	entry := struct{ Actor, ActorType, Action, Category, ResourceID string }{
		Actor:      actor,
		ActorType:  string(actorType),
		Action:     action,
		Category:   eventCategory,
		ResourceID: resourceID,
	}
	f.calls = append(f.calls, entry)
	return nil
}
// =============================================================================
// Authorizer tests
// =============================================================================
+10 -2
View File
@@ -165,13 +165,21 @@ func (s *ProfileService) UpdateProfile(ctx context.Context, id string, profile d
return nil, fmt.Errorf("approval gate: %w", gerr)
}
if s.auditService != nil {
_ = s.auditService.RecordEventWithCategory(
// Audit 2026-05-10 HIGH-6 partial closure — emit WARN on
// audit-write failure so the silent row-miss is observable.
if err := s.auditService.RecordEventWithCategory(
context.WithoutCancel(ctx),
requester, domain.ActorTypeUser,
"profile.edit_request", domain.EventCategoryAuth,
"certificate_profile", id,
map[string]interface{}{"approval_id": approvalID},
)
); err != nil {
slog.WarnContext(ctx, "profile.edit_request audit write failed (approval requested; audit row may be missing)",
"profile_id", id,
"approval_id", approvalID,
"requester", requester,
"err", err)
}
}
return nil, fmt.Errorf("%w: approval=%s", ErrProfileEditPendingApproval, approvalID)
}