From 910097eb308cb6a4f21c153805742d853cbaf9bd Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Tue, 12 May 2026 15:31:55 +0000 Subject: [PATCH] =?UTF-8?q?fix(migrations):=20000043=20idempotency=20?= =?UTF-8?q?=E2=80=94=20wrap=20CHECK=20+=20UNIQUE=20adds=20in=20DO=20blocks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cold-DB compose smoke ran the migration ladder twice (first cold-boot, then smoke step 4 force-recreate certctl-server with the bootstrap token env var). On the second run, 000043 fails with: pq: constraint "actor_roles_scope_type_enum" for relation "actor_roles" already exists Server then crashloops trying the same migration every ~10s until the healthcheck times out and the smoke gives up (5 min wall clock). Root cause: internal/repository/postgres/db.go::RunMigrations has no schema_migrations tracker — every *.up.sql runs on every boot. That makes idempotency mandatory; the CLAUDE.md architecture decision 'Idempotent migrations. IF NOT EXISTS + ON CONFLICT for safe repeated execution' is the contract every migration must honor. Most do; 000043 didn't. PostgreSQL CHECK constraints don't support IF NOT EXISTS directly, so each non-idempotent statement gets wrapped in a DO block that guards against duplication via pg_constraint lookup. The canonical pattern lives in migrations/000033_approval_kinds.up.sql — mirrored here exactly. ADD COLUMN already used IF NOT EXISTS; DROP CONSTRAINT already used IF EXISTS; CREATE INDEX already used IF NOT EXISTS. Only the two ADD CONSTRAINT CHECK and one ADD CONSTRAINT UNIQUE needed the DO-block wrap. Wrapped in BEGIN/COMMIT to match 000033 — keeps all schema changes inside a single transaction. Behavior: - Fresh DB: every DO block runs the ADD CONSTRAINT (no row in pg_constraint yet). Schema lands identically to the non-idempotent original. - Warm DB (constraints already present): every DO block short-circuits via the NOT EXISTS guard. Migration is a no-op. Same bug class as 2026-05-09 migration 000045 broken INSERT (commit def4be9) and the 2026-05-09 migration 000029 PRIMARY KEY fix. THIRD time the non-idempotent migration pattern slipped past code review — strongly suggests a CI guard that scans every *.up.sql for un-guarded ADD CONSTRAINT is the next follow-up. Audit-Closes: post-v2.1.0-anti-rot/item-6 Audit-Closes: audit-2026-05-10/HIGH-10-followon --- migrations/000043_actor_role_scope.up.sql | 55 ++++++++++++++++++----- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/migrations/000043_actor_role_scope.up.sql b/migrations/000043_actor_role_scope.up.sql index 2022141..aa95da6 100644 --- a/migrations/000043_actor_role_scope.up.sql +++ b/migrations/000043_actor_role_scope.up.sql @@ -15,22 +15,45 @@ -- already understands the tuple shape (role_permissions carries the -- same columns); the actor-role addition gives operators a second -- knob without forcing them to fork roles. +-- +-- Idempotency note (2026-05-12 cold-DB smoke closure): the +-- certctl-server boot sequence runs every *.up.sql on every start +-- (internal/repository/postgres/db.go::RunMigrations has no +-- schema_migrations tracker — see CLAUDE.md "Idempotent migrations" +-- architecture decision). All non-IF-NOT-EXISTS operations below +-- must be wrapped in DO blocks that skip when the object already +-- exists. The canonical pattern lives in +-- migrations/000033_approval_kinds.up.sql. -- ============================================================================= +BEGIN; + ALTER TABLE actor_roles ADD COLUMN IF NOT EXISTS scope_type TEXT NOT NULL DEFAULT 'global', ADD COLUMN IF NOT EXISTS scope_id TEXT; -ALTER TABLE actor_roles - ADD CONSTRAINT actor_roles_scope_type_enum - CHECK (scope_type IN ('global', 'profile', 'issuer')); +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint + WHERE conname = 'actor_roles_scope_type_enum') THEN + ALTER TABLE actor_roles + ADD CONSTRAINT actor_roles_scope_type_enum + CHECK (scope_type IN ('global', 'profile', 'issuer')); + END IF; +END$$; -ALTER TABLE actor_roles - ADD CONSTRAINT actor_roles_scope_id_required_when_not_global - CHECK ( - (scope_type = 'global' AND scope_id IS NULL) OR - (scope_type IN ('profile', 'issuer') AND scope_id IS NOT NULL) - ); +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint + WHERE conname = 'actor_roles_scope_id_required_when_not_global') THEN + ALTER TABLE actor_roles + ADD CONSTRAINT actor_roles_scope_id_required_when_not_global + CHECK ( + (scope_type = 'global' AND scope_id IS NULL) OR + (scope_type IN ('profile', 'issuer') AND scope_id IS NOT NULL) + ); + END IF; +END$$; -- The (actor_id, actor_type, role_id, tenant_id) uniqueness must -- relax: an operator can grant the same role to the same actor at @@ -38,9 +61,17 @@ ALTER TABLE actor_roles ALTER TABLE actor_roles DROP CONSTRAINT IF EXISTS actor_roles_actor_id_actor_type_role_id_tenant_id_key; -ALTER TABLE actor_roles - ADD CONSTRAINT actor_roles_actor_role_scope_unique - UNIQUE (actor_id, actor_type, role_id, scope_type, scope_id, tenant_id); +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint + WHERE conname = 'actor_roles_actor_role_scope_unique') THEN + ALTER TABLE actor_roles + ADD CONSTRAINT actor_roles_actor_role_scope_unique + UNIQUE (actor_id, actor_type, role_id, scope_type, scope_id, tenant_id); + END IF; +END$$; CREATE INDEX IF NOT EXISTS idx_actor_roles_scope ON actor_roles(scope_type, scope_id) WHERE scope_id IS NOT NULL; + +COMMIT;