From 6444e13b8fa018158f03fefda08df8281a7589d8 Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Mon, 11 May 2026 16:06:20 +0000 Subject: [PATCH] fix(migrations): two cold-DB regressions surfaced by Phase-9 docker compose smoke MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v2.1.0 release-gate Phase-9 docker compose smoke run against a fresh Postgres surfaced two real defects in the migration files that testcontainers schema-per-test never exercised. Both reproduce by running 'docker compose down -v && docker compose up --build' against the current master tree. Bug A — migration 000045_users_deactivated_at.up.sql is malformed. The 000029 schema defines: permissions (id TEXT PRIMARY KEY, name TEXT NOT NULL UNIQUE, namespace TEXT NOT NULL) role_permissions (..., permission_id TEXT NOT NULL REFERENCES ..., ...) But 000045 was written as: INSERT INTO permissions (name) VALUES ... -- missing id + namespace INSERT INTO role_permissions (role_id, permission, ...) VALUES ... ^^ wrong column name On a cold-DB run this fails immediately with: pq: null value in column "id" of relation "permissions" violates not-null constraint Fix: provide id + namespace columns, use permission_id (the actual column name), ON CONFLICT (id) DO NOTHING. The new permission ids follow the existing 'p-auth-*' prefix convention (p-auth-user-read + p-auth-user-deactivate) used by 000029. Bug B — migration 000029_rbac.up.sql is not idempotent post-000043. 000029 originally created actor_roles with: UNIQUE (actor_id, actor_type, role_id, tenant_id) Audit 2026-05-10 HIGH-10 closure / migration 000043 drops that constraint and re-creates it WITH scope columns: UNIQUE (actor_id, actor_type, role_id, scope_type, scope_id, tenant_id) The migration runner (internal/repository/postgres/db.go::RunMigrations) is naive — no tracker table — and re-runs every *.up.sql file on every server boot. On the second-and-later boots, 000029's seed INSERT for actor-demo-anon-admin still references the pre-000043 constraint name in its ON CONFLICT clause: ON CONFLICT (actor_id, actor_type, role_id, tenant_id) DO NOTHING Postgres errors out with: pq: there is no unique or exclusion constraint matching the ON CONFLICT specification Fix: pin the conflict target to the row's primary key 'id' column (always present, never altered). The seed row's deterministic id 'ar-demo-anon-admin' makes ON CONFLICT (id) work under both pre- and post-000043 schemas. Why testcontainers schema-per-test missed these: Each test in internal/repository/postgres/*_test.go spins up a fresh schema and applies every .up.sql in order ONCE. The full '000029 -> 000043 -> retry 000029' cascade never happens because migrations don't re-run within a test. Phase-9 docker compose smoke is the only test path that exercises the server-restart- on-error retry, which is exactly the missing coverage. Verify (sandbox): go test ./internal/repository/postgres/ PASS. Workstation re-runs 'docker compose down -v && docker compose up' to confirm both bugs are closed. --- migrations/000029_rbac.up.sql | 12 +++++++- migrations/000045_users_deactivated_at.up.sql | 30 +++++++++++-------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/migrations/000029_rbac.up.sql b/migrations/000029_rbac.up.sql index 43e6da2..6870850 100644 --- a/migrations/000029_rbac.up.sql +++ b/migrations/000029_rbac.up.sql @@ -268,6 +268,16 @@ ON CONFLICT (role_id, permission_id, scope_type, scope_id) DO NOTHING; -- The row exists unconditionally; the env-var check happens in code. -- Reserved system actor: API rejects mutations / deletions targeting -- this id with 409 Conflict. +-- v2.1.0 Phase-9 cold-DB-smoke fix: this ON CONFLICT used to reference +-- UNIQUE (actor_id, actor_type, role_id, tenant_id), the constraint +-- created by this migration. Migration 000043 (Audit 2026-05-10 HIGH-10 +-- closure) drops that constraint and re-creates it with scope_type + +-- scope_id columns. On a re-run of this migration (which the naive +-- file-walking RunMigrations does on every server boot), the original +-- ON CONFLICT spec no longer matches an existing constraint and +-- Postgres errors out. Pin the conflict target to `id` (the PK column, +-- unconditionally present in both pre- and post-000043 schemas) so the +-- seed stays idempotent across migration-ordering changes downstream. INSERT INTO actor_roles (id, actor_id, actor_type, role_id, granted_at, granted_by, tenant_id) VALUES ( 'ar-demo-anon-admin', @@ -278,6 +288,6 @@ VALUES ( 'system', 't-default' ) -ON CONFLICT (actor_id, actor_type, role_id, tenant_id) DO NOTHING; +ON CONFLICT (id) DO NOTHING; COMMIT; diff --git a/migrations/000045_users_deactivated_at.up.sql b/migrations/000045_users_deactivated_at.up.sql index 700ee63..00a965a 100644 --- a/migrations/000045_users_deactivated_at.up.sql +++ b/migrations/000045_users_deactivated_at.up.sql @@ -19,21 +19,27 @@ BEGIN; ALTER TABLE users ADD COLUMN IF NOT EXISTS deactivated_at TIMESTAMPTZ; -INSERT INTO permissions (name) VALUES - ('auth.user.read'), - ('auth.user.deactivate') -ON CONFLICT (name) DO NOTHING; +-- v2.1.0 Phase-9 cold-DB-smoke fix: the original commit set just +-- (name) and used a `permission` column in role_permissions. The +-- 000029 schema actually defines `permissions (id PK, name UNIQUE, +-- namespace NOT NULL)` and `role_permissions (..., permission_id, ...)`. +-- testcontainers schema-per-test never exercised this path; the cold +-- compose-up smoke caught it on a fresh Postgres. +INSERT INTO permissions (id, name, namespace) VALUES + ('p-auth-user-read', 'auth.user.read', 'auth.user'), + ('p-auth-user-deactivate', 'auth.user.deactivate', 'auth.user') +ON CONFLICT (id) DO NOTHING; -- Read is broad (admin / operator / auditor). -INSERT INTO role_permissions (role_id, permission, scope_type, scope_id) VALUES - ('r-admin', 'auth.user.read', 'global', NULL), - ('r-operator', 'auth.user.read', 'global', NULL), - ('r-auditor', 'auth.user.read', 'global', NULL) -ON CONFLICT DO NOTHING; +INSERT INTO role_permissions (role_id, permission_id, scope_type, scope_id) VALUES + ('r-admin', 'p-auth-user-read', 'global', NULL), + ('r-operator', 'p-auth-user-read', 'global', NULL), + ('r-auditor', 'p-auth-user-read', 'global', NULL) +ON CONFLICT (role_id, permission_id, scope_type, scope_id) DO NOTHING; -- Deactivate is admin-only. -INSERT INTO role_permissions (role_id, permission, scope_type, scope_id) VALUES - ('r-admin', 'auth.user.deactivate', 'global', NULL) -ON CONFLICT DO NOTHING; +INSERT INTO role_permissions (role_id, permission_id, scope_type, scope_id) VALUES + ('r-admin', 'p-auth-user-deactivate', 'global', NULL) +ON CONFLICT (role_id, permission_id, scope_type, scope_id) DO NOTHING; COMMIT;