From 91adc52263773b87df279fe1c1186b581c814112 Mon Sep 17 00:00:00 2001 From: Eugene Blikh Date: Sat, 25 Apr 2026 22:34:36 +0300 Subject: [PATCH] feat(db): SQLite schema with FTS5 + migration runner --- Justfile | 12 + internal/deps/deps.go | 15 +- internal/platform/database/database.go | 115 ++++++ internal/platform/database/database_test.go | 384 ++++++++++++++++++ internal/platform/database/migrations.go | 14 + .../database/migrations/0001_init.down.sql | 17 + .../database/migrations/0001_init.up.sql | 120 ++++++ 7 files changed, 669 insertions(+), 8 deletions(-) create mode 100644 internal/platform/database/database.go create mode 100644 internal/platform/database/database_test.go create mode 100644 internal/platform/database/migrations.go create mode 100644 internal/platform/database/migrations/0001_init.down.sql create mode 100644 internal/platform/database/migrations/0001_init.up.sql diff --git a/Justfile b/Justfile index bef5b126c0c18350f3a70b6efcde833fd5f6d9d2..8be89bd0cdd5b1b0305a1d476c4877a0fd2e9821 100644 --- a/Justfile +++ b/Justfile @@ -26,6 +26,18 @@ fmt: tidy: go mod tidy +# Migration helpers. Requires the `migrate` CLI from golang-migrate/v4 to be +# on PATH. Install on macOS via: brew install golang-migrate +# (see https://github.com/golang-migrate/migrate/tree/master/cmd/migrate). +migrate-up: + migrate -path internal/platform/database/migrations -database "sqlite://./lethe.db" up + +migrate-down: + migrate -path internal/platform/database/migrations -database "sqlite://./lethe.db" down 1 + +migrate-create NAME: + migrate create -ext sql -dir internal/platform/database/migrations -seq {{NAME}} + docker-build: docker build -t {{binary}}:{{version}} . 
diff --git a/internal/deps/deps.go b/internal/deps/deps.go index c3de7e04a6d0783ef757503eaff796ed44a0cf3e..6aeae161653dfff4c3693546fad51d0d1c967bd2 100644 --- a/internal/deps/deps.go +++ b/internal/deps/deps.go @@ -1,21 +1,20 @@ // Package deps records the locked set of direct dependencies for the lethe // server during early scaffolding. Real packages adopt these as they come -// online (server — chi/prometheus; database — sqlx/modernc.org/sqlite/ -// golang-migrate; auth — go-oidc; platform — auxilia steward/scribe). +// online (server — chi/prometheus; auth — go-oidc; observability — +// auxilia/scribe; lifecycle — auxilia/steward). // // Phase 2 promoted viper, validator/v10, and culpa to real imports under -// internal/config; they no longer appear here. Once every dep below has at -// least one real importer, this file is expected to disappear in the same -// commit that completes the migration. +// internal/config. Phase 3 promoted sqlx, modernc.org/sqlite, and +// golang-migrate/v4 (plus its sqlite driver and iofs source) to real imports +// under internal/platform/database, and culpa is now used there too. Once +// every dep below has at least one real importer, this file is expected to +// disappear in the same commit that completes the migration. 
package deps
 
 import (
 	_ "github.com/coreos/go-oidc/v3/oidc"
 	_ "github.com/go-chi/chi/v5"
-	_ "github.com/golang-migrate/migrate/v4"
-	_ "github.com/jmoiron/sqlx"
 	_ "github.com/prometheus/client_golang/prometheus"
 	_ "go.bigb.es/auxilia/scribe"
 	_ "go.bigb.es/auxilia/steward"
-	_ "modernc.org/sqlite"
 )
diff --git a/internal/platform/database/database.go b/internal/platform/database/database.go
new file mode 100644
index 0000000000000000000000000000000000000000..6ba7f767b1233e15bbbaa65947a416d1672879c7
--- /dev/null
+++ b/internal/platform/database/database.go
@@ -0,0 +1,115 @@
+package database
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"time"
+
+	"github.com/golang-migrate/migrate/v4"
+	migratesqlite "github.com/golang-migrate/migrate/v4/database/sqlite"
+	"github.com/golang-migrate/migrate/v4/source/iofs"
+	"github.com/jmoiron/sqlx"
+	"go.bigb.es/auxilia/culpa"
+	_ "modernc.org/sqlite" // register the "sqlite" database/sql driver
+
+	"sourcecraft.dev/bigbes/lethe/internal/config"
+)
+
+// Database is the steward-managed SQLite storage service. Other services
+// inject *Database and read .DB. Constructor is zero-value; lifecycle hooks
+// (Init/Destroy) match steward's Initer/Destroyer interfaces by signature.
+//
+// Cfg supplies the database path and busy timeout that Init turns into the
+// connection DSN. DB is nil until Init succeeds and is reset to nil by
+// Destroy, so readers must not use it outside that window.
+type Database struct {
+	Cfg config.DatabaseConfig `config:""`
+	DB  *sqlx.DB
+}
+
+// Init opens the SQLite database with the locked pragmas (WAL, foreign keys
+// on, NORMAL synchronous, busy timeout from config) and applies all embedded
+// migrations. Errors are wrapped with culpa codes DB_OPEN / DB_MIGRATE so the
+// HTTP layer can map them to a single status.
+func (d *Database) Init(ctx context.Context) error {
+	conn, err := sqlx.ConnectContext(ctx, "sqlite", buildDSN(d.Cfg.Path, d.Cfg.BusyTimeout))
+	if err != nil {
+		return culpa.WithCode(culpa.Wrap(err, "open sqlite"), "DB_OPEN")
+	}
+	if migrateErr := Migrate(conn); migrateErr != nil {
+		// Best-effort close: the migration failure is the error worth
+		// reporting, so a secondary close error is deliberately dropped.
+		_ = conn.Close()
+		return culpa.WithCode(culpa.Wrap(migrateErr, "apply migrations"), "DB_MIGRATE")
+	}
+	// Publish the handle only once migrations have succeeded, so a failed
+	// Init leaves d.DB untouched.
+	d.DB = conn
+	return nil
+}
+
+// Destroy releases the handle that Init stored in d.DB. It does nothing when
+// d.DB is nil, which makes it safe to call after an Init that failed or more
+// than once. A close failure is reported with culpa code DB_CLOSE.
+func (d *Database) Destroy(_ context.Context) error {
+	handle := d.DB
+	if handle == nil {
+		return nil
+	}
+	// Clear the field before closing so a repeated Destroy is a no-op even
+	// if Close returns an error.
+	d.DB = nil
+	closeErr := handle.Close()
+	if closeErr == nil {
+		return nil
+	}
+	return culpa.WithCode(culpa.Wrap(closeErr, "close sqlite"), "DB_CLOSE")
+}
+
+// Migrate brings db up to the newest migration embedded in FS. It is a free
+// function rather than a method so tests can run it directly against an
+// in-memory database without the steward graph. A migrate.ErrNoChange result
+// (schema already current) is reported as success.
+func Migrate(db *sqlx.DB) error {
+	source, err := iofs.New(FS, "migrations")
+	if err != nil {
+		return culpa.Wrap(err, "build iofs source")
+	}
+	target, err := migratesqlite.WithInstance(db.DB, &migratesqlite.Config{})
+	if err != nil {
+		return culpa.Wrap(err, "build sqlite migrate driver")
+	}
+	// NOTE(review): the migrator is never Close()d; presumably deliberate,
+	// since closing it would also close the driver wrapping db -- confirm
+	// against golang-migrate before adding a Close here.
+	migrator, err := migrate.NewWithInstance("iofs", source, "sqlite", target)
+	if err != nil {
+		return culpa.Wrap(err, "build migrator")
+	}
+	switch upErr := migrator.Up(); {
+	case upErr == nil, errors.Is(upErr, migrate.ErrNoChange):
+		return nil
+	default:
+		return culpa.Wrap(upErr, "migrate up")
+	}
+}
+
+// InTx runs fn inside a single sqlx transaction, committing on success and
+// rolling back on any returned error. The caller's error is preserved as-is
+// so errors.Is/As work upstream; rollback failures are logged via slog.
+func InTx(ctx context.Context, db *sqlx.DB, fn func(*sqlx.Tx) error) error {
+	tx, err := db.BeginTxx(ctx, nil)
+	if err != nil {
+		return culpa.WithCode(culpa.Wrap(err, "begin tx"), "DB_TX_BEGIN")
+	}
+
+	// fnReturned stays false when fn panics (or calls runtime.Goexit). In
+	// that case the deferred guard rolls the transaction back so it does not
+	// stay open on its pooled connection; the panic itself is not recovered
+	// and keeps propagating to the caller.
+	fnReturned := false
+	defer func() {
+		if fnReturned {
+			return
+		}
+		if rbErr := tx.Rollback(); rbErr != nil {
+			slog.ErrorContext(ctx, "rollback after panic failed",
+				slog.String("rollback_err", rbErr.Error()),
+			)
+		}
+	}()
+
+	fnErr := fn(tx)
+	fnReturned = true
+	if fnErr != nil {
+		// Roll back and hand the caller's error back unwrapped; a rollback
+		// failure is only logged so it cannot mask fnErr.
+		if rbErr := tx.Rollback(); rbErr != nil {
+			slog.ErrorContext(ctx, "rollback failed",
+				slog.String("rollback_err", rbErr.Error()),
+				slog.String("orig_err", fnErr.Error()),
+			)
+		}
+		return fnErr
+	}
+	if err := tx.Commit(); err != nil {
+		return culpa.WithCode(culpa.Wrap(err, "commit tx"), "DB_TX_COMMIT")
+	}
+	return nil
+}
+
+// buildDSN renders a modernc.org/sqlite-compatible DSN that pins the
+// pragmas every connection in the pool must apply. We pass busy_timeout in
+// milliseconds (the unit the SQLite pragma expects) derived from the
+// configured time.Duration.
+func buildDSN(path string, busyTimeout time.Duration) string {
+	return fmt.Sprintf(
+		"%s?_pragma=journal_mode(WAL)&_pragma=busy_timeout(%d)&_pragma=foreign_keys(on)&_pragma=synchronous(NORMAL)&_pragma=cache_size(-2000)",
+		path, busyTimeout.Milliseconds(),
+	)
+}
diff --git a/internal/platform/database/database_test.go b/internal/platform/database/database_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..1dbc2e9d1f12d4291f02b0ce0a2007cd6eb939d7
--- /dev/null
+++ b/internal/platform/database/database_test.go
@@ -0,0 +1,384 @@
+package database
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/jmoiron/sqlx"
+	_ "modernc.org/sqlite"
+
+	"sourcecraft.dev/bigbes/lethe/internal/config"
+)
+
+// openTestDB opens a fresh :memory: SQLite database with the same pragmas the
+// real Database service applies, then runs Migrate. Each call returns a
+// distinct in-memory database; we use the modernc.org/sqlite "?_pragma="
+// query parameters to guarantee FK enforcement and WAL/busy timeout settings
+// match production.
+func openTestDB(t *testing.T) *sqlx.DB {
+	t.Helper()
+	db := connectTestDB(t)
+	if err := Migrate(db); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	return db
+}
+
+// connectTestDB opens an unmigrated :memory: database using the production
+// DSN pragmas and registers its Close with t.Cleanup. The pool is pinned to a
+// single connection: every new SQLite connection to ":memory:" gets its own
+// empty database, so a second pooled connection would not see the migrated
+// schema or any rows written through the first one.
+func connectTestDB(t *testing.T) *sqlx.DB {
+	t.Helper()
+	db, err := sqlx.Connect("sqlite", buildDSN(":memory:", 5*time.Second))
+	if err != nil {
+		t.Fatalf("connect: %v", err)
+	}
+	db.SetMaxOpenConns(1)
+	t.Cleanup(func() { _ = db.Close() })
+	return db
+}
+
+// insertSession inserts one sessions row with fixed timestamps and source
+// file, failing the test on error.
+func insertSession(t *testing.T, db *sqlx.DB, owner, tool, host, sessionID string) {
+	t.Helper()
+	_, err := db.Exec(`
+		INSERT INTO sessions
+			(owner, tool, host, session_id, started_at, ended_at, working_dir, source_file, metadata)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+	`, owner, tool, host, sessionID, 1700000000, 1700000100, nil, "/tmp/x.jsonl", nil)
+	if err != nil {
+		t.Fatalf("insert session: %v", err)
+	}
+}
+
+// insertTurn inserts one turns row (seq 1, role "user") under an existing
+// session, failing the test on error. A nil toolCalls stores NULL.
+func insertTurn(t *testing.T, db *sqlx.DB, owner, tool, host, sessionID, turnID, content string, toolCalls *string) {
+	t.Helper()
+	_, err := db.Exec(`
+		INSERT INTO turns
+			(owner, tool, host, session_id, turn_id, seq, role, timestamp, content, tool_calls)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+	`, owner, tool, host, sessionID, turnID, 1, "user", 1700000050, content, toolCalls)
+	if err != nil {
+		t.Fatalf("insert turn: %v", err)
+	}
+}
+
+func TestMigrateIsIdempotent(t *testing.T) {
+	db := connectTestDB(t)
+
+	if err := Migrate(db); err != nil {
+		t.Fatalf("first migrate: %v", err)
+	}
+	// Second invocation must succeed (golang-migrate returns ErrNoChange,
+	// which Migrate translates into nil).
+	if err := Migrate(db); err != nil {
+		t.Fatalf("second migrate: %v", err)
+	}
+}
+
+func TestTurnInsertPopulatesTurnsFTSWithOwner(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "hello world from alice", nil)
+
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns_fts WHERE owner = 'alice' AND turns_fts MATCH 'hello'`); err != nil {
+		t.Fatalf("query fts: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("expected 1 fts row for owner=alice matching 'hello', got %d", n)
+	}
+}
+
+func TestTurnUpdateUpdatesTurnsFTS(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "original phrase", nil)
+
+	if _, err := db.Exec(`
+		UPDATE turns SET content = ? WHERE owner = ? AND tool = ? AND host = ? AND session_id = ? AND turn_id = ?
+	`, "replaced phrase", "alice", "cc", "phoebe", "s1", "t1"); err != nil {
+		t.Fatalf("update: %v", err)
+	}
+
+	var oldHits, newHits int
+	if err := db.Get(&oldHits, `SELECT COUNT(*) FROM turns_fts WHERE turns_fts MATCH 'original'`); err != nil {
+		t.Fatalf("query old: %v", err)
+	}
+	if err := db.Get(&newHits, `SELECT COUNT(*) FROM turns_fts WHERE turns_fts MATCH 'replaced'`); err != nil {
+		t.Fatalf("query new: %v", err)
+	}
+	if oldHits != 0 {
+		t.Fatalf("expected 0 hits for 'original' after update, got %d", oldHits)
+	}
+	if newHits != 1 {
+		t.Fatalf("expected 1 hit for 'replaced' after update, got %d", newHits)
+	}
+}
+
+func TestTurnDeleteRemovesFromTurnsFTS(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "doomed content", nil)
+
+	if _, err := db.Exec(`DELETE FROM turns WHERE turn_id = ?`, "t1"); err != nil {
+		t.Fatalf("delete: %v", err)
+	}
+
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns_fts`); err != nil {
+		t.Fatalf("query fts: %v", err)
+	}
+	if n != 0 {
+		t.Fatalf("expected empty turns_fts after delete, got %d rows", n)
+	}
+}
+
+func TestToolOutputsFTSInsertUpdateDeleteWhenToolCallsPresent(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	tc := `{"name":"shell","args":"ls /tmp"}`
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "running tool", &tc)
+
+	var hits int
+	if err := db.Get(&hits, `SELECT COUNT(*) FROM tool_outputs_fts WHERE owner = 'alice' AND tool_outputs_fts MATCH 'shell'`); err != nil {
+		t.Fatalf("query insert: %v", err)
+	}
+	if hits != 1 {
+		t.Fatalf("expected 1 tool_outputs_fts row, got %d", hits)
+	}
+
+	tc2 := `{"name":"editor","args":"open"}`
+	if _, err := db.Exec(`UPDATE turns SET tool_calls = ? WHERE turn_id = ?`, tc2, "t1"); err != nil {
+		t.Fatalf("update tool_calls: %v", err)
+	}
+	if err := db.Get(&hits, `SELECT COUNT(*) FROM tool_outputs_fts WHERE tool_outputs_fts MATCH 'editor'`); err != nil {
+		t.Fatalf("query update: %v", err)
+	}
+	if hits != 1 {
+		t.Fatalf("expected 1 hit after update, got %d", hits)
+	}
+	if err := db.Get(&hits, `SELECT COUNT(*) FROM tool_outputs_fts WHERE tool_outputs_fts MATCH 'shell'`); err != nil {
+		t.Fatalf("query update old: %v", err)
+	}
+	if hits != 0 {
+		t.Fatalf("expected 0 hits for old tool_calls after update, got %d", hits)
+	}
+
+	if _, err := db.Exec(`DELETE FROM turns WHERE turn_id = ?`, "t1"); err != nil {
+		t.Fatalf("delete: %v", err)
+	}
+	if err := db.Get(&hits, `SELECT COUNT(*) FROM tool_outputs_fts`); err != nil {
+		t.Fatalf("query delete: %v", err)
+	}
+	if hits != 0 {
+		t.Fatalf("expected empty tool_outputs_fts after delete, got %d", hits)
+	}
+}
+
+func TestToolOutputsFTSSkipsNullToolCalls(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "no tool call here", nil)
+
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM tool_outputs_fts`); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 0 {
+		t.Fatalf("expected tool_outputs_fts empty when tool_calls is NULL, got %d", n)
+	}
+}
+
+func TestForeignKeyRejectsOrphanTurn(t *testing.T) {
+	db := openTestDB(t)
+	// No sessions row inserted.
+	_, err := db.Exec(`
+		INSERT INTO turns
+			(owner, tool, host, session_id, turn_id, seq, role, timestamp, content)
+		VALUES ('alice', 'cc', 'phoebe', 'ghost', 't1', 1, 'user', 1700000050, 'no parent')
+	`)
+	if err == nil {
+		t.Fatalf("expected FK violation, got nil")
+	}
+}
+
+func TestTwoOwnersSameSessionTriple(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertSession(t, db, "bob", "cc", "phoebe", "s1")
+
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM sessions WHERE tool = 'cc' AND host = 'phoebe' AND session_id = 's1'`); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 2 {
+		t.Fatalf("expected 2 sessions across owners, got %d", n)
+	}
+}
+
+func TestFTSQueryFiltersByOwner(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+	insertSession(t, db, "bob", "cc", "phoebe", "s2")
+	insertTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", "the quick brown fox", nil)
+	insertTurn(t, db, "bob", "cc", "phoebe", "s2", "t1", "the quick brown fox", nil)
+
+	var alice, bob int
+	if err := db.Get(&alice, `SELECT COUNT(*) FROM turns_fts WHERE owner = 'alice' AND turns_fts MATCH 'quick'`); err != nil {
+		t.Fatalf("alice: %v", err)
+	}
+	if err := db.Get(&bob, `SELECT COUNT(*) FROM turns_fts WHERE owner = 'bob' AND turns_fts MATCH 'quick'`); err != nil {
+		t.Fatalf("bob: %v", err)
+	}
+	if alice != 1 || bob != 1 {
+		t.Fatalf("expected 1 hit per owner, got alice=%d bob=%d", alice, bob)
+	}
+
+	// And cross-check overall row count is exactly 2.
+	var total int
+	if err := db.Get(&total, `SELECT COUNT(*) FROM turns_fts WHERE turns_fts MATCH 'quick'`); err != nil {
+		t.Fatalf("total: %v", err)
+	}
+	if total != 2 {
+		t.Fatalf("expected 2 total fts hits, got %d", total)
+	}
+}
+
+func TestUpsertFiresUpdateTriggerAndKeepsFTSCoherent(t *testing.T) {
+	// Phase 7's ingest path uses INSERT ... ON CONFLICT DO UPDATE, which
+	// fires the UPDATE trigger (not INSERT) when the conflict branch is
+	// taken. Pin that contract here so a future SQLite/FTS5 regression
+	// trips a test instead of corrupting the index in production.
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+
+	// First insert.
+	if _, err := db.Exec(`
+		INSERT INTO turns
+			(owner, tool, host, session_id, turn_id, seq, role, timestamp, content, tool_calls)
+		VALUES ('alice','cc','phoebe','s1','t1',1,'user',1700000050,'first version', '{"name":"first"}')
+	`); err != nil {
+		t.Fatalf("first insert: %v", err)
+	}
+
+	// Upsert with new content + new tool_calls. We exercise the same shape
+	// the ingest service will use: ON CONFLICT on the full composite PK
+	// with DO UPDATE SET on the mutating columns.
+	if _, err := db.Exec(`
+		INSERT INTO turns
+			(owner, tool, host, session_id, turn_id, seq, role, timestamp, content, tool_calls)
+		VALUES ('alice','cc','phoebe','s1','t1',1,'user',1700000060,'second version', '{"name":"second"}')
+		ON CONFLICT (owner, tool, host, session_id, turn_id) DO UPDATE SET
+			content = excluded.content,
+			tool_calls = excluded.tool_calls,
+			timestamp = excluded.timestamp
+	`); err != nil {
+		t.Fatalf("upsert: %v", err)
+	}
+
+	// turns_fts: only "second" should match; "first" should not.
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns_fts WHERE turns_fts MATCH 'first'`); err != nil {
+		t.Fatalf("query first: %v", err)
+	}
+	if n != 0 {
+		t.Fatalf("expected old 'first' content gone after upsert, got %d hits", n)
+	}
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns_fts WHERE turns_fts MATCH 'second'`); err != nil {
+		t.Fatalf("query second: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("expected 1 hit for new 'second' content after upsert, got %d", n)
+	}
+
+	// tool_outputs_fts: same expectation on the JSON column.
+	if err := db.Get(&n, `SELECT COUNT(*) FROM tool_outputs_fts WHERE tool_outputs_fts MATCH 'first'`); err != nil {
+		t.Fatalf("query tc first: %v", err)
+	}
+	if n != 0 {
+		t.Fatalf("expected old tool_calls gone after upsert, got %d", n)
+	}
+	if err := db.Get(&n, `SELECT COUNT(*) FROM tool_outputs_fts WHERE tool_outputs_fts MATCH 'second'`); err != nil {
+		t.Fatalf("query tc second: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("expected 1 hit for new tool_calls after upsert, got %d", n)
+	}
+}
+
+func TestDatabaseInitDestroyOnMemoryDSN(t *testing.T) {
+	// End-to-end: the steward Init/Destroy contract drives the whole stack
+	// (DSN build + connect + migrate + close).
+	d := &Database{
+		Cfg: config.DatabaseConfig{
+			Path:        ":memory:",
+			BusyTimeout: 5 * time.Second,
+		},
+	}
+	ctx := context.Background()
+	if err := d.Init(ctx); err != nil {
+		t.Fatalf("Init: %v", err)
+	}
+	if d.DB == nil {
+		t.Fatalf("expected DB populated after Init")
+	}
+	// Confirm migrations ran end-to-end through Init.
+	var n int
+	if err := d.DB.Get(&n, `SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'sessions'`); err != nil {
+		t.Fatalf("query schema: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("expected sessions table to exist, got %d", n)
+	}
+	if err := d.Destroy(ctx); err != nil {
+		t.Fatalf("Destroy: %v", err)
+	}
+	// Idempotent on second Destroy.
+	if err := d.Destroy(ctx); err != nil {
+		t.Fatalf("second Destroy must be a no-op, got %v", err)
+	}
+}
+
+func TestInTxCommitAndRollback(t *testing.T) {
+	db := openTestDB(t)
+	insertSession(t, db, "alice", "cc", "phoebe", "s1")
+
+	// Commit path: insert a turn inside InTx, expect it visible after.
+	if err := InTx(context.Background(), db, func(tx *sqlx.Tx) error {
+		_, err := tx.Exec(`
+			INSERT INTO turns
+				(owner, tool, host, session_id, turn_id, seq, role, timestamp, content)
+			VALUES ('alice', 'cc', 'phoebe', 's1', 't1', 1, 'user', 1700000050, 'committed')
+		`)
+		return err
+	}); err != nil {
+		t.Fatalf("InTx commit: %v", err)
+	}
+	var n int
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns WHERE turn_id = 't1'`); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("commit path: expected 1 turn, got %d", n)
+	}
+
+	// Rollback path: error inside fn rolls back; InTx returns the error.
+	sentinel := errors.New("rollback me")
+	err := InTx(context.Background(), db, func(tx *sqlx.Tx) error {
+		if _, err := tx.Exec(`
+			INSERT INTO turns
+				(owner, tool, host, session_id, turn_id, seq, role, timestamp, content)
+			VALUES ('alice', 'cc', 'phoebe', 's1', 't2', 2, 'user', 1700000060, 'rolled back')
+		`); err != nil {
+			return err
+		}
+		return sentinel
+	})
+	if !errors.Is(err, sentinel) {
+		t.Fatalf("expected sentinel error from InTx, got %v", err)
+	}
+	if err := db.Get(&n, `SELECT COUNT(*) FROM turns WHERE turn_id = 't2'`); err != nil {
+		t.Fatalf("count after rollback: %v", err)
+	}
+	if n != 0 {
+		t.Fatalf("rollback path: expected 0 rows for t2, got %d", n)
+	}
+}
diff --git a/internal/platform/database/migrations.go b/internal/platform/database/migrations.go
new file mode 100644
index 0000000000000000000000000000000000000000..27e2963e006980e4bfc895332f555a6cc1ee2140
--- /dev/null
+++ b/internal/platform/database/migrations.go
@@ -0,0 +1,14 @@
+// Package database hosts the SQLite-backed storage layer for lethe.
+//
+// All schema changes go through embedded migrations that golang-migrate
+// applies on startup; this file owns the embed.FS only.
+package database
+
+import "embed"
+
+// FS holds the SQL migration files. Phase 7 (ingest) and beyond add new
+// migrations alongside 0001_init by dropping additional `NNNN_*.up.sql` /
+// `.down.sql` pairs into the migrations directory; no code changes required.
+// The embed pattern below only matches files directly inside migrations/
+// whose names end in .sql.
+//
+//go:embed migrations/*.sql
+var FS embed.FS
diff --git a/internal/platform/database/migrations/0001_init.down.sql b/internal/platform/database/migrations/0001_init.down.sql
new file mode 100644
index 0000000000000000000000000000000000000000..c2973cbc518b0b40c70e83eaa60f53bd6cd91bea
--- /dev/null
+++ b/internal/platform/database/migrations/0001_init.down.sql
@@ -0,0 +1,17 @@
+-- Drop in dependency order: triggers, FTS virtual tables, real tables.
+-- Indexes on `sessions` are dropped automatically by DROP TABLE; the
+-- explicit DROP INDEX below is therefore redundant but harmless, and keeps
+-- this file a one-to-one mirror of the objects the up migration creates.
+
+DROP TRIGGER IF EXISTS tool_outputs_fts_delete;
+DROP TRIGGER IF EXISTS tool_outputs_fts_update;
+DROP TRIGGER IF EXISTS tool_outputs_fts_insert;
+DROP TRIGGER IF EXISTS turns_fts_delete;
+DROP TRIGGER IF EXISTS turns_fts_update;
+DROP TRIGGER IF EXISTS turns_fts_insert;
+
+DROP TABLE IF EXISTS tool_outputs_fts;
+DROP TABLE IF EXISTS turns_fts;
+
+DROP INDEX IF EXISTS sessions_owner_started;
+
+DROP TABLE IF EXISTS turns;
+DROP TABLE IF EXISTS sessions;
diff --git a/internal/platform/database/migrations/0001_init.up.sql b/internal/platform/database/migrations/0001_init.up.sql
new file mode 100644
index 0000000000000000000000000000000000000000..546209343be589eeacea0a3fe828c11a258e6e01
--- /dev/null
+++ b/internal/platform/database/migrations/0001_init.up.sql
@@ -0,0 +1,120 @@
+-- Lethe schema v1.
+--
+-- FTS5 mode choice: REGULAR (no `content=` clause).
+--
+-- Why not contentless (`content=''`): contentless FTS5 tables do not store
+-- UNINDEXED column values either, which defeats `WHERE owner = ?` filtering
+-- without a join back to `turns`. UNINDEXED columns are only meaningful in a
+-- regular or external-content FTS5 table.
+--
+-- Why not external content (`content='turns'`): SQLite's
+-- `INSERT ... ON CONFLICT ... DO UPDATE` upsert path fires the UPDATE
+-- trigger (not INSERT) when the conflict branch is taken, and across SQLite
+-- versions the external-content trigger contract has had subtle issues
+-- around UPSERT ordering. Phase 7 (ingest) uses upserts heavily, so we
+-- avoid that risk here.
+--
+-- Regular FTS5 stores `content` twice (once in `turns`, once in the FTS
+-- shadow table); the storage cost is the price for trigger-correctness
+-- robustness across SQLite versions. Triggers are the only writers --
+-- never INSERT/UPDATE/DELETE *_fts directly outside this file.
+--
+-- Trigger semantics: each trigger is keyed by the source-table rowid (every
+-- non-WITHOUT-ROWID table has one). On UPDATE we DELETE then INSERT to
+-- avoid an UPDATE-in-place edge case that has historically misbehaved in
+-- some FTS5 builds. UPSERT (INSERT ... ON CONFLICT DO UPDATE) fires the
+-- UPDATE trigger when the conflict branch is taken; the DELETE+INSERT
+-- pattern keeps FTS in sync regardless.
+--
+-- NOTE(review): a source-table rowid is only guaranteed stable across
+-- VACUUM when the table declares an INTEGER PRIMARY KEY; confirm the
+-- `turns` definition below satisfies that before relying on rowid keying.
+
+-- One row per recorded session, identified by (owner, tool, host,
+-- session_id). started_at / ended_at are stored as INTEGER.
+CREATE TABLE sessions (
+    owner       TEXT    NOT NULL,
+    tool        TEXT    NOT NULL,
+    host        TEXT    NOT NULL,
+    session_id  TEXT    NOT NULL,
+    started_at  INTEGER NOT NULL,
+    ended_at    INTEGER NOT NULL,
+    working_dir TEXT,
+    source_file TEXT    NOT NULL,
+    metadata    TEXT,
+    PRIMARY KEY (owner, tool, host, session_id)
+);
+
+CREATE INDEX sessions_owner_started ON sessions(owner, started_at DESC);
+
+-- One row per turn inside a session.
+--
+-- `id` is an explicit INTEGER PRIMARY KEY, i.e. an alias for the rowid. The
+-- FTS triggers below key every FTS row on turns.rowid, and SQLite only
+-- guarantees a rowid is stable (in particular across VACUUM) when it is
+-- aliased by an INTEGER PRIMARY KEY. Without this column a VACUUM could
+-- renumber `turns` and silently detach it from both FTS tables.
+--
+-- The natural key stays enforced through the UNIQUE constraint, so
+-- `ON CONFLICT (owner, tool, host, session_id, turn_id)` upserts keep
+-- working; every column in it is NOT NULL, so uniqueness is as strict as
+-- the composite primary key it replaces.
+CREATE TABLE turns (
+    id         INTEGER PRIMARY KEY,
+    owner      TEXT    NOT NULL,
+    tool       TEXT    NOT NULL,
+    host       TEXT    NOT NULL,
+    session_id TEXT    NOT NULL,
+    turn_id    TEXT    NOT NULL,
+    seq        INTEGER NOT NULL,
+    role       TEXT    NOT NULL,
+    timestamp  INTEGER NOT NULL,
+    content    TEXT    NOT NULL,
+    model      TEXT,
+    tokens_in  INTEGER,
+    tokens_out INTEGER,
+    cost_usd   REAL,
+    tool_calls TEXT,
+    metadata   TEXT,
+    UNIQUE (owner, tool, host, session_id, turn_id),
+    FOREIGN KEY (owner, tool, host, session_id)
+        REFERENCES sessions(owner, tool, host, session_id)
+        ON DELETE CASCADE
+);
+
+-- FTS5 over turn prose `content`. The UNINDEXED columns are stored but not
+-- tokenized, so `WHERE owner = ?` can filter FTS hits without a join back
+-- to `turns`.
+CREATE VIRTUAL TABLE turns_fts USING fts5 (
+    content,
+    owner UNINDEXED,
+    tool UNINDEXED,
+    host UNINDEXED,
+    session_id UNINDEXED,
+    turn_id UNINDEXED
+);
+
+-- In every trigger below, new.rowid / old.rowid is the turns.id value.
+CREATE TRIGGER turns_fts_insert AFTER INSERT ON turns BEGIN
+    INSERT INTO turns_fts(rowid, content, owner, tool, host, session_id, turn_id)
+    VALUES (new.rowid, new.content, new.owner, new.tool, new.host, new.session_id, new.turn_id);
+END;
+
+CREATE TRIGGER turns_fts_update AFTER UPDATE ON turns BEGIN
+    DELETE FROM turns_fts WHERE rowid = old.rowid;
+    INSERT INTO turns_fts(rowid, content, owner, tool, host, session_id, turn_id)
+    VALUES (new.rowid, new.content, new.owner, new.tool, new.host, new.session_id, new.turn_id);
+END;
+
+CREATE TRIGGER turns_fts_delete AFTER DELETE ON turns BEGIN
+    DELETE FROM turns_fts WHERE rowid = old.rowid;
+END;
+
+-- FTS5 over `tool_calls` JSON text. INSERT trigger fires only when the
+-- column is non-NULL; UPDATE trigger always deletes any prior row, then
+-- re-inserts only if NEW.tool_calls is non-NULL (covers all four NULL/
+-- non-NULL transitions).
+CREATE VIRTUAL TABLE tool_outputs_fts USING fts5 (
+    tool_calls,
+    owner UNINDEXED,
+    tool UNINDEXED,
+    host UNINDEXED,
+    session_id UNINDEXED,
+    turn_id UNINDEXED
+);
+
+CREATE TRIGGER tool_outputs_fts_insert AFTER INSERT ON turns
+WHEN new.tool_calls IS NOT NULL BEGIN
+    INSERT INTO tool_outputs_fts(rowid, tool_calls, owner, tool, host, session_id, turn_id)
+    VALUES (new.rowid, new.tool_calls, new.owner, new.tool, new.host, new.session_id, new.turn_id);
+END;
+
+CREATE TRIGGER tool_outputs_fts_update AFTER UPDATE ON turns BEGIN
+    DELETE FROM tool_outputs_fts WHERE rowid = old.rowid;
+    INSERT INTO tool_outputs_fts(rowid, tool_calls, owner, tool, host, session_id, turn_id)
+    SELECT new.rowid, new.tool_calls, new.owner, new.tool, new.host, new.session_id, new.turn_id
+    WHERE new.tool_calls IS NOT NULL;
+END;
+
+CREATE TRIGGER tool_outputs_fts_delete AFTER DELETE ON turns BEGIN
+    DELETE FROM tool_outputs_fts WHERE rowid = old.rowid;
+END;