~bigbes/lethe

430b796df8a8a4c2df3761271a10115b2eeabbba — Eugene Blikh a month ago 1af5bcb
session: extend List response with summary, turn_count, token totals, model

Add five aggregate fields to Session struct (Summary, TurnCount,
TokensInTotal, TokensOutTotal, Model) populated via a new
sessionListSelectColumns const that wraps correlated subqueries.
List uses the new const; Get is unchanged.

TestList_Aggregates covers: zero turns, long-content truncation to 200
chars, mixed-role model tracking (newest turn wins), and NULL token sums.
2 files changed, 198 insertions(+), 12 deletions(-)

M internal/domain/session/repository.go
M internal/domain/session/repository_test.go
M internal/domain/session/repository.go => internal/domain/session/repository.go +55 -12
@@ 94,16 94,26 @@ func (j *JSONText) UnmarshalJSON(b []byte) error {

// Session is the row shape returned by List and embedded in SessionWithTurns.
// JSON tags mirror the wire vocabulary used by the collector and clients.
//
// The first nine fields map one-to-one onto the columns named in
// sessionSelectColumns. The remaining five are aggregates computed over the
// session's turns and are populated only by the List path via
// sessionListSelectColumns:
//
//   - Summary: leading 200 characters of the earliest user turn's content;
//     the empty string when the session has no user turn.
//   - TurnCount: number of turns of any role.
//   - TokensInTotal / TokensOutTotal: sums of the per-turn token counters,
//     with NULL counters contributing 0.
//   - Model: model recorded on the turn with the highest seq; nil when that
//     value is NULL or the session has no turns.
//
// The Get path uses sessionSelectColumns and leaves the aggregates at their
// zero values because Get returns a SessionWithTurns whose Turns slice
// provides the detail. Only Model carries omitempty, so on the Get path the
// other four aggregates still serialize as "" / 0.
type Session struct {
	Owner          string   `db:"owner"            json:"owner"`
	Tool           string   `db:"tool"             json:"tool"`
	Host           string   `db:"host"             json:"host"`
	SessionID      string   `db:"session_id"       json:"session_id"`
	StartedAt      int64    `db:"started_at"       json:"started_at"`
	EndedAt        int64    `db:"ended_at"         json:"ended_at"`
	WorkingDir     *string  `db:"working_dir"      json:"working_dir,omitempty"`
	SourceFile     string   `db:"source_file"      json:"source_file"`
	Metadata       JSONText `db:"metadata"         json:"metadata,omitempty"`
	Summary        string   `db:"summary"          json:"summary"`
	TurnCount      int64    `db:"turn_count"       json:"turn_count"`
	TokensInTotal  int64    `db:"tokens_in_total"  json:"tokens_in_total"`
	TokensOutTotal int64    `db:"tokens_out_total" json:"tokens_out_total"`
	Model          *string  `db:"model"            json:"model,omitempty"`
}

// Turn is the row shape returned inside SessionWithTurns. Optional columns


@@ 174,10 184,43 @@ type Repository struct {
func (r *Repository) Init(_ context.Context) error { return nil }

// sessionSelectColumns lists the plain `sessions` columns, in the order and
// under the names of the `db` tags on Session's nine non-aggregate fields.
// It is the column list for Get: Get returns a SessionWithTurns, and the
// embedded Session does not need aggregate columns because the full Turns
// slice already provides that detail.
const sessionSelectColumns = `owner, tool, host, session_id, ` +
	`started_at, ended_at, ` +
	`working_dir, source_file, metadata`

// turnScopePredicate is the WHERE clause shared by every correlated subquery
// in sessionListSelectColumns. It restricts `turns` (aliased t) to the rows
// whose owner, tool, host and session_id equal those of the enclosing
// `sessions` row. The leading space lets it follow `FROM turns t` directly.
const turnScopePredicate = ` WHERE t.owner = sessions.owner AND t.tool = sessions.tool` +
	` AND t.host = sessions.host AND t.session_id = sessions.session_id`

// sessionListSelectColumns is the column list used by List. It repeats the
// nine base columns of sessionSelectColumns and appends five per-session
// aggregates, each computed by a correlated subquery scoped with
// turnScopePredicate, so that List gets everything in a single round-trip.
// Column order matches the Session struct's `db` tags exactly (base columns
// first, then the aggregates):
//
//   - summary: first 200 characters (substr) of the content of the user turn
//     with the lowest seq, wrapped in COALESCE(..., '') so the non-pointer
//     string field never receives NULL.
//   - turn_count: COUNT(*) over all of the session's turns, any role.
//   - tokens_in_total: COALESCE(SUM(tokens_in), 0); NULL rows count as 0.
//   - tokens_out_total: COALESCE(SUM(tokens_out), 0); same.
//   - model: model value of the turn with the highest seq (newest). This one
//     is not coalesced, so it is NULL when that turn has no model or when
//     the session has no turns.
const sessionListSelectColumns = `owner, tool, host, session_id, started_at, ended_at, working_dir, source_file, metadata,` +
	` COALESCE((SELECT substr(t.content, 1, 200) FROM turns t` + turnScopePredicate +
	` AND t.role = 'user' ORDER BY t.seq ASC LIMIT 1), '') AS summary,` +
	` (SELECT COUNT(*) FROM turns t` + turnScopePredicate + `) AS turn_count,` +
	` COALESCE((SELECT SUM(t.tokens_in) FROM turns t` + turnScopePredicate + `), 0) AS tokens_in_total,` +
	` COALESCE((SELECT SUM(t.tokens_out) FROM turns t` + turnScopePredicate + `), 0) AS tokens_out_total,` +
	` (SELECT t.model FROM turns t` + turnScopePredicate +
	` ORDER BY t.seq DESC LIMIT 1) AS model`

// turnSelectColumns is the counterpart of sessionSelectColumns for the
// `turns` table: the column list to place after SELECT when reading turn
// rows. The pieces below are grouped as identity columns, turn payload, and
// the optional usage/metadata columns; the concatenated value is one flat
// comma-separated list.
const turnSelectColumns = `owner, tool, host, session_id, turn_id, ` +
	`seq, role, timestamp, content, ` +
	`model, tokens_in, tokens_out, cost_usd, tool_calls, metadata`



@@ 197,7 240,7 @@ func (r *Repository) List(ctx context.Context, f ListFilter) ([]Session, error) 
		args []any
	)
	sb.WriteString("SELECT ")
	sb.WriteString(sessionSelectColumns)
	sb.WriteString(sessionListSelectColumns)
	sb.WriteString(" FROM sessions")

	clauses := make([]string, 0, 5)

M internal/domain/session/repository_test.go => internal/domain/session/repository_test.go +143 -0
@@ 328,6 328,149 @@ func TestGet_AdminAllOwners_FetchesAnyOwner(t *testing.T) {
	}
}

// seedTurnFull writes one row into `turns` with the model and both token
// counters populated from the arguments; cost_usd, tool_calls and metadata
// are stored as NULL. Any insert error aborts the calling test via t.Fatalf.
func seedTurnFull(t *testing.T, db *sqlx.DB, owner, tool, host, sid, tid string, seq, ts int64, role, content, model string, tokensIn, tokensOut int64) {
	t.Helper()

	// Placeholders follow the column order of the INSERT, which puts seq and
	// role ahead of timestamp; the argument list below is ordered to match.
	const insertTurn = `
		INSERT INTO turns (owner, tool, host, session_id, turn_id, seq, role, timestamp, content,
		                   model, tokens_in, tokens_out, cost_usd, tool_calls, metadata)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL, NULL)`

	if _, err := db.Exec(insertTurn,
		owner, tool, host, sid, tid,
		seq, role, ts, content,
		model, tokensIn, tokensOut,
	); err != nil {
		t.Fatalf("seed turn full %s/%s: %v", sid, tid, err)
	}
}

// TestList_Aggregates checks the five aggregate fields that List fills in on
// each Session (Summary, TurnCount, TokensInTotal, TokensOutTotal, Model).
// Every subtest builds its own repository through newRepo, seeds one session
// for owner "alice", and lists with that owner scope. The cases are:
//
//   - a session with no turns: all aggregates are zero values and Model is nil;
//   - a single 250-character user turn: Summary is its first 200 characters;
//   - four turns of mixed roles: TurnCount is 4 and Model is the model of the
//     highest-seq turn;
//   - turns with and without token counters: totals sum only the non-NULL
//     counters.
func TestList_Aggregates(t *testing.T) {
	t.Run("zero turns yields empty aggregates", func(t *testing.T) {
		repo, db := newRepo(t)
		seedSession(t, db, "alice", "cc", "phoebe", "s1", 1700000000, 1700000010)

		got, err := repo.List(context.Background(), session.ListFilter{
			Owner: session.OwnerScope{User: "alice"},
			Limit: 50,
		})
		if err != nil {
			t.Fatalf("List: %v", err)
		}
		if len(got) != 1 {
			t.Fatalf("expected 1 session; got %d", len(got))
		}
		s := got[0]
		if s.TurnCount != 0 {
			t.Errorf("TurnCount: got %d; want 0", s.TurnCount)
		}
		if s.TokensInTotal != 0 {
			t.Errorf("TokensInTotal: got %d; want 0", s.TokensInTotal)
		}
		if s.TokensOutTotal != 0 {
			t.Errorf("TokensOutTotal: got %d; want 0", s.TokensOutTotal)
		}
		if s.Summary != "" {
			t.Errorf("Summary: got %q; want empty string", s.Summary)
		}
		if s.Model != nil {
			// Print the pointed-to value; %v on the pointer would only show
			// an address.
			t.Errorf("Model: got %q; want nil", *s.Model)
		}
	})

	t.Run("one user turn long content truncates summary to 200 chars", func(t *testing.T) {
		repo, db := newRepo(t)
		seedSession(t, db, "alice", "cc", "phoebe", "s1", 1700000000, 1700000010)
		// Build a deterministic 250-char string cycling through 'a'..'z'.
		// All bytes are ASCII, so byte length and character count agree and
		// the len()/slice checks below are valid against a character-based
		// truncation.
		buf := make([]byte, 250)
		for i := range buf {
			buf[i] = byte('a' + i%26)
		}
		longContent := string(buf)
		seedTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", 1, 1700000005, "user", longContent)

		got, err := repo.List(context.Background(), session.ListFilter{
			Owner: session.OwnerScope{User: "alice"},
			Limit: 50,
		})
		if err != nil {
			t.Fatalf("List: %v", err)
		}
		if len(got) != 1 {
			t.Fatalf("expected 1 session; got %d", len(got))
		}
		s := got[0]
		if len(s.Summary) != 200 {
			t.Errorf("Summary length: got %d; want 200", len(s.Summary))
		}
		if s.Summary != longContent[:200] {
			t.Errorf("Summary content mismatch: got %q; want %q", s.Summary, longContent[:200])
		}
	})

	t.Run("multiple turns with mixed roles model is newest turn model regardless of role", func(t *testing.T) {
		repo, db := newRepo(t)
		seedSession(t, db, "alice", "cc", "phoebe", "s1", 1700000000, 1700000010)
		// seq 1: user turn (no model)
		seedTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", 1, 1700000001, "user", "hello")
		// seq 2: assistant turn with model "gpt-4"
		seedTurnFull(t, db, "alice", "cc", "phoebe", "s1", "t2", 2, 1700000002, "assistant", "hi there", "gpt-4", 10, 20)
		// seq 3: user turn again (no model — seeded via seedTurn, model stays NULL)
		seedTurn(t, db, "alice", "cc", "phoebe", "s1", "t3", 3, 1700000003, "user", "follow up")
		// seq 4: assistant with newer model "gpt-4o"
		seedTurnFull(t, db, "alice", "cc", "phoebe", "s1", "t4", 4, 1700000004, "assistant", "response", "gpt-4o", 15, 30)

		got, err := repo.List(context.Background(), session.ListFilter{
			Owner: session.OwnerScope{User: "alice"},
			Limit: 50,
		})
		if err != nil {
			t.Fatalf("List: %v", err)
		}
		if len(got) != 1 {
			t.Fatalf("expected 1 session; got %d", len(got))
		}
		s := got[0]
		if s.TurnCount != 4 {
			t.Errorf("TurnCount: got %d; want 4", s.TurnCount)
		}
		if s.Model == nil {
			t.Fatal("Model: got nil; want non-nil")
		}
		if *s.Model != "gpt-4o" {
			t.Errorf("Model: got %q; want %q", *s.Model, "gpt-4o")
		}
	})

	t.Run("NULL tokens_in and tokens_out on some turns treated as zero in sums", func(t *testing.T) {
		repo, db := newRepo(t)
		seedSession(t, db, "alice", "cc", "phoebe", "s1", 1700000000, 1700000010)
		// t1: NULL tokens
		seedTurn(t, db, "alice", "cc", "phoebe", "s1", "t1", 1, 1700000001, "user", "msg1")
		// t2: non-NULL tokens
		seedTurnFull(t, db, "alice", "cc", "phoebe", "s1", "t2", 2, 1700000002, "assistant", "resp1", "gpt-4", 100, 200)
		// t3: NULL tokens
		seedTurn(t, db, "alice", "cc", "phoebe", "s1", "t3", 3, 1700000003, "user", "msg2")
		// t4: non-NULL tokens
		seedTurnFull(t, db, "alice", "cc", "phoebe", "s1", "t4", 4, 1700000004, "assistant", "resp2", "gpt-4", 50, 75)

		got, err := repo.List(context.Background(), session.ListFilter{
			Owner: session.OwnerScope{User: "alice"},
			Limit: 50,
		})
		if err != nil {
			t.Fatalf("List: %v", err)
		}
		if len(got) != 1 {
			t.Fatalf("expected 1 session; got %d", len(got))
		}
		s := got[0]
		// Expected totals are the sums of the two seeded assistant turns:
		// 100+50 in, 200+75 out.
		if s.TokensInTotal != 150 {
			t.Errorf("TokensInTotal: got %d; want 150", s.TokensInTotal)
		}
		if s.TokensOutTotal != 275 {
			t.Errorf("TokensOutTotal: got %d; want 275", s.TokensOutTotal)
		}
	})
}

// codeOf walks the culpa chain for a CodeDetail and returns the string code,
// or "" if there isn't one. Local helper so tests don't reach into apierror's
// unexported lookup.