~bigbes/lethe

ref: 859d3fd879fffd81f7f72b5511e9fec061e3288d lethe/internal/domain/session/repository.go -rw-r--r-- 13.7 KiB
859d3fd8 — Eugene Blikh auth: lift oidc test stub into internal/testutil/oidcstub a month ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
// Package session implements the read-only sessions API: list with filters
// and detail-with-turns. The package layers as Repository (raw SQL) and
// Handler (HTTP boundary). Both are steward services; the Repository owns
// the SQL composition and the per-owner isolation invariant, while the
// Handler resolves the owner scope from the authenticated identity and
// translates errors into RFC 7807 problems.
//
// The owner-scope resolution rules (locked):
//   - Default scope is `WHERE owner = <current user>`. Non-admins always read
//     their own data only.
//   - Admins may pass `?owner=<user>` to scope to that user, or `?owner=*` to
//     read across all owners (no WHERE clause on owner).
//   - Non-admin requests that include `?owner=` (any value, including their
//     own user) are 403 — the parameter is admin-only.
//   - A Get for another owner's session returns 404 (not 403). Returning a
//     distinct status would leak existence across tenants.
package session

import (
	"context"
	"database/sql"
	"database/sql/driver"
	"encoding/json"
	"errors"
	"fmt"
	"strings"

	"go.bigb.es/auxilia/culpa"

	"sourcecraft.dev/bigbes/lethe/internal/platform/database"
)

// JSONText is a thin wrapper around the raw JSON bytes stored in TEXT columns
// (sessions.metadata, turns.tool_calls, turns.metadata). The SQLite driver
// does not know how to scan NULL into json.RawMessage directly, and the
// stored values may legitimately be NULL — Scan handles that by leaving the
// underlying slice nil. JSON marshal omits the field entirely (`,omitempty`)
// when the slice is empty, so a NULL column becomes an absent JSON key.
type JSONText []byte

// Scan implements sql.Scanner so NULL TEXT columns become a nil []byte.
// Non-NULL values are copied (the driver's buffer is not safe to retain).
func (j *JSONText) Scan(src any) error {
	if src == nil {
		*j = nil
		return nil
	}
	switch v := src.(type) {
	case []byte:
		buf := make([]byte, len(v))
		copy(buf, v)
		*j = buf
		return nil
	case string:
		*j = []byte(v)
		return nil
	default:
		return fmt.Errorf("session: cannot scan %T into JSONText", src)
	}
}

// Value implements driver.Valuer; included for completeness so the type can
// flow back through any future writes (Phase 8 itself is read-only).
func (j JSONText) Value() (driver.Value, error) {
	if len(j) == 0 {
		return nil, nil
	}
	return string(j), nil
}

// MarshalJSON returns the stored bytes verbatim (or `null` if empty) so the
// JSON output preserves whatever the collector originally produced.
func (j JSONText) MarshalJSON() ([]byte, error) {
	if len(j) == 0 {
		return []byte("null"), nil
	}
	// Validate that the stored bytes are syntactically valid JSON before
	// emitting them — guards against a corrupt row turning a 200 into an
	// HTTP-level encoding panic.
	if !json.Valid(j) {
		return nil, fmt.Errorf("session: stored JSON is invalid")
	}
	return []byte(j), nil
}

// UnmarshalJSON stores the raw bytes verbatim, mirroring json.RawMessage.
func (j *JSONText) UnmarshalJSON(b []byte) error {
	if j == nil {
		return errors.New("session.JSONText: UnmarshalJSON on nil pointer")
	}
	*j = append((*j)[0:0], b...)
	return nil
}

// Session is the row shape returned by List and embedded in SessionWithTurns.
// JSON tags mirror the wire vocabulary used by the collector and clients.
//
// The five aggregate fields (Summary, TurnCount, TokensInTotal, TokensOutTotal,
// Model) are populated only by the List path via sessionListSelectColumns.
// The Get path uses sessionSelectColumns and leaves them at their zero values
// because Get returns a SessionWithTurns whose Turns slice provides the detail.
type Session struct {
	Owner          string   `db:"owner"            json:"owner"`
	Tool           string   `db:"tool"             json:"tool"`
	Host           string   `db:"host"             json:"host"`
	SessionID      string   `db:"session_id"       json:"session_id"`
	StartedAt      int64    `db:"started_at"       json:"started_at"`
	EndedAt        int64    `db:"ended_at"         json:"ended_at"`
	WorkingDir     *string  `db:"working_dir"      json:"working_dir,omitempty"`
	SourceFile     string   `db:"source_file"      json:"source_file"`
	Metadata       JSONText `db:"metadata"         json:"metadata,omitempty"`
	Summary        string   `db:"summary"          json:"summary"`
	TurnCount      int64    `db:"turn_count"       json:"turn_count"`
	TokensInTotal  int64    `db:"tokens_in_total"  json:"tokens_in_total"`
	TokensOutTotal int64    `db:"tokens_out_total" json:"tokens_out_total"`
	Model          *string  `db:"model"            json:"model,omitempty"`
}

// Turn is the row shape returned inside SessionWithTurns. Optional columns
// (model, tokens_in/out, cost_usd, tool_calls, metadata) are nullable in the
// schema and are exposed as pointers / RawMessage so callers can distinguish
// "absent" from "zero".
type Turn struct {
	Owner     string   `db:"owner"       json:"owner"`
	Tool      string   `db:"tool"        json:"tool"`
	Host      string   `db:"host"        json:"host"`
	SessionID string   `db:"session_id"  json:"session_id"`
	TurnID    string   `db:"turn_id"     json:"turn_id"`
	Seq       int64    `db:"seq"         json:"seq"`
	Role      string   `db:"role"        json:"role"`
	Timestamp int64    `db:"timestamp"   json:"timestamp"`
	Content   string   `db:"content"     json:"content"`
	Model     *string  `db:"model"       json:"model,omitempty"`
	TokensIn  *int64   `db:"tokens_in"   json:"tokens_in,omitempty"`
	TokensOut *int64   `db:"tokens_out"  json:"tokens_out,omitempty"`
	CostUSD   *float64 `db:"cost_usd"    json:"cost_usd,omitempty"`
	ToolCalls JSONText `db:"tool_calls"  json:"tool_calls,omitempty"`
	Metadata  JSONText `db:"metadata"    json:"metadata,omitempty"`
}

// SessionWithTurns is the response shape for Get. Session is embedded so the
// JSON output flattens the session columns at the top level alongside the
// "turns" array.
type SessionWithTurns struct {
	Session
	Turns []Turn `json:"turns"`
}

// OwnerScope is resolved by the Handler from the authenticated identity and
// the optional `?owner=` query parameter. It is the only knob the Repository
// has for tightening or widening the owner WHERE clause.
//
// Exactly one of the three states is meaningful per call:
//   - AllOwners=true               → no WHERE clause on owner (admin + ?owner=*)
//   - SpecificOwner != nil         → WHERE owner = *SpecificOwner (admin + ?owner=u)
//   - otherwise                    → WHERE owner = User (default; non-admins always)
type OwnerScope struct {
	User          string
	AllOwners     bool
	SpecificOwner *string
}

// ListFilter aggregates every option List supports. The Handler clamps Limit
// and Offset before constructing this struct; the Repository assumes both are
// already in the safe range.
type ListFilter struct {
	Owner  OwnerScope
	Tool   *string
	Host   *string
	Cwd    *string
	Since  *int64
	Until  *int64
	Limit  int
	Offset int
}

// Repository is the SQL steward for the sessions read API. It is stateless
// beyond its injected dependencies; Init is empty.
type Repository struct {
	Database *database.Database `inject:""`
}

// Init satisfies the steward Initer contract. Nothing to set up — the
// underlying *sqlx.DB is owned by the Database steward.
func (r *Repository) Init(_ context.Context) error { return nil }

// sessionSelectColumns is the canonical column list for SELECTs against
// `sessions`. Used by Get (which returns a SessionWithTurns whose embedded
// Session does not need aggregate columns — the full Turns slice provides that
// detail).
const sessionSelectColumns = `owner, tool, host, session_id, started_at, ended_at, working_dir, source_file, metadata`

// sessionListSelectColumns extends sessionSelectColumns with four correlated
// subqueries and a summary subquery so that List returns aggregate data per
// session in a single round-trip. Column order matches the Session struct's
// `db` tags exactly (base columns first, then the five aggregate columns).
//
// Scope predicate used in every subquery:
//
//	t.owner = sessions.owner AND t.tool = sessions.tool AND
//	t.host = sessions.host AND t.session_id = sessions.session_id
//
// summary      — first 200 chars of the earliest user turn's content; COALESCE
//
//	to '' so the non-pointer string field never receives NULL.
//
// turn_count   — total number of turns (all roles).
// tokens_in_total  — COALESCE(SUM(tokens_in),  0): NULL rows count as 0.
// tokens_out_total — COALESCE(SUM(tokens_out), 0): same.
// model        — model value of the turn with the highest seq (newest).
const sessionListSelectColumns = `owner, tool, host, session_id, started_at, ended_at, working_dir, source_file, metadata,` +
	` COALESCE((SELECT substr(t.content, 1, 200) FROM turns t` +
	` WHERE t.owner = sessions.owner AND t.tool = sessions.tool AND t.host = sessions.host AND t.session_id = sessions.session_id` +
	` AND t.role = 'user' ORDER BY t.seq ASC LIMIT 1), '') AS summary,` +
	` (SELECT COUNT(*) FROM turns t` +
	` WHERE t.owner = sessions.owner AND t.tool = sessions.tool AND t.host = sessions.host AND t.session_id = sessions.session_id) AS turn_count,` +
	` COALESCE((SELECT SUM(t.tokens_in) FROM turns t` +
	` WHERE t.owner = sessions.owner AND t.tool = sessions.tool AND t.host = sessions.host AND t.session_id = sessions.session_id), 0) AS tokens_in_total,` +
	` COALESCE((SELECT SUM(t.tokens_out) FROM turns t` +
	` WHERE t.owner = sessions.owner AND t.tool = sessions.tool AND t.host = sessions.host AND t.session_id = sessions.session_id), 0) AS tokens_out_total,` +
	` (SELECT t.model FROM turns t` +
	` WHERE t.owner = sessions.owner AND t.tool = sessions.tool AND t.host = sessions.host AND t.session_id = sessions.session_id` +
	` ORDER BY t.seq DESC LIMIT 1) AS model`

// turnSelectColumns mirrors sessionSelectColumns for the `turns` table.
const turnSelectColumns = `owner, tool, host, session_id, turn_id, seq, role, timestamp, content, model, tokens_in, tokens_out, cost_usd, tool_calls, metadata`

// List runs the dynamic-WHERE list query. The owner clause is built first
// (per OwnerScope), then optional filters are appended in fixed order; only
// values reach the driver via "?" placeholders — column names and the
// AND-skeleton are constructed from string literals never derived from input.
//
// Ordering is `started_at DESC, session_id DESC`: the secondary key keeps
// pagination deterministic when two sessions share a started_at.
//
// An empty result set returns a non-nil zero-length slice. Callers that
// JSON-encode the slice get `[]` rather than `null`.
func (r *Repository) List(ctx context.Context, f ListFilter) ([]Session, error) {
	var (
		sb   strings.Builder
		args []any
	)
	sb.WriteString("SELECT ")
	sb.WriteString(sessionListSelectColumns)
	sb.WriteString(" FROM sessions")

	clauses := make([]string, 0, 5)
	switch {
	case f.Owner.AllOwners:
		// no owner clause
	case f.Owner.SpecificOwner != nil:
		clauses = append(clauses, "owner = ?")
		args = append(args, *f.Owner.SpecificOwner)
	default:
		clauses = append(clauses, "owner = ?")
		args = append(args, f.Owner.User)
	}
	if f.Tool != nil {
		clauses = append(clauses, "tool = ?")
		args = append(args, *f.Tool)
	}
	if f.Host != nil {
		clauses = append(clauses, "host = ?")
		args = append(args, *f.Host)
	}
	if f.Cwd != nil {
		clauses = append(clauses, "working_dir = ?")
		args = append(args, *f.Cwd)
	}
	if f.Since != nil {
		clauses = append(clauses, "started_at >= ?")
		args = append(args, *f.Since)
	}
	if f.Until != nil {
		clauses = append(clauses, "started_at <= ?")
		args = append(args, *f.Until)
	}
	if len(clauses) > 0 {
		sb.WriteString(" WHERE ")
		sb.WriteString(strings.Join(clauses, " AND "))
	}
	sb.WriteString(" ORDER BY started_at DESC, session_id DESC LIMIT ? OFFSET ?")
	args = append(args, f.Limit, f.Offset)

	out := make([]Session, 0)
	if err := r.Database.DB.SelectContext(ctx, &out, sb.String(), args...); err != nil {
		return nil, culpa.WithCode(culpa.Wrap(err, "list sessions"), "DB_QUERY")
	}
	return out, nil
}

// Get returns the named session and its turns in seq order. The owner clause
// is built from scope identically to List: AllOwners means no clause,
// SpecificOwner pins the row, otherwise the current user is the only allowed
// owner. A miss for any of those reasons returns NOT_FOUND — never 403 —
// because differentiating "wrong owner" from "no such session" would leak
// existence across tenants.
//
// The turns query uses the resolved session's owner (read off the loaded
// row), keeping the result set internally consistent even under AllOwners.
func (r *Repository) Get(ctx context.Context, scope OwnerScope, tool, host, sessionID string) (*SessionWithTurns, error) {
	var (
		sb   strings.Builder
		args []any
	)
	sb.WriteString("SELECT ")
	sb.WriteString(sessionSelectColumns)
	sb.WriteString(" FROM sessions WHERE ")

	switch {
	case scope.AllOwners:
		// no owner clause
	case scope.SpecificOwner != nil:
		sb.WriteString("owner = ? AND ")
		args = append(args, *scope.SpecificOwner)
	default:
		sb.WriteString("owner = ? AND ")
		args = append(args, scope.User)
	}
	sb.WriteString("tool = ? AND host = ? AND session_id = ?")
	args = append(args, tool, host, sessionID)

	var s Session
	if err := r.Database.DB.GetContext(ctx, &s, sb.String(), args...); err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil, culpa.WithCode(culpa.New("session not found"), "NOT_FOUND")
		}
		return nil, culpa.WithCode(culpa.Wrap(err, "get session"), "DB_QUERY")
	}

	turns := make([]Turn, 0)
	const turnsQuery = "SELECT " + turnSelectColumns + " FROM turns WHERE owner = ? AND tool = ? AND host = ? AND session_id = ? ORDER BY seq ASC"
	if err := r.Database.DB.SelectContext(ctx, &turns, turnsQuery, s.Owner, s.Tool, s.Host, s.SessionID); err != nil {
		return nil, culpa.WithCode(culpa.Wrap(err, "list session turns"), "DB_QUERY")
	}
	return &SessionWithTurns{Session: s, Turns: turns}, nil
}