~bigbes/lethe

a1e67ca703ed1c77d8629f2bdbb57668f85c98c2 — Eugene Blikh 24 days ago 1be25ba
collector: add Claude Code parser
M docs/TODO.md => docs/TODO.md +6 -6
@@ 32,12 32,12 @@ These were captured in `lethe-web-ui-foundation.md` Conclusion → Future work /

Cross-cutting tooling polish that doesn't warrant a full task file each. Track here so they don't get lost.

- [ ] **`just air` → `just dev`** — match the gabin convention; rename the recipe in `Justfile` and update the README quickstart line.
- [ ] **`air` → `go tool air`** — add `tool github.com/air-verse/air` to `go.mod` (Go 1.24+); the `dev:` recipe runs `go tool air` so contributors don't need a separate `go install`. `.air.toml` is unchanged.
- [ ] **Add `go fix ./...` to `just fmt`** — runs after `goimports`; mostly a no-op today but standardizes future API-rewrite migrations into the existing `just fmt` flow.
- [ ] **`migrate` → `go tool migrate`** — `go get -tool github.com/golang-migrate/migrate/v4/cmd/migrate@latest`; rewrite the three `migrate-*` recipes; drop the now-stale `brew install golang-migrate` comment block.
- [ ] **OIDC stub** — see task #10 (separate task file).
- [ ] **Full web lint unblock** — `npm run lint` currently fails on pre-existing `web/src/routes/auth.callback.tsx:137:16` (`e` unused); changed files from #8 pass targeted ESLint.
- [x] **`just air` → `just dev`** — done; `README.md` quickstart now points at `just dev` and the dev recipe follows that convention.
- [x] **`air` → `go tool air`** — done; `go.mod` carries the `tool github.com/air-verse/air` directive and `dev` / `dev-all` invoke `go tool air`.
- [x] **Add `go fix ./...` to `just fmt`** — done; `fmt` now runs `gofmt`, `goimports`, then `go fix`.
- [x] **`migrate` → `go tool migrate`** — done; all `migrate-*` recipes use `go tool migrate` and the stale install note is gone.
- [x] **OIDC stub** — shipped in task #10 (`lethe-oidc-stub.md`).
- [x] **Full web lint unblock** — fixed the unused catch binding in `web/src/routes/auth.callback.tsx`; `npm run lint` should now pass again.

## Deferred operational follow-ups


M docs/tasks/lethe-collector-claude-code.md => docs/tasks/lethe-collector-claude-code.md +6 -6
@@ 81,14 81,14 @@ type SourceFile struct {
`Parse` returns events in source order with monotonically-increasing `seq`. If a line is malformed, the parser returns it as a `system`-role turn with the raw line in `metadata` (so it shows up in the archive but doesn't poison search) and continues. `newOffset` is the byte position immediately after the last fully-parsed line — never mid-line, so a partial trailing write is left for the next poll.

**Claude Code parser specifics.**
- Source root: `~/.claude/projects/`. File pattern: `*/<session-uuid>.jsonl` (one file per session).
- Source root: `~/.claude/projects/`. Real corpus includes both `*/<session-uuid>.jsonl` and nested `*/<session-uuid>/subagents/*.jsonl`; ingest every `.jsonl` file as its own session.
- `session_id`: the UUID from the filename. The directory name (`<project-hash>`) goes into `session_meta.metadata` for project attribution.
- One `.jsonl` line = one event, parsed into a permissive struct that uses `json.RawMessage` for any ambiguous field.
- Event-type mapping:
  - `type: "user"` → `role: "user"`, `content` = the user message text.
  - `type: "assistant"` → `role: "assistant"`, `content` = joined assistant text parts; `model` from the event; `tokens_in/out` from `usage.input_tokens/output_tokens` when present.
  - `type: "tool_use"` and `type: "tool_result"` → `role: "tool"`, `content` = a short rendered summary (e.g. `"<tool_use: Read file=...>"`), full payload into `tool_calls` JSON.
  - `type: "summary"` and unknown types → `role: "system"`, content from event, full event into `metadata`.
- Event-type mapping keys off `message.role` plus nested `message.content[].type`, not just the top-level record `type`: in current Claude logs, tool use lives inside assistant records and tool results live inside user records.
  - `message.role: "user"` with string content → `role: "user"`, `content` = the user message text.
  - `message.role: "assistant"` with text parts → `role: "assistant"`, `content` = joined assistant text parts; `model` from the event; `tokens_in/out` from `usage.input_tokens/output_tokens` when present.
  - `message.content[].type: "tool_use"` and `message.content[].type: "tool_result"` → `role: "tool"`, `content` = a short rendered summary (e.g. `"<tool_use: Read file=...>"`), full payload into `tool_calls` JSON.
  - Non-turn records (`permission-mode`, `attachment`, `ai-title`, `last-prompt`, etc.) are skipped unless they fail to parse, in which case they degrade to a `system` turn with the raw line in `metadata`.
- `cwd` field → `session_meta.working_dir`. The path of the file → `session_meta.source_file`.
- `cost_usd` left null (Max-billed sessions don't reliably report cost).
- `turn_id`: prefer the event's `uuid` field. When missing, synthesize `sha256(session_id || seq || timestamp || content[:64])` truncated to 16 bytes hex.

A internal/collector/parser/claudecode/parser.go => internal/collector/parser/claudecode/parser.go +424 -0
@@ 0,0 1,424 @@
package claudecode

import (
	"bufio"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"sourcecraft.dev/bigbes/lethe/internal/collector/parser"
	"sourcecraft.dev/bigbes/lethe/internal/shared/wire"
)

// toolName is the identifier stamped on every event this package emits and
// reported by Parser.Tool.
const toolName = "claude-code"

// Parser turns Claude Code JSONL transcripts into lethe wire events.
type Parser struct {
	// host is copied verbatim into the Host field of each emitted event.
	host string
}

// New returns a Parser whose events are all attributed to host.
func New(host string) *Parser {
	p := new(Parser)
	p.host = host
	return p
}

// Tool reports the tool name under which this parser's events are filed.
func (p *Parser) Tool() string { return toolName }

// Discover returns every regular entry below root whose name ends in ".jsonl",
// together with its size, ordered by path. Any error reported while walking
// the tree or stat-ing an entry aborts discovery and is returned unchanged.
func (p *Parser) Discover(root string) ([]parser.SourceFile, error) {
	found := make([]parser.SourceFile, 0)
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// entry may be nil here, so the error has to be checked first.
			return walkErr
		case entry.IsDir(), filepath.Ext(path) != ".jsonl":
			return nil
		}
		stat, err := entry.Info()
		if err != nil {
			return err
		}
		found = append(found, parser.SourceFile{Path: path, Size: stat.Size()})
		return nil
	}
	if err := filepath.WalkDir(root, visit); err != nil {
		return nil, err
	}
	sort.Slice(found, func(a, b int) bool {
		return found[a].Path < found[b].Path
	})
	return found, nil
}

// Parse maps the newline-terminated records of path, starting at byte offset
// since, into wire events. It returns the events, the offset just past the
// last complete line that was consumed, and any open/stat/seek/read error.
//
// A trailing line without its terminating newline is never consumed: the
// returned offset stays in front of it so the next call retries it once the
// writer has finished the record. Blank lines are consumed but produce no
// event. Each record's sequence number is the byte offset where its line
// starts.
func (p *Parser) Parse(path string, since int64) ([]wire.TurnEvent, int64, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, since, err
	}
	defer func() { _ = file.Close() }()

	stat, err := file.Stat()
	if err != nil {
		return nil, since, err
	}
	// An offset outside the file cannot be resumed from; start over.
	// NOTE(review): presumably this covers a truncated or replaced file.
	if since < 0 || since > stat.Size() {
		since = 0
	}
	if _, err := file.Seek(since, io.SeekStart); err != nil {
		return nil, since, err
	}

	reader := bufio.NewReaderSize(file, 1<<20)
	modTime := stat.ModTime()
	events := make([]wire.TurnEvent, 0)
	consumed := since
	for {
		line, readErr := reader.ReadBytes('\n')
		if errors.Is(readErr, io.EOF) {
			// Either nothing is left or the last line is still being
			// written; in both cases the unread tail is not consumed.
			return events, consumed, nil
		}
		if readErr != nil {
			return events, consumed, readErr
		}
		start := consumed
		consumed += int64(len(line))

		trimmed := strings.TrimSpace(string(line))
		if trimmed == "" {
			continue
		}
		if event, ok := p.mapRecord(path, start, []byte(trimmed), modTime); ok {
			events = append(events, event)
		}
	}
}

// transcriptRecord is the shape one transcript line is decoded into by
// mapRecord. Only Message, Timestamp, UUID, SessionID, CWD and ToolUseResult
// are read by the code in this file; the remaining fields are decoded but not
// otherwise used here.
type transcriptRecord struct {
	Type                    string          `json:"type"`
	UUID                    string          `json:"uuid"`
	Timestamp               string          `json:"timestamp"`
	CWD                     string          `json:"cwd"`
	SessionID               string          `json:"sessionId"`
	GitBranch               string          `json:"gitBranch"`
	Version                 string          `json:"version"`
	ParentUUID              *string         `json:"parentUuid"`
	SourceToolAssistantUUID string          `json:"sourceToolAssistantUUID"`
	IsSidechain             bool            `json:"isSidechain"`
	// Message is nil for lines without a "message" object; mapRecord skips those.
	Message                 *messageRecord  `json:"message"`
	// ToolUseResult is kept raw; it is summarized and forwarded for tool results.
	ToolUseResult           json.RawMessage `json:"toolUseResult"`
}

// messageRecord is the nested "message" object of a transcript line. Role
// selects how mapRecord maps the record; Content is kept raw because
// parseContent accepts either a JSON string or an array of parts.
type messageRecord struct {
	Role    string          `json:"role"`
	Model   string          `json:"model"`
	Content json.RawMessage `json:"content"`
	// Usage is nil when the message carries no "usage" object.
	Usage   *usageRecord    `json:"usage"`
}

// usageRecord holds the token counters of a message's "usage" object; mapRecord
// copies them into the event's TokensIn and TokensOut for assistant records.
type usageRecord struct {
	InputTokens  int64 `json:"input_tokens"`
	OutputTokens int64 `json:"output_tokens"`
}

// contentPart is one element of a message's "content" array: a text part, a
// tool_use part (Name, Input) or a tool_result part (ToolUseID, Content,
// IsError).
type contentPart struct {
	Type  string          `json:"type"`
	Text  string          `json:"text"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
	// Content is the textual payload of a tool_result part. In the JSON it may
	// be a plain string or an array of content blocks; UnmarshalJSON flattens
	// both shapes into this string.
	Content     string `json:"content"`
	ToolUseID   string `json:"tool_use_id"`
	IsError     bool   `json:"is_error"`
	Description string `json:"description"`
}

// UnmarshalJSON decodes a content part while tolerating every shape of the
// "content" field. Without it, a tool_result whose content is an array of
// blocks would fail to decode into the string field and make the whole
// record unparseable. All other fields are decoded with the default rules.
func (c *contentPart) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it does not
	// call UnmarshalJSON again.
	type plain contentPart
	aux := struct {
		*plain
		// Content shadows plain.Content so the raw JSON value is captured
		// instead of being forced into a string.
		Content json.RawMessage `json:"content"`
	}{plain: (*plain)(c)}
	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}
	c.Content = flattenPartContent(aux.Content)
	return nil
}

// flattenPartContent renders the raw "content" value of a part as text: a JSON
// string is returned decoded, an array of blocks is reduced to its non-empty
// "text" fields joined by newlines, and absent/null content yields "". Any
// other shape is kept as its raw JSON text so nothing is silently discarded.
func flattenPartContent(raw json.RawMessage) string {
	trimmed := strings.TrimSpace(string(raw))
	switch {
	case trimmed == "" || trimmed == "null":
		return ""
	case trimmed[0] == '"':
		var text string
		if err := json.Unmarshal(raw, &text); err == nil {
			return text
		}
	case trimmed[0] == '[':
		var blocks []struct {
			Text string `json:"text"`
		}
		if err := json.Unmarshal(raw, &blocks); err == nil {
			texts := make([]string, 0, len(blocks))
			for _, block := range blocks {
				if block.Text != "" {
					texts = append(texts, block.Text)
				}
			}
			return strings.Join(texts, "\n")
		}
	}
	return trimmed
}

// mapRecord converts one transcript line into a wire event.
//
// path is the transcript file, seq is the byte offset at which the line starts
// (used as the event's sequence number), raw is the trimmed line and
// fallbackTime stamps events whose own timestamp cannot be used.
//
// The boolean result is false when the line should be skipped: records without
// a "message" object, and messages that yield neither text nor the tool parts
// relevant to their role. Lines whose JSON, timestamp or content cannot be
// decoded are not dropped; they come back as system events built by
// systemFallback.
func (p *Parser) mapRecord(path string, seq int64, raw []byte, fallbackTime time.Time) (wire.TurnEvent, bool) {
	var record transcriptRecord
	if err := json.Unmarshal(raw, &record); err != nil {
		return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude json: %v", err)), true
	}
	if record.Message == nil {
		return wire.TurnEvent{}, false
	}

	timestamp, err := parseTimestamp(record.Timestamp)
	if err != nil {
		return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude timestamp: %v", err)), true
	}

	// Resolve the session ID once and store it on the record so that a
	// synthesized turn ID hashes the same session ID the event carries.
	// Without this, a record lacking "sessionId" would be hashed with an
	// empty session while the event itself used the filename-derived one.
	record.SessionID = sessionIDFor(path, record.SessionID)
	base := wire.TurnEvent{
		Tool:      toolName,
		Host:      p.host,
		SessionID: record.SessionID,
		TurnID:    turnIDFor(record, seq, timestamp, raw),
		Seq:       seq,
		Timestamp: timestamp,
		SessionMeta: wire.SessionMeta{
			WorkingDir: stringPtrOrNil(record.CWD),
			SourceFile: path,
		},
		Metadata: cloneRaw(raw),
	}

	text, parts, err := parseContent(record.Message.Content)
	if err != nil {
		return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude content: %v", err)), true
	}
	toolParts := filterParts(parts, "tool_use")
	toolResultParts := filterParts(parts, "tool_result")

	switch record.Message.Role {
	case "user":
		if text != "" {
			base.Role = "user"
			base.Content = text
			return base, true
		}
		// Tool results are logged inside user records.
		if len(toolResultParts) > 0 {
			base.Role = "tool"
			base.Content = renderToolResult(record.ToolUseResult, toolResultParts)
			base.ToolCalls = toolResultPayload(record.ToolUseResult, toolResultParts)
			return base, true
		}
	case "assistant":
		if text == "" && len(toolParts) == 0 {
			break
		}
		// Model and usage apply to both assistant text and tool-use turns.
		base.Model = stringPtrOrNil(record.Message.Model)
		base.TokensIn = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.InputTokens })
		base.TokensOut = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.OutputTokens })
		if len(toolParts) > 0 {
			base.ToolCalls = marshalParts(toolParts)
		}
		if text != "" {
			base.Role = "assistant"
			base.Content = text
		} else {
			// A tool call without accompanying text becomes a tool turn.
			base.Role = "tool"
			base.Content = renderToolUse(toolParts)
		}
		return base, true
	case "system":
		if text != "" {
			base.Role = "system"
			base.Content = text
			return base, true
		}
	}

	return wire.TurnEvent{}, false
}

// systemFallback builds the system-role event used for lines that cannot be
// mapped normally. The session ID comes from the file name only, the timestamp
// from fallbackTime, content is the human-readable reason, and the raw line is
// preserved in Metadata. The turn ID is always synthesized.
func (p *Parser) systemFallback(path string, seq int64, raw []byte, fallbackTime time.Time, content string) wire.TurnEvent {
	sessionID := sessionIDFor(path, "")
	unix := fallbackTime.Unix()

	var event wire.TurnEvent
	event.Tool = toolName
	event.Host = p.host
	event.SessionID = sessionID
	event.TurnID = synthesizedTurnID(sessionID, seq, unix, raw)
	event.Seq = seq
	event.Role = "system"
	event.Timestamp = unix
	event.Content = content
	event.SessionMeta = wire.SessionMeta{
		SourceFile: path,
	}
	event.Metadata = cloneRaw(raw)
	return event
}

// parseContent decodes a message's "content" value. Empty or null content
// yields no text and no parts. A JSON string yields its trimmed text and no
// parts. Anything else is decoded as an array of parts; the returned text is
// the trimmed, non-empty "text" parts joined by blank lines, and all decoded
// parts are returned alongside it.
func parseContent(raw json.RawMessage) (string, []contentPart, error) {
	switch trimmed := strings.TrimSpace(string(raw)); {
	case trimmed == "", trimmed == "null":
		return "", nil, nil
	case trimmed[0] == '"':
		var plain string
		if err := json.Unmarshal(raw, &plain); err != nil {
			return "", nil, err
		}
		return strings.TrimSpace(plain), nil, nil
	}

	decoded := make([]contentPart, 0)
	if err := json.Unmarshal(raw, &decoded); err != nil {
		return "", nil, err
	}
	pieces := make([]string, 0)
	for i := range decoded {
		if decoded[i].Type != "text" {
			continue
		}
		if piece := strings.TrimSpace(decoded[i].Text); piece != "" {
			pieces = append(pieces, piece)
		}
	}
	return strings.Join(pieces, "\n\n"), decoded, nil
}

// filterParts returns, in order, the parts whose Type equals want. The result
// is never nil, even when nothing matches.
func filterParts(parts []contentPart, want string) []contentPart {
	matched := make([]contentPart, 0)
	for i := range parts {
		if parts[i].Type != want {
			continue
		}
		matched = append(matched, parts[i])
	}
	return matched
}

// renderToolUse produces the short one-line summary for a tool call. Only the
// first part is described: its name (or "tool" when unnamed) and, when the
// input object carries a non-blank "description", that description as well.
func renderToolUse(parts []contentPart) string {
	if len(parts) == 0 {
		return "<tool_use>"
	}
	first := parts[0]
	name := first.Name
	if name == "" {
		name = "tool"
	}

	var args struct {
		Description string `json:"description"`
	}
	if json.Unmarshal(first.Input, &args) == nil {
		if description := strings.TrimSpace(args.Description); description != "" {
			return fmt.Sprintf("<tool_use: %s - %s>", name, description)
		}
	}
	return fmt.Sprintf("<tool_use: %s>", name)
}

// renderToolResult produces the short one-line summary for a tool result,
// falling back to a bare "<tool_result>" marker when no summary text exists.
func renderToolResult(toolUseResult json.RawMessage, parts []contentPart) string {
	if summary := firstToolResultSummary(toolUseResult, parts); summary != "" {
		return fmt.Sprintf("<tool_result: %s>", summary)
	}
	return "<tool_result>"
}

// firstToolResultSummary picks the first non-empty summary among, in order:
// the "stdout" and "stderr" fields of toolUseResult (when it decodes as an
// object), then the Content of each part. It returns "" when none has text.
func firstToolResultSummary(toolUseResult json.RawMessage, parts []contentPart) string {
	candidates := make([]string, 0, len(parts)+2)
	if len(toolUseResult) > 0 {
		var streams struct {
			Stdout string `json:"stdout"`
			Stderr string `json:"stderr"`
		}
		// A result that is not an object with string streams is ignored.
		if err := json.Unmarshal(toolUseResult, &streams); err == nil {
			candidates = append(candidates, streams.Stdout, streams.Stderr)
		}
	}
	for i := range parts {
		candidates = append(candidates, parts[i].Content)
	}
	for _, candidate := range candidates {
		if summary := summarizeText(candidate); summary != "" {
			return summary
		}
	}
	return ""
}

// summarizeText reduces text to a single short line: surrounding whitespace is
// trimmed, everything after the first newline is dropped, and a line longer
// than 120 bytes is shortened to at most 117 bytes plus "...".
//
// The cut is moved back to the start of a UTF-8 sequence so a multi-byte
// character is never split; slicing at a fixed byte index would otherwise
// yield invalid UTF-8 for non-ASCII text. ASCII input is truncated exactly at
// 117 bytes. The result also feeds synthesizedTurnID, so it must stay
// deterministic for a given input.
func summarizeText(text string) string {
	const (
		maxLen   = 120
		ellipsis = "..."
	)
	line, _, _ := strings.Cut(strings.TrimSpace(text), "\n")
	line = strings.TrimSpace(line)
	if len(line) <= maxLen {
		return line
	}
	cut := maxLen - len(ellipsis)
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. A sequence has
	// at most three of them, so stepping back three bytes is always enough
	// for valid input and keeps the loop bounded for invalid input.
	for back := 0; back < 3 && cut > 0 && line[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return line[:cut] + ellipsis
}

// toolResultPayload chooses what to store as the tool-call payload of a tool
// result: a copy of the record-level toolUseResult when present, otherwise the
// JSON encoding of the tool_result parts.
func toolResultPayload(toolUseResult json.RawMessage, parts []contentPart) json.RawMessage {
	if len(toolUseResult) == 0 {
		return marshalParts(parts)
	}
	return cloneRaw(toolUseResult)
}

// marshalParts encodes parts as a JSON array. It returns nil for an empty
// slice and also when encoding fails, so callers never see an error.
func marshalParts(parts []contentPart) json.RawMessage {
	if len(parts) == 0 {
		return nil
	}
	if encoded, err := json.Marshal(parts); err == nil {
		return encoded
	}
	return nil
}

// parseTimestamp converts an RFC 3339 timestamp (fractional seconds allowed)
// into Unix seconds. A blank value is reported as "missing timestamp"; any
// other unparseable value returns the error from time.Parse.
func parseTimestamp(raw string) (int64, error) {
	if len(strings.TrimSpace(raw)) == 0 {
		return 0, errors.New("missing timestamp")
	}
	parsed, err := time.Parse(time.RFC3339Nano, raw)
	if err == nil {
		return parsed.Unix(), nil
	}
	return 0, err
}

// sessionIDFor returns sessionID unchanged when it is not blank; otherwise it
// derives the ID from path by taking the file name without its extension.
func sessionIDFor(path, sessionID string) string {
	if strings.TrimSpace(sessionID) == "" {
		name := filepath.Base(path)
		// The extension is always a suffix of the name, so slice it off.
		return name[:len(name)-len(filepath.Ext(name))]
	}
	return sessionID
}

// turnIDFor returns the record's own UUID when it has one and otherwise a
// synthesized ID derived from the record's session ID, seq, timestamp and raw
// line.
func turnIDFor(record transcriptRecord, seq, timestamp int64, raw []byte) string {
	if strings.TrimSpace(record.UUID) == "" {
		sessionID := sessionIDFor("", record.SessionID)
		return synthesizedTurnID(sessionID, seq, timestamp, raw)
	}
	return record.UUID
}

// synthesizedTurnID derives a deterministic turn ID for records without a
// UUID: the SHA-256 of "sessionID|seq|timestamp|summary", where summary is the
// summarized raw line, truncated to the first 8 bytes and hex-encoded (16
// characters).
func synthesizedTurnID(sessionID string, seq, timestamp int64, raw []byte) string {
	digest := sha256.New()
	// Writes to a hash never fail, so the result of Fprintf is not checked.
	fmt.Fprintf(digest, "%s|%d|%d|%s", sessionID, seq, timestamp, summarizeText(string(raw)))
	return hex.EncodeToString(digest.Sum(nil)[:8])
}

// cloneRaw returns an independent copy of raw, or nil when raw is empty, so
// the result never aliases the caller's buffer.
func cloneRaw(raw json.RawMessage) json.RawMessage {
	if len(raw) == 0 {
		return nil
	}
	return append(json.RawMessage(nil), raw...)
}

// stringPtrOrNil returns a pointer to the whitespace-trimmed value, or nil
// when nothing remains after trimming.
func stringPtrOrNil(value string) *string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return &trimmed
	}
	return nil
}

// int64PtrOrNil reads an int64 out of value through get and returns a pointer
// to it. It returns nil when value itself is nil or when the number read is
// zero; get is not called for a nil value.
func int64PtrOrNil[T any](value *T, get func(*T) int64) *int64 {
	if value != nil {
		if n := get(value); n != 0 {
			return &n
		}
	}
	return nil
}

A internal/collector/parser/claudecode/parser_test.go => internal/collector/parser/claudecode/parser_test.go +178 -0
@@ 0,0 1,178 @@
package claudecode

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestDiscover_FindsJSONLRecursively checks that Discover descends into
// subdirectories, ignores files without the .jsonl extension, and returns the
// matches ordered by path.
func TestDiscover_FindsJSONLRecursively(t *testing.T) {
	root := t.TempDir()
	topLevel := filepath.Join(root, "session-a.jsonl")
	nested := filepath.Join(root, "nested", "agent-1.jsonl")
	ignored := filepath.Join(root, "nested", "ignore.txt")
	for _, path := range []string{topLevel, nested, ignored} {
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatalf("MkdirAll(%q): %v", path, err)
		}
		if err := os.WriteFile(path, []byte("fixture\n"), 0o600); err != nil {
			t.Fatalf("WriteFile(%q): %v", path, err)
		}
	}

	files, err := New("laptop").Discover(root)
	if err != nil {
		t.Fatalf("Discover: %v", err)
	}
	if len(files) != 2 {
		t.Fatalf("Discover count = %d, want 2", len(files))
	}
	// "nested/..." sorts before "session-a.jsonl".
	if got, want := files[0].Path, nested; got != want {
		t.Fatalf("files[0].Path = %q, want %q", got, want)
	}
	if got, want := files[1].Path, topLevel; got != want {
		t.Fatalf("files[1].Path = %q, want %q", got, want)
	}
}

// TestParse_MapsClaudeConversationRecords feeds Parse a six-line transcript
// (permission-mode, user text, assistant text, assistant tool_use, user
// tool_result, ai-title) and checks that exactly the four message-bearing
// lines become events with the expected role, content and metadata, and that
// the returned offset equals the file size.
func TestParse_MapsClaudeConversationRecords(t *testing.T) {
	path := writeTranscript(t,
		`{"type":"permission-mode","permissionMode":"default","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f"}`+"\n"+
		`{"parentUuid":null,"isSidechain":false,"promptId":"77bd4c73-a032-45a6-ae10-0cae4c51d522","type":"user","message":{"role":"user","content":"let's configure meli client for gmail email"},"uuid":"4aad19e9-ffe0-47df-a54b-5e5cab1fcb57","timestamp":"2026-05-02T10:01:16.914Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
		`{"parentUuid":"x","isSidechain":false,"message":{"model":"claude-opus-4-7","type":"message","role":"assistant","content":[{"type":"text","text":"Before configuring, I need a couple of decisions from you."}],"usage":{"input_tokens":1,"output_tokens":42}},"requestId":"req_123","type":"assistant","uuid":"d801eb4d-4a19-4b1e-ad13-09a5a8745ca5","timestamp":"2026-05-02T10:02:10.076Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
		`{"parentUuid":"x","isSidechain":false,"message":{"model":"claude-opus-4-7","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01R9FjyoMf16Sm9to16YeDUE","name":"ToolSearch","input":{"query":"select:AskUserQuestion","max_results":1,"description":"Load the question tool"}}],"usage":{"input_tokens":1,"output_tokens":8}},"requestId":"req_124","type":"assistant","uuid":"11aba1df-59e0-49ae-aa23-3862a2b517c9","timestamp":"2026-05-02T10:02:10.522Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
		`{"parentUuid":"x","isSidechain":false,"promptId":"77bd4c73-a032-45a6-ae10-0cae4c51d522","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01R9FjyoMf16Sm9to16YeDUE","type":"tool_result","content":"ToolSearch loaded AskUserQuestion","is_error":false}]},"uuid":"383c9596-a2ff-41be-a540-816798450b5b","timestamp":"2026-05-02T10:02:11.111Z","toolUseResult":{"stdout":"ToolSearch loaded AskUserQuestion","stderr":"","interrupted":false,"isImage":false,"noOutputExpected":false},"cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
		`{"type":"ai-title","aiTitle":"Configure meli client for Gmail email","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f"}`+"\n",
	)

	p := New("laptop")
	events, nextOffset, err := p.Parse(path, 0)
	if err != nil {
		t.Fatalf("Parse: %v", err)
	}
	// The permission-mode and ai-title lines have no "message" and are skipped.
	if len(events) != 4 {
		t.Fatalf("len(events) = %d, want 4", len(events))
	}
	info, err := os.Stat(path)
	if err != nil {
		t.Fatalf("Stat: %v", err)
	}
	// Every line ends in a newline, so the whole file must have been consumed.
	if nextOffset != info.Size() {
		t.Fatalf("nextOffset = %d, want %d", nextOffset, info.Size())
	}

	// events[0]: plain user text.
	if got := events[0].Role; got != "user" {
		t.Fatalf("events[0].Role = %q, want user", got)
	}
	if got := events[0].Content; got != "let's configure meli client for gmail email" {
		t.Fatalf("events[0].Content = %q", got)
	}
	if events[0].Host != "laptop" || events[0].Tool != "claude-code" {
		t.Fatalf("events[0] identity = %s/%s, want claude-code/laptop", events[0].Tool, events[0].Host)
	}
	if got := events[0].SessionMeta.SourceFile; got != path {
		t.Fatalf("events[0].SessionMeta.SourceFile = %q, want %q", got, path)
	}

	// events[1]: assistant text with model and token usage.
	if got := events[1].Role; got != "assistant" {
		t.Fatalf("events[1].Role = %q, want assistant", got)
	}
	if got := events[1].Content; got != "Before configuring, I need a couple of decisions from you." {
		t.Fatalf("events[1].Content = %q", got)
	}
	if events[1].Model == nil || *events[1].Model != "claude-opus-4-7" {
		t.Fatalf("events[1].Model = %v, want claude-opus-4-7", events[1].Model)
	}
	if events[1].TokensIn == nil || *events[1].TokensIn != 1 {
		t.Fatalf("events[1].TokensIn = %v, want 1", events[1].TokensIn)
	}
	if events[1].TokensOut == nil || *events[1].TokensOut != 42 {
		t.Fatalf("events[1].TokensOut = %v, want 42", events[1].TokensOut)
	}

	// events[2]: assistant record holding only a tool_use part maps to a tool turn.
	if got := events[2].Role; got != "tool" {
		t.Fatalf("events[2].Role = %q, want tool", got)
	}
	if got := events[2].Content; got != "<tool_use: ToolSearch - Load the question tool>" {
		t.Fatalf("events[2].Content = %q", got)
	}
	if !strings.Contains(string(events[2].ToolCalls), `"ToolSearch"`) {
		t.Fatalf("events[2].ToolCalls = %s, want ToolSearch payload", string(events[2].ToolCalls))
	}

	// events[3]: user record holding a tool_result part maps to a tool turn
	// whose payload is the record-level toolUseResult.
	if got := events[3].Role; got != "tool" {
		t.Fatalf("events[3].Role = %q, want tool", got)
	}
	if got := events[3].Content; got != "<tool_result: ToolSearch loaded AskUserQuestion>" {
		t.Fatalf("events[3].Content = %q", got)
	}
	if !strings.Contains(string(events[3].ToolCalls), `"stdout":"ToolSearch loaded AskUserQuestion"`) {
		t.Fatalf("events[3].ToolCalls = %s, want toolUseResult payload", string(events[3].ToolCalls))
	}
}

// TestParse_IgnoresPartialTrailingRecordAndResumesFromOffset writes one
// complete line followed by a line with no terminating newline, checks that
// the first Parse returns only the complete record, then appends the newline
// and checks that resuming from the returned offset yields the second record
// and consumes the rest of the file.
func TestParse_IgnoresPartialTrailingRecordAndResumesFromOffset(t *testing.T) {
	path := writeTranscript(t,
		`{"type":"user","message":{"role":"user","content":"hello"},"uuid":"u-1","timestamp":"2026-05-03T09:00:00Z","cwd":"/Users/blikh/data/home/lethe","sessionId":"sess-1"}`+"\n"+
		`{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"partial"}],"usage":{"input_tokens":2,"output_tokens":3}},"timestamp":"2026-05-03T09:00:01Z","cwd":"/Users/blikh/data/home/lethe","sessionId":"sess-1"}`,
	)

	p := New("laptop")
	events, nextOffset, err := p.Parse(path, 0)
	if err != nil {
		t.Fatalf("Parse initial: %v", err)
	}
	// Only the newline-terminated first line may be reported.
	if len(events) != 1 {
		t.Fatalf("initial len(events) = %d, want 1", len(events))
	}
	if got := events[0].Content; got != "hello" {
		t.Fatalf("events[0].Content = %q, want hello", got)
	}

	// Complete the second record by appending its newline.
	f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0)
	if err != nil {
		t.Fatalf("OpenFile append: %v", err)
	}
	defer func() { _ = f.Close() }()
	if _, err := f.WriteString("\n"); err != nil {
		t.Fatalf("append newline: %v", err)
	}

	events, finalOffset, err := p.Parse(path, nextOffset)
	if err != nil {
		t.Fatalf("Parse resumed: %v", err)
	}
	if len(events) != 1 {
		t.Fatalf("resumed len(events) = %d, want 1", len(events))
	}
	if got := events[0].Role; got != "assistant" {
		t.Fatalf("events[0].Role = %q, want assistant", got)
	}
	if got := events[0].Content; got != "partial" {
		t.Fatalf("events[0].Content = %q, want partial", got)
	}
	// The assistant fixture has no "uuid", so the turn ID must be synthesized.
	if events[0].TurnID == "" {
		t.Fatal("events[0].TurnID is empty, want synthesized ID")
	}
	info, err := os.Stat(path)
	if err != nil {
		t.Fatalf("Stat after resume: %v", err)
	}
	if finalOffset != info.Size() {
		t.Fatalf("finalOffset = %d, want %d", finalOffset, info.Size())
	}
}

// writeTranscript stores body as "session-1.jsonl" inside a fresh temporary
// directory owned by t and returns the file's path. The test fails
// immediately if the file cannot be written.
func writeTranscript(t *testing.T, body string) string {
	t.Helper()
	target := filepath.Join(t.TempDir(), "session-1.jsonl")
	if writeErr := os.WriteFile(target, []byte(body), 0o600); writeErr != nil {
		t.Fatalf("WriteFile: %v", writeErr)
	}
	return target
}

A internal/collector/parser/parser.go => internal/collector/parser/parser.go +16 -0
@@ 0,0 1,16 @@
package parser

import "sourcecraft.dev/bigbes/lethe/internal/shared/wire"

// SourceFile is a discovered transcript file under a tool-specific source root.
type SourceFile struct {
	// Path is the location of the transcript as returned by discovery.
	Path string
	// Size is the file size in bytes observed when the file was discovered.
	Size int64
}

// Parser maps one tool's on-disk transcript format into lethe wire events.
type Parser interface {
	// Tool returns the name of the tool whose transcripts this parser handles.
	Tool() string
	// Discover lists the transcript files found under root.
	Discover(root string) ([]SourceFile, error)
	// Parse reads path starting at byte offset since and returns the mapped
	// events together with the offset from which a later call should resume.
	Parse(path string, since int64) ([]wire.TurnEvent, int64, error)
}

M web/src/routes/auth.callback.tsx => web/src/routes/auth.callback.tsx +1 -1
@@ 134,7 134,7 @@ function CallbackRoute(): React.JSX.Element {
      let cfg: ReturnType<typeof readConfig>
      try {
        cfg = readConfig()
      } catch (e) {
      } catch {
        fail('Auth config missing during token exchange')
        return
      }