From a1e67ca703ed1c77d8629f2bdbb57668f85c98c2 Mon Sep 17 00:00:00 2001 From: Eugene Blikh Date: Sun, 3 May 2026 14:37:52 +0300 Subject: [PATCH] collector: add Claude Code parser --- docs/TODO.md | 12 +- docs/tasks/lethe-collector-claude-code.md | 12 +- .../collector/parser/claudecode/parser.go | 424 ++++++++++++++++++ .../parser/claudecode/parser_test.go | 178 ++++++++ internal/collector/parser/parser.go | 16 + web/src/routes/auth.callback.tsx | 2 +- 6 files changed, 631 insertions(+), 13 deletions(-) create mode 100644 internal/collector/parser/claudecode/parser.go create mode 100644 internal/collector/parser/claudecode/parser_test.go create mode 100644 internal/collector/parser/parser.go diff --git a/docs/TODO.md b/docs/TODO.md index 251a2549f11a51839428567cec3255758ebd8871..b068f03d2e2d8f99e2b21fd06ed1329f6265a66e 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -32,12 +32,12 @@ These were captured in `lethe-web-ui-foundation.md` Conclusion → Future work / Cross-cutting tooling polish that doesn't warrant a full task file each. Track here so they don't get lost. -- [ ] **`just air` → `just dev`** — match the gabin convention; rename the recipe in `Justfile` and update the README quickstart line. -- [ ] **`air` → `go tool air`** — add `tool github.com/air-verse/air` to `go.mod` (Go 1.24+); the `dev:` recipe runs `go tool air` so contributors don't need a separate `go install`. `.air.toml` is unchanged. -- [ ] **Add `go fix ./...` to `just fmt`** — runs after `goimports`; mostly a no-op today but standardizes future API-rewrite migrations into the existing `just fmt` flow. -- [ ] **`migrate` → `go tool migrate`** — `go get -tool github.com/golang-migrate/migrate/v4/cmd/migrate@latest`; rewrite the three `migrate-*` recipes; drop the now-stale `brew install golang-migrate` comment block. -- [ ] **OIDC stub** — see task #10 (separate task file). 
-- [ ] **Full web lint unblock** — `npm run lint` currently fails on pre-existing `web/src/routes/auth.callback.tsx:137:16` (`e` unused); changed files from #8 pass targeted ESLint. +- [x] **`just air` → `just dev`** — done; `README.md` quickstart now points at `just dev` and the dev recipe follows that convention. +- [x] **`air` → `go tool air`** — done; `go.mod` carries the `tool github.com/air-verse/air` directive and `dev` / `dev-all` invoke `go tool air`. +- [x] **Add `go fix ./...` to `just fmt`** — done; `fmt` now runs `gofmt`, `goimports`, then `go fix`. +- [x] **`migrate` → `go tool migrate`** — done; all `migrate-*` recipes use `go tool migrate` and the stale install note is gone. +- [x] **OIDC stub** — shipped in task #10 (`lethe-oidc-stub.md`). +- [x] **Full web lint unblock** — fixed the unused catch binding in `web/src/routes/auth.callback.tsx`; `npm run lint` should now pass again. ## Deferred operational follow-ups diff --git a/docs/tasks/lethe-collector-claude-code.md b/docs/tasks/lethe-collector-claude-code.md index 908ac70910dcbea58c491d4120e32e414708e72b..7dd234536dbc143f5ef575d786cb7746226d213b 100644 --- a/docs/tasks/lethe-collector-claude-code.md +++ b/docs/tasks/lethe-collector-claude-code.md @@ -81,14 +81,14 @@ type SourceFile struct { `Parse` returns events in source order with monotonically-increasing `seq`. If a line is malformed, the parser returns it as a `system`-role turn with the raw line in `metadata` (so it shows up in the archive but doesn't poison search) and continues. `newOffset` is the byte position immediately after the last fully-parsed line — never mid-line, so a partial trailing write is left for the next poll. **Claude Code parser specifics.** -- Source root: `~/.claude/projects/`. File pattern: `*/.jsonl` (one file per session). +- Source root: `~/.claude/projects/`. Real corpus includes both `*/.jsonl` and nested `*//subagents/*.jsonl`; ingest every `.jsonl` file as its own session. 
- `session_id`: the UUID from the filename. The directory name (``) goes into `session_meta.metadata` for project attribution. - One `.jsonl` line = one event, parsed into a permissive struct that uses `json.RawMessage` for any ambiguous field. -- Event-type mapping: - - `type: "user"` → `role: "user"`, `content` = the user message text. - - `type: "assistant"` → `role: "assistant"`, `content` = joined assistant text parts; `model` from the event; `tokens_in/out` from `usage.input_tokens/output_tokens` when present. - - `type: "tool_use"` and `type: "tool_result"` → `role: "tool"`, `content` = a short rendered summary (e.g. `""`), full payload into `tool_calls` JSON. - - `type: "summary"` and unknown types → `role: "system"`, content from event, full event into `metadata`. +- Event-type mapping keys off `message.role` plus nested `message.content[].type`, not just the top-level record `type`: in current Claude logs, tool use lives inside assistant records and tool results live inside user records. + - `message.role: "user"` with string content → `role: "user"`, `content` = the user message text. + - `message.role: "assistant"` with text parts → `role: "assistant"`, `content` = joined assistant text parts; `model` from the event; `tokens_in/out` from `usage.input_tokens/output_tokens` when present. + - `message.content[].type: "tool_use"` and `message.content[].type: "tool_result"` → `role: "tool"`, `content` = a short rendered summary (e.g. `""`), full payload into `tool_calls` JSON. + - Non-turn records (`permission-mode`, `attachment`, `ai-title`, `last-prompt`, etc.) are skipped unless they fail to parse, in which case they degrade to a `system` turn with the raw line in `metadata`. - `cwd` field → `session_meta.working_dir`. The path of the file → `session_meta.source_file`. - `cost_usd` left null (Max-billed sessions don't reliably report cost). - `turn_id`: prefer the event's `uuid` field. 
When missing, synthesize `sha256(session_id || seq || timestamp || content[:64])` truncated to 16 bytes hex. diff --git a/internal/collector/parser/claudecode/parser.go b/internal/collector/parser/claudecode/parser.go new file mode 100644 index 0000000000000000000000000000000000000000..5680aa6e727ff1bfa8253e83f9008c660ae0fc92 --- /dev/null +++ b/internal/collector/parser/claudecode/parser.go @@ -0,0 +1,424 @@ +package claudecode + +import ( + "bufio" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "sourcecraft.dev/bigbes/lethe/internal/collector/parser" + "sourcecraft.dev/bigbes/lethe/internal/shared/wire" +) + +const toolName = "claude-code" + +// Parser maps Claude Code JSONL transcripts into lethe wire events. +type Parser struct { + host string +} + +// New builds a parser that stamps every emitted event with host. +func New(host string) *Parser { + return &Parser{host: host} +} + +// Tool returns the collector-facing tool name. +func (p *Parser) Tool() string { + return toolName +} + +// Discover walks root recursively and returns every JSONL transcript. +func (p *Parser) Discover(root string) ([]parser.SourceFile, error) { + files := make([]parser.SourceFile, 0) + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if filepath.Ext(path) != ".jsonl" { + return nil + } + info, err := d.Info() + if err != nil { + return err + } + files = append(files, parser.SourceFile{Path: path, Size: info.Size()}) + return nil + }) + if err != nil { + return nil, err + } + sort.Slice(files, func(i, j int) bool { + return files[i].Path < files[j].Path + }) + return files, nil +} + +// Parse reads complete newline-terminated records from path starting at since. 
+// A partial trailing line is left for the next poll so offsets never land in +// the middle of a JSON object that Claude is still writing. +func (p *Parser) Parse(path string, since int64) ([]wire.TurnEvent, int64, error) { + f, err := os.Open(path) + if err != nil { + return nil, since, err + } + defer func() { _ = f.Close() }() + + info, err := f.Stat() + if err != nil { + return nil, since, err + } + if since < 0 || since > info.Size() { + since = 0 + } + if _, err := f.Seek(since, io.SeekStart); err != nil { + return nil, since, err + } + + r := bufio.NewReaderSize(f, 1<<20) + offset := since + events := make([]wire.TurnEvent, 0) + for { + lineStart := offset + line, err := r.ReadBytes('\n') + switch { + case errors.Is(err, io.EOF) && len(line) == 0: + return events, offset, nil + case errors.Is(err, io.EOF): + return events, offset, nil + case err != nil: + return events, offset, err + } + offset += int64(len(line)) + + recordBytes := strings.TrimSpace(string(line)) + if recordBytes == "" { + continue + } + event, ok := p.mapRecord(path, lineStart, []byte(recordBytes), info.ModTime()) + if ok { + events = append(events, event) + } + } +} + +type transcriptRecord struct { + Type string `json:"type"` + UUID string `json:"uuid"` + Timestamp string `json:"timestamp"` + CWD string `json:"cwd"` + SessionID string `json:"sessionId"` + GitBranch string `json:"gitBranch"` + Version string `json:"version"` + ParentUUID *string `json:"parentUuid"` + SourceToolAssistantUUID string `json:"sourceToolAssistantUUID"` + IsSidechain bool `json:"isSidechain"` + Message *messageRecord `json:"message"` + ToolUseResult json.RawMessage `json:"toolUseResult"` +} + +type messageRecord struct { + Role string `json:"role"` + Model string `json:"model"` + Content json.RawMessage `json:"content"` + Usage *usageRecord `json:"usage"` +} + +type usageRecord struct { + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` +} + +type contentPart struct { + Type 
string `json:"type"` + Text string `json:"text"` + Name string `json:"name"` + Input json.RawMessage `json:"input"` + Content string `json:"content"` + ToolUseID string `json:"tool_use_id"` + IsError bool `json:"is_error"` + Description string `json:"description"` +} + +func (p *Parser) mapRecord(path string, seq int64, raw []byte, fallbackTime time.Time) (wire.TurnEvent, bool) { + var record transcriptRecord + if err := json.Unmarshal(raw, &record); err != nil { + return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude json: %v", err)), true + } + if record.Message == nil { + return wire.TurnEvent{}, false + } + + timestamp, err := parseTimestamp(record.Timestamp) + if err != nil { + return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude timestamp: %v", err)), true + } + base := wire.TurnEvent{ + Tool: toolName, + Host: p.host, + SessionID: sessionIDFor(path, record.SessionID), + TurnID: turnIDFor(record, seq, timestamp, raw), + Seq: seq, + Timestamp: timestamp, + SessionMeta: wire.SessionMeta{ + WorkingDir: stringPtrOrNil(record.CWD), + SourceFile: path, + }, + Metadata: cloneRaw(raw), + } + + text, parts, err := parseContent(record.Message.Content) + if err != nil { + return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude content: %v", err)), true + } + toolParts := filterParts(parts, "tool_use") + toolResultParts := filterParts(parts, "tool_result") + + switch record.Message.Role { + case "user": + if text != "" { + base.Role = "user" + base.Content = text + return base, true + } + if len(toolResultParts) > 0 { + base.Role = "tool" + base.Content = renderToolResult(record.ToolUseResult, toolResultParts) + base.ToolCalls = toolResultPayload(record.ToolUseResult, toolResultParts) + return base, true + } + case "assistant": + if text != "" { + base.Role = "assistant" + base.Content = text + base.Model = stringPtrOrNil(record.Message.Model) + base.TokensIn = 
int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.InputTokens }) + base.TokensOut = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.OutputTokens }) + if len(toolParts) > 0 { + base.ToolCalls = marshalParts(toolParts) + } + return base, true + } + if len(toolParts) > 0 { + base.Role = "tool" + base.Content = renderToolUse(toolParts) + base.Model = stringPtrOrNil(record.Message.Model) + base.TokensIn = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.InputTokens }) + base.TokensOut = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.OutputTokens }) + base.ToolCalls = marshalParts(toolParts) + return base, true + } + case "system": + if text != "" { + base.Role = "system" + base.Content = text + return base, true + } + } + + return wire.TurnEvent{}, false +} + +func (p *Parser) systemFallback(path string, seq int64, raw []byte, fallbackTime time.Time, content string) wire.TurnEvent { + timestamp := fallbackTime.Unix() + return wire.TurnEvent{ + Tool: toolName, + Host: p.host, + SessionID: sessionIDFor(path, ""), + TurnID: synthesizedTurnID(sessionIDFor(path, ""), seq, timestamp, raw), + Seq: seq, + Role: "system", + Timestamp: timestamp, + Content: content, + SessionMeta: wire.SessionMeta{ + SourceFile: path, + }, + Metadata: cloneRaw(raw), + } +} + +func parseContent(raw json.RawMessage) (string, []contentPart, error) { + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" || trimmed == "null" { + return "", nil, nil + } + if trimmed[0] == '"' { + var text string + if err := json.Unmarshal(raw, &text); err != nil { + return "", nil, err + } + return strings.TrimSpace(text), nil, nil + } + parts := make([]contentPart, 0) + if err := json.Unmarshal(raw, &parts); err != nil { + return "", nil, err + } + texts := make([]string, 0) + for _, part := range parts { + if part.Type == "text" && strings.TrimSpace(part.Text) != "" { + texts = append(texts, 
strings.TrimSpace(part.Text)) + } + } + return strings.Join(texts, "\n\n"), parts, nil +} + +func filterParts(parts []contentPart, want string) []contentPart { + out := make([]contentPart, 0) + for _, part := range parts { + if part.Type == want { + out = append(out, part) + } + } + return out +} + +func renderToolUse(parts []contentPart) string { + if len(parts) == 0 { + return "" + } + part := parts[0] + label := part.Name + if label == "" { + label = "tool" + } + var input struct { + Description string `json:"description"` + } + if err := json.Unmarshal(part.Input, &input); err == nil && strings.TrimSpace(input.Description) != "" { + return fmt.Sprintf("", label, strings.TrimSpace(input.Description)) + } + return fmt.Sprintf("", label) +} + +func renderToolResult(toolUseResult json.RawMessage, parts []contentPart) string { + summary := firstToolResultSummary(toolUseResult, parts) + if summary == "" { + return "" + } + return fmt.Sprintf("", summary) +} + +func firstToolResultSummary(toolUseResult json.RawMessage, parts []contentPart) string { + var payload struct { + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + } + if len(toolUseResult) > 0 && json.Unmarshal(toolUseResult, &payload) == nil { + if summary := summarizeText(payload.Stdout); summary != "" { + return summary + } + if summary := summarizeText(payload.Stderr); summary != "" { + return summary + } + } + for _, part := range parts { + if summary := summarizeText(part.Content); summary != "" { + return summary + } + } + return "" +} + +func summarizeText(text string) string { + text = strings.TrimSpace(text) + if text == "" { + return "" + } + line := text + if idx := strings.IndexByte(line, '\n'); idx >= 0 { + line = line[:idx] + } + line = strings.TrimSpace(line) + if len(line) > 120 { + line = line[:117] + "..." 
+ } + return line +} + +func toolResultPayload(toolUseResult json.RawMessage, parts []contentPart) json.RawMessage { + if len(toolUseResult) > 0 { + return cloneRaw(toolUseResult) + } + return marshalParts(parts) +} + +func marshalParts(parts []contentPart) json.RawMessage { + if len(parts) == 0 { + return nil + } + b, err := json.Marshal(parts) + if err != nil { + return nil + } + return b +} + +func parseTimestamp(raw string) (int64, error) { + if strings.TrimSpace(raw) == "" { + return 0, errors.New("missing timestamp") + } + ts, err := time.Parse(time.RFC3339Nano, raw) + if err != nil { + return 0, err + } + return ts.Unix(), nil +} + +func sessionIDFor(path, sessionID string) string { + if strings.TrimSpace(sessionID) != "" { + return sessionID + } + base := filepath.Base(path) + return strings.TrimSuffix(base, filepath.Ext(base)) +} + +func turnIDFor(record transcriptRecord, seq, timestamp int64, raw []byte) string { + if strings.TrimSpace(record.UUID) != "" { + return record.UUID + } + return synthesizedTurnID(sessionIDFor("", record.SessionID), seq, timestamp, raw) +} + +func synthesizedTurnID(sessionID string, seq, timestamp int64, raw []byte) string { + sum := sha256.Sum256([]byte(fmt.Sprintf("%s|%d|%d|%s", sessionID, seq, timestamp, summarizeText(string(raw))))) + return hex.EncodeToString(sum[:8]) +} + +func cloneRaw(raw json.RawMessage) json.RawMessage { + if len(raw) == 0 { + return nil + } + out := make([]byte, len(raw)) + copy(out, raw) + return out +} + +func stringPtrOrNil(value string) *string { + value = strings.TrimSpace(value) + if value == "" { + return nil + } + return &value +} + +func int64PtrOrNil[T any](value *T, get func(*T) int64) *int64 { + if value == nil { + return nil + } + v := get(value) + if v == 0 { + return nil + } + return &v +} diff --git a/internal/collector/parser/claudecode/parser_test.go b/internal/collector/parser/claudecode/parser_test.go new file mode 100644 index 
// TestDiscover_FindsJSONLRecursively checks that Discover returns only the
// ".jsonl" files under the root, including nested ones, ordered by path.
func TestDiscover_FindsJSONLRecursively(t *testing.T) {
	root := t.TempDir()
	paths := []string{
		filepath.Join(root, "session-a.jsonl"),
		filepath.Join(root, "nested", "agent-1.jsonl"),
		filepath.Join(root, "nested", "ignore.txt"),
	}
	for _, path := range paths {
		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
			t.Fatalf("MkdirAll(%q): %v", path, err)
		}
		if err := os.WriteFile(path, []byte("fixture\n"), 0o600); err != nil {
			t.Fatalf("WriteFile(%q): %v", path, err)
		}
	}

	p := New("laptop")
	files, err := p.Discover(root)
	if err != nil {
		t.Fatalf("Discover: %v", err)
	}
	// ignore.txt must be filtered out, leaving the two transcripts.
	if len(files) != 2 {
		t.Fatalf("Discover count = %d, want 2", len(files))
	}
	// "nested/..." sorts before "session-a.jsonl" in path order.
	if got, want := files[0].Path, filepath.Join(root, "nested", "agent-1.jsonl"); got != want {
		t.Fatalf("files[0].Path = %q, want %q", got, want)
	}
	if got, want := files[1].Path, filepath.Join(root, "session-a.jsonl"); got != want {
		t.Fatalf("files[1].Path = %q, want %q", got, want)
	}
}

// TestParse_MapsClaudeConversationRecords feeds a six-line transcript through
// Parse: two records without a "message" object (permission-mode, ai-title)
// that must be skipped, plus a user prompt, an assistant text reply, an
// assistant tool_use and a user tool_result that must map to four events.
func TestParse_MapsClaudeConversationRecords(t *testing.T) {
	path := writeTranscript(t,
		`{"type":"permission-mode","permissionMode":"default","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f"}`+"\n"+
			`{"parentUuid":null,"isSidechain":false,"promptId":"77bd4c73-a032-45a6-ae10-0cae4c51d522","type":"user","message":{"role":"user","content":"let's configure meli client for gmail email"},"uuid":"4aad19e9-ffe0-47df-a54b-5e5cab1fcb57","timestamp":"2026-05-02T10:01:16.914Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
			`{"parentUuid":"x","isSidechain":false,"message":{"model":"claude-opus-4-7","type":"message","role":"assistant","content":[{"type":"text","text":"Before configuring, I need a couple of decisions from you."}],"usage":{"input_tokens":1,"output_tokens":42}},"requestId":"req_123","type":"assistant","uuid":"d801eb4d-4a19-4b1e-ad13-09a5a8745ca5","timestamp":"2026-05-02T10:02:10.076Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
			`{"parentUuid":"x","isSidechain":false,"message":{"model":"claude-opus-4-7","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01R9FjyoMf16Sm9to16YeDUE","name":"ToolSearch","input":{"query":"select:AskUserQuestion","max_results":1,"description":"Load the question tool"}}],"usage":{"input_tokens":1,"output_tokens":8}},"requestId":"req_124","type":"assistant","uuid":"11aba1df-59e0-49ae-aa23-3862a2b517c9","timestamp":"2026-05-02T10:02:10.522Z","cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
			`{"parentUuid":"x","isSidechain":false,"promptId":"77bd4c73-a032-45a6-ae10-0cae4c51d522","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01R9FjyoMf16Sm9to16YeDUE","type":"tool_result","content":"ToolSearch loaded AskUserQuestion","is_error":false}]},"uuid":"383c9596-a2ff-41be-a540-816798450b5b","timestamp":"2026-05-02T10:02:11.111Z","toolUseResult":{"stdout":"ToolSearch loaded AskUserQuestion","stderr":"","interrupted":false,"isImage":false,"noOutputExpected":false},"cwd":"/Users/blikh/data/home","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f","version":"2.1.126","gitBranch":"HEAD"}`+"\n"+
			`{"type":"ai-title","aiTitle":"Configure meli client for Gmail email","sessionId":"6d3afaab-4fe0-4bfd-a488-b43a9302e17f"}`+"\n",
	)

	p := New("laptop")
	events, nextOffset, err := p.Parse(path, 0)
	if err != nil {
		t.Fatalf("Parse: %v", err)
	}
	if len(events) != 4 {
		t.Fatalf("len(events) = %d, want 4", len(events))
	}
	info, err := os.Stat(path)
	if err != nil {
		t.Fatalf("Stat: %v", err)
	}
	// Every line is newline-terminated, so the whole file must be consumed.
	if nextOffset != info.Size() {
		t.Fatalf("nextOffset = %d, want %d", nextOffset, info.Size())
	}

	// events[0]: the plain user prompt.
	if got := events[0].Role; got != "user" {
		t.Fatalf("events[0].Role = %q, want user", got)
	}
	if got := events[0].Content; got != "let's configure meli client for gmail email" {
		t.Fatalf("events[0].Content = %q", got)
	}
	if events[0].Host != "laptop" || events[0].Tool != "claude-code" {
		t.Fatalf("events[0] identity = %s/%s, want claude-code/laptop", events[0].Tool, events[0].Host)
	}
	if got := events[0].SessionMeta.SourceFile; got != path {
		t.Fatalf("events[0].SessionMeta.SourceFile = %q, want %q", got, path)
	}

	// events[1]: assistant text with model and token usage.
	if got := events[1].Role; got != "assistant" {
		t.Fatalf("events[1].Role = %q, want assistant", got)
	}
	if got := events[1].Content; got != "Before configuring, I need a couple of decisions from you." {
		t.Fatalf("events[1].Content = %q", got)
	}
	if events[1].Model == nil || *events[1].Model != "claude-opus-4-7" {
		t.Fatalf("events[1].Model = %v, want claude-opus-4-7", events[1].Model)
	}
	if events[1].TokensIn == nil || *events[1].TokensIn != 1 {
		t.Fatalf("events[1].TokensIn = %v, want 1", events[1].TokensIn)
	}
	if events[1].TokensOut == nil || *events[1].TokensOut != 42 {
		t.Fatalf("events[1].TokensOut = %v, want 42", events[1].TokensOut)
	}

	// events[2]: the assistant tool_use, mapped to a tool turn.
	if got := events[2].Role; got != "tool" {
		t.Fatalf("events[2].Role = %q, want tool", got)
	}
	// NOTE(review): the expected literal is empty in this view, which cannot
	// match a rendered tool-use summary; it looks like the original text was
	// lost. Confirm against renderToolUse's actual output.
	if got := events[2].Content; got != "" {
		t.Fatalf("events[2].Content = %q", got)
	}
	if !strings.Contains(string(events[2].ToolCalls), `"ToolSearch"`) {
		t.Fatalf("events[2].ToolCalls = %s, want ToolSearch payload", string(events[2].ToolCalls))
	}

	// events[3]: the user tool_result, mapped to a tool turn whose payload is
	// the record-level toolUseResult.
	if got := events[3].Role; got != "tool" {
		t.Fatalf("events[3].Role = %q, want tool", got)
	}
	// NOTE(review): same concern as events[2] — the expected literal is empty
	// here although the fixture has a non-empty stdout to summarize.
	if got := events[3].Content; got != "" {
		t.Fatalf("events[3].Content = %q", got)
	}
	if !strings.Contains(string(events[3].ToolCalls), `"stdout":"ToolSearch loaded AskUserQuestion"`) {
		t.Fatalf("events[3].ToolCalls = %s, want toolUseResult payload", string(events[3].ToolCalls))
	}
}

// TestParse_IgnoresPartialTrailingRecordAndResumesFromOffset writes one
// complete record followed by a record with no trailing newline, checks that
// only the first is returned, then appends the newline and checks that a
// second Parse from the returned offset yields the remaining record.
func TestParse_IgnoresPartialTrailingRecordAndResumesFromOffset(t *testing.T) {
	path := writeTranscript(t,
		`{"type":"user","message":{"role":"user","content":"hello"},"uuid":"u-1","timestamp":"2026-05-03T09:00:00Z","cwd":"/Users/blikh/data/home/lethe","sessionId":"sess-1"}`+"\n"+
			`{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"partial"}],"usage":{"input_tokens":2,"output_tokens":3}},"timestamp":"2026-05-03T09:00:01Z","cwd":"/Users/blikh/data/home/lethe","sessionId":"sess-1"}`,
	)

	p := New("laptop")
	events, nextOffset, err := p.Parse(path, 0)
	if err != nil {
		t.Fatalf("Parse initial: %v", err)
	}
	// The unterminated second record must be held back.
	if len(events) != 1 {
		t.Fatalf("initial len(events) = %d, want 1", len(events))
	}
	if got := events[0].Content; got != "hello" {
		t.Fatalf("events[0].Content = %q, want hello", got)
	}

	// Complete the trailing record by appending its newline.
	f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0)
	if err != nil {
		t.Fatalf("OpenFile append: %v", err)
	}
	defer func() { _ = f.Close() }()
	if _, err := f.WriteString("\n"); err != nil {
		t.Fatalf("append newline: %v", err)
	}

	events, finalOffset, err := p.Parse(path, nextOffset)
	if err != nil {
		t.Fatalf("Parse resumed: %v", err)
	}
	if len(events) != 1 {
		t.Fatalf("resumed len(events) = %d, want 1", len(events))
	}
	if got := events[0].Role; got != "assistant" {
		t.Fatalf("events[0].Role = %q, want assistant", got)
	}
	if got := events[0].Content; got != "partial" {
		t.Fatalf("events[0].Content = %q, want partial", got)
	}
	// The second fixture record has no "uuid", so its turn id is synthesized.
	if events[0].TurnID == "" {
		t.Fatal("events[0].TurnID is empty, want synthesized ID")
	}
	info, err := os.Stat(path)
	if err != nil {
		t.Fatalf("Stat after resume: %v", err)
	}
	if finalOffset != info.Size() {
		t.Fatalf("finalOffset = %d, want %d", finalOffset, info.Size())
	}
}

// writeTranscript writes body to "session-1.jsonl" inside a fresh temporary
// directory and returns the file's path. It fails the test on a write error.
func writeTranscript(t *testing.T, body string) string {
	t.Helper()
	dir := t.TempDir()
	path := filepath.Join(dir, "session-1.jsonl")
	if err := os.WriteFile(path, []byte(body), 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}
	return path
}
// Parser maps one tool's on-disk transcript format into lethe wire events.
type Parser interface {
	// Tool returns the name identifying the tool this parser handles.
	Tool() string
	// Discover returns the transcript files found under root.
	Discover(root string) ([]SourceFile, error)
	// Parse reads the transcript at path starting at byte offset since and
	// returns the mapped events together with the offset to resume from on
	// the next call.
	// NOTE(review): the resume offset is presumably expected to sit just
	// after the last fully parsed line, never mid-line — confirm that every
	// implementation honors this.
	Parse(path string, since int64) ([]wire.TurnEvent, int64, error)
}