package claudecode import ( "bufio" "crypto/sha256" "encoding/hex" "encoding/json" "errors" "fmt" "io" "io/fs" "os" "path/filepath" "sort" "strings" "time" "sourcecraft.dev/bigbes/lethe/internal/collector/parser" "sourcecraft.dev/bigbes/lethe/internal/shared/wire" ) const toolName = "claude-code" // Parser maps Claude Code JSONL transcripts into lethe wire events. type Parser struct { host string } // New builds a parser that stamps every emitted event with host. func New(host string) *Parser { return &Parser{host: host} } // Tool returns the collector-facing tool name. func (p *Parser) Tool() string { return toolName } // Discover walks root recursively and returns every JSONL transcript. func (p *Parser) Discover(root string) ([]parser.SourceFile, error) { files := make([]parser.SourceFile, 0) err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return nil } if filepath.Ext(path) != ".jsonl" { return nil } info, err := d.Info() if err != nil { return err } files = append(files, parser.SourceFile{Path: path, Size: info.Size()}) return nil }) if err != nil { return nil, err } sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) return files, nil } // Parse reads complete newline-terminated records from path starting at since. // A partial trailing line is left for the next poll so offsets never land in // the middle of a JSON object that Claude is still writing. func (p *Parser) Parse(path string, since int64) ([]wire.TurnEvent, int64, error) { f, err := os.Open(path) if err != nil { return nil, since, err } defer func() { _ = f.Close() }() info, err := f.Stat() if err != nil { return nil, since, err } if since < 0 || since > info.Size() { since = 0 } if _, err := f.Seek(since, io.SeekStart); err != nil { return nil, since, err } r := bufio.NewReaderSize(f, 1<<20) offset := since events := make([]wire.TurnEvent, 0) for { lineStart := offset line, err := r.ReadBytes('\n') switch { case errors.Is(err, io.EOF) && len(line) == 0: return events, offset, nil case errors.Is(err, io.EOF): return events, offset, nil case err != nil: return events, offset, err } offset += int64(len(line)) recordBytes := strings.TrimSpace(string(line)) if recordBytes == "" { continue } event, ok := p.mapRecord(path, lineStart, []byte(recordBytes), info.ModTime()) if ok { events = append(events, event) } } } type transcriptRecord struct { Type string `json:"type"` UUID string `json:"uuid"` Timestamp string `json:"timestamp"` CWD string `json:"cwd"` SessionID string `json:"sessionId"` GitBranch string `json:"gitBranch"` Version string `json:"version"` ParentUUID *string `json:"parentUuid"` SourceToolAssistantUUID string `json:"sourceToolAssistantUUID"` IsSidechain bool `json:"isSidechain"` Message *messageRecord `json:"message"` ToolUseResult json.RawMessage `json:"toolUseResult"` } type messageRecord struct { Role string `json:"role"` Model string `json:"model"` Content json.RawMessage `json:"content"` Usage *usageRecord `json:"usage"` } type usageRecord struct { InputTokens int64 `json:"input_tokens"` OutputTokens int64 `json:"output_tokens"` } type contentPart struct { Type string `json:"type"` Text string `json:"text"` Name string `json:"name"` Input json.RawMessage `json:"input"` Content string `json:"content"` ToolUseID string `json:"tool_use_id"` IsError bool `json:"is_error"` Description string `json:"description"` } func (p *Parser) mapRecord(path string, seq int64, raw []byte, fallbackTime time.Time) (wire.TurnEvent, bool) { var record transcriptRecord if err := json.Unmarshal(raw, &record); err != nil { return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude json: %v", err)), true } if record.Message == nil { return wire.TurnEvent{}, false } timestamp, err := parseTimestamp(record.Timestamp) if err != nil { return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude timestamp: %v", err)), true } base := wire.TurnEvent{ Tool: toolName, Host: p.host, SessionID: sessionIDFor(path, record.SessionID), TurnID: turnIDFor(record, seq, timestamp, raw), Seq: seq, Timestamp: timestamp, SessionMeta: wire.SessionMeta{ WorkingDir: stringPtrOrNil(record.CWD), SourceFile: path, }, Metadata: cloneRaw(raw), } text, parts, err := parseContent(record.Message.Content) if err != nil { return p.systemFallback(path, seq, raw, fallbackTime, fmt.Sprintf("invalid claude content: %v", err)), true } toolParts := filterParts(parts, "tool_use") toolResultParts := filterParts(parts, "tool_result") switch record.Message.Role { case "user": if text != "" { base.Role = "user" base.Content = text return base, true } if len(toolResultParts) > 0 { base.Role = "tool" base.Content = renderToolResult(record.ToolUseResult, toolResultParts) base.ToolCalls = toolResultPayload(record.ToolUseResult, toolResultParts) return base, true } case "assistant": if text != "" { base.Role = "assistant" base.Content = text base.Model = stringPtrOrNil(record.Message.Model) base.TokensIn = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.InputTokens }) base.TokensOut = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.OutputTokens }) if len(toolParts) > 0 { base.ToolCalls = marshalParts(toolParts) } return base, true } if len(toolParts) > 0 { base.Role = "tool" base.Content = renderToolUse(toolParts) base.Model = stringPtrOrNil(record.Message.Model) base.TokensIn = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.InputTokens }) base.TokensOut = int64PtrOrNil(record.Message.Usage, func(u *usageRecord) int64 { return u.OutputTokens }) base.ToolCalls = marshalParts(toolParts) return base, true } case "system": if text != "" { base.Role = "system" base.Content = text return base, true } } return wire.TurnEvent{}, false } func (p *Parser) systemFallback(path string, seq int64, raw []byte, fallbackTime time.Time, content string) wire.TurnEvent { timestamp := fallbackTime.Unix() return wire.TurnEvent{ Tool: toolName, Host: p.host, SessionID: sessionIDFor(path, ""), TurnID: synthesizedTurnID(sessionIDFor(path, ""), seq, timestamp, raw), Seq: seq, Role: "system", Timestamp: timestamp, Content: content, SessionMeta: wire.SessionMeta{ SourceFile: path, }, Metadata: cloneRaw(raw), } } func parseContent(raw json.RawMessage) (string, []contentPart, error) { trimmed := strings.TrimSpace(string(raw)) if trimmed == "" || trimmed == "null" { return "", nil, nil } if trimmed[0] == '"' { var text string if err := json.Unmarshal(raw, &text); err != nil { return "", nil, err } return strings.TrimSpace(text), nil, nil } parts := make([]contentPart, 0) if err := json.Unmarshal(raw, &parts); err != nil { return "", nil, err } texts := make([]string, 0) for _, part := range parts { if part.Type == "text" && strings.TrimSpace(part.Text) != "" { texts = append(texts, strings.TrimSpace(part.Text)) } } return strings.Join(texts, "\n\n"), parts, nil } func filterParts(parts []contentPart, want string) []contentPart { out := make([]contentPart, 0) for _, part := range parts { if part.Type == want { out = append(out, part) } } return out } func renderToolUse(parts []contentPart) string { if len(parts) == 0 { return "" } part := parts[0] label := part.Name if label == "" { label = "tool" } var input struct { Description string `json:"description"` } if err := json.Unmarshal(part.Input, &input); err == nil && strings.TrimSpace(input.Description) != "" { return fmt.Sprintf("", label, strings.TrimSpace(input.Description)) } return fmt.Sprintf("", label) } func renderToolResult(toolUseResult json.RawMessage, parts []contentPart) string { summary := firstToolResultSummary(toolUseResult, parts) if summary == "" { return "" } return fmt.Sprintf("", summary) } func firstToolResultSummary(toolUseResult json.RawMessage, parts []contentPart) string { var payload struct { Stdout string `json:"stdout"` Stderr string `json:"stderr"` } if len(toolUseResult) > 0 && json.Unmarshal(toolUseResult, &payload) == nil { if summary := summarizeText(payload.Stdout); summary != "" { return summary } if summary := summarizeText(payload.Stderr); summary != "" { return summary } } for _, part := range parts { if summary := summarizeText(part.Content); summary != "" { return summary } } return "" } func summarizeText(text string) string { text = strings.TrimSpace(text) if text == "" { return "" } line := text if idx := strings.IndexByte(line, '\n'); idx >= 0 { line = line[:idx] } line = strings.TrimSpace(line) if len(line) > 120 { line = line[:117] + "..." } return line } func toolResultPayload(toolUseResult json.RawMessage, parts []contentPart) json.RawMessage { if len(toolUseResult) > 0 { return cloneRaw(toolUseResult) } return marshalParts(parts) } func marshalParts(parts []contentPart) json.RawMessage { if len(parts) == 0 { return nil } b, err := json.Marshal(parts) if err != nil { return nil } return b } func parseTimestamp(raw string) (int64, error) { if strings.TrimSpace(raw) == "" { return 0, errors.New("missing timestamp") } ts, err := time.Parse(time.RFC3339Nano, raw) if err != nil { return 0, err } return ts.Unix(), nil } func sessionIDFor(path, sessionID string) string { if strings.TrimSpace(sessionID) != "" { return sessionID } base := filepath.Base(path) return strings.TrimSuffix(base, filepath.Ext(base)) } func turnIDFor(record transcriptRecord, seq, timestamp int64, raw []byte) string { if strings.TrimSpace(record.UUID) != "" { return record.UUID } return synthesizedTurnID(sessionIDFor("", record.SessionID), seq, timestamp, raw) } func synthesizedTurnID(sessionID string, seq, timestamp int64, raw []byte) string { sum := sha256.Sum256([]byte(fmt.Sprintf("%s|%d|%d|%s", sessionID, seq, timestamp, summarizeText(string(raw))))) return hex.EncodeToString(sum[:8]) } func cloneRaw(raw json.RawMessage) json.RawMessage { if len(raw) == 0 { return nil } out := make([]byte, len(raw)) copy(out, raw) return out } func stringPtrOrNil(value string) *string { value = strings.TrimSpace(value) if value == "" { return nil } return &value } func int64PtrOrNil[T any](value *T, get func(*T) int64) *int64 { if value == nil { return nil } v := get(value) if v == 0 { return nil } return &v }