package claudecode
import (
"bufio"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
"time"
"sourcecraft.dev/bigbes/lethe/internal/collector/parser"
"sourcecraft.dev/bigbes/lethe/internal/shared/wire"
)
// toolName is the tool identifier returned by Parser.Tool and stamped into
// the Tool field of every event this package emits.
const toolName = "claude-code"
// Parser maps Claude Code JSONL transcripts into lethe wire events.
// Its only state is the host name supplied to New.
type Parser struct {
// host is copied into the Host field of every event this parser emits.
host string
}
// New returns a Parser whose emitted events all carry host in their Host field.
func New(host string) *Parser {
	p := new(Parser)
	p.host = host
	return p
}
// Tool reports the fixed tool name for this parser; it is the same value that
// is written into the Tool field of emitted events.
func (p *Parser) Tool() string { return toolName }
// Discover walks root recursively and returns every non-directory entry whose
// path has the ".jsonl" extension, together with its current size. The result
// is sorted by path so callers see a stable order between polls.
//
// Any error reported by the walk (including a missing root) aborts discovery
// and is returned unchanged, with one exception: a transcript that disappears
// between the directory read and the stat is skipped instead of failing the
// whole walk, because files under a live directory may be removed at any time.
func (p *Parser) Discover(root string) ([]parser.SourceFile, error) {
	files := make([]parser.SourceFile, 0)
	walkErr := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() || filepath.Ext(path) != ".jsonl" {
			return nil
		}
		info, err := d.Info()
		switch {
		case errors.Is(err, fs.ErrNotExist):
			// DirEntry.Info stats lazily; the entry was listed but has since
			// been removed or renamed. There is nothing left to parse.
			return nil
		case err != nil:
			return err
		}
		files = append(files, parser.SourceFile{Path: path, Size: info.Size()})
		return nil
	})
	if walkErr != nil {
		return nil, walkErr
	}
	sort.Slice(files, func(i, j int) bool {
		return files[i].Path < files[j].Path
	})
	return files, nil
}
// Parse reads complete newline-terminated records from path, starting at byte
// offset since, and maps each non-blank line through mapRecord.
//
// It returns the events produced, the offset just past the last complete line
// consumed, and any error. A trailing line without a newline is not consumed,
// so the returned offset never points into the middle of a record that is
// still being written. A since that is negative or larger than the current
// file size restarts reading from offset 0. On a read error other than EOF,
// the events gathered so far are returned alongside the error.
func (p *Parser) Parse(path string, since int64) ([]wire.TurnEvent, int64, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, since, err
	}
	// Read-only handle: a Close failure carries no information worth surfacing.
	defer func() { _ = file.Close() }()

	stat, err := file.Stat()
	if err != nil {
		return nil, since, err
	}
	if since < 0 || since > stat.Size() {
		since = 0
	}
	if _, err := file.Seek(since, io.SeekStart); err != nil {
		return nil, since, err
	}

	reader := bufio.NewReaderSize(file, 1<<20)
	events := make([]wire.TurnEvent, 0)
	pos := since
	for {
		line, readErr := reader.ReadBytes('\n')
		if readErr != nil {
			// At EOF any bytes in line are an unterminated tail; pos is left
			// untouched so the next call re-reads them once they are complete.
			if errors.Is(readErr, io.EOF) {
				return events, pos, nil
			}
			return events, pos, readErr
		}

		// The byte offset of the line start doubles as the event sequence number.
		start := pos
		pos += int64(len(line))

		trimmed := strings.TrimSpace(string(line))
		if trimmed == "" {
			continue
		}
		if event, ok := p.mapRecord(path, start, []byte(trimmed), stat.ModTime()); ok {
			events = append(events, event)
		}
	}
}
// transcriptRecord is the JSON shape that mapRecord decodes from a single
// transcript line. Several fields are decoded but not read anywhere in the
// visible code (Type, GitBranch, Version, ParentUUID, SourceToolAssistantUUID,
// IsSidechain).
type transcriptRecord struct {
Type string `json:"type"`
// UUID, when non-blank, is used verbatim as the event's turn ID.
UUID string `json:"uuid"`
// Timestamp is parsed with time.RFC3339Nano; a missing or unparsable value
// turns the line into a system fallback event.
Timestamp string `json:"timestamp"`
// CWD becomes the session's working directory when non-blank.
CWD string `json:"cwd"`
// SessionID, when blank, is replaced by a name derived from the file path.
SessionID string `json:"sessionId"`
GitBranch string `json:"gitBranch"`
Version string `json:"version"`
ParentUUID *string `json:"parentUuid"`
SourceToolAssistantUUID string `json:"sourceToolAssistantUUID"`
IsSidechain bool `json:"isSidechain"`
// Message is the payload that gets mapped; records without one are skipped.
Message *messageRecord `json:"message"`
// ToolUseResult is kept raw; it is consulted when rendering and storing
// tool results for user records.
ToolUseResult json.RawMessage `json:"toolUseResult"`
}
// messageRecord is the "message" object nested inside a transcript record.
type messageRecord struct {
// Role selects the mapping branch in mapRecord: "user", "assistant" or
// "system"; any other value produces no event.
Role string `json:"role"`
// Model is copied onto events only for assistant records.
Model string `json:"model"`
// Content is kept raw because parseContent accepts either a JSON string or
// an array of content parts.
Content json.RawMessage `json:"content"`
// Usage is optional; token counts are copied only for assistant records.
Usage *usageRecord `json:"usage"`
}
// usageRecord holds the token counters decoded from a message's "usage"
// object. A counter of zero is reported as absent (nil) by int64PtrOrNil.
type usageRecord struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
}
// contentPart is one element of a message's "content" array. A single struct
// covers every part type handled in this file ("text", "tool_use",
// "tool_result"); fields that do not apply to a given type stay at their zero
// value.
type contentPart struct {
	Type  string          `json:"type"`
	Text  string          `json:"text"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
	// Content is the tool result text. On decode it is filled from either a
	// JSON string or a list of nested blocks (see UnmarshalJSON).
	Content     string `json:"content"`
	ToolUseID   string `json:"tool_use_id"`
	IsError     bool   `json:"is_error"`
	Description string `json:"description"`
}

// UnmarshalJSON decodes a content part while accepting "content" in both
// shapes a tool_result may use: a plain string, or an array of nested blocks.
// For the array form the non-empty text of every block whose type is "text"
// is joined with a blank line; other block types are ignored. An absent or
// null "content" leaves the field untouched, and any other JSON shape is still
// reported as an error.
//
// Without this, a tool_result carrying array content fails to decode and the
// entire transcript record is reported as invalid.
func (c *contentPart) UnmarshalJSON(data []byte) error {
	// plain has contentPart's fields but none of its methods, so decoding into
	// it does not recurse back into this function.
	type plain contentPart
	aux := struct {
		*plain
		// Shadows plain.Content so the raw value can be inspected before it is
		// converted to a string.
		Content json.RawMessage `json:"content"`
	}{plain: (*plain)(c)}
	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	trimmed := strings.TrimSpace(string(aux.Content))
	switch {
	case trimmed == "" || trimmed == "null":
		return nil
	case trimmed[0] == '[':
		var blocks []struct {
			Type string `json:"type"`
			Text string `json:"text"`
		}
		if err := json.Unmarshal(aux.Content, &blocks); err != nil {
			return err
		}
		texts := make([]string, 0, len(blocks))
		for _, block := range blocks {
			if block.Type == "text" && block.Text != "" {
				texts = append(texts, block.Text)
			}
		}
		c.Content = strings.Join(texts, "\n\n")
		return nil
	default:
		var text string
		if err := json.Unmarshal(aux.Content, &text); err != nil {
			return err
		}
		c.Content = text
		return nil
	}
}
// mapRecord converts one trimmed transcript line into a wire event.
//
// path is the transcript file, seq the byte offset of the line (used as the
// event sequence number), raw the line's JSON, and fallbackTime the timestamp
// to use when the line itself cannot supply one.
//
// The boolean result is false when the line yields nothing to emit: it has no
// message, or its role/content combination is not one of the handled cases.
// Lines that fail to decode (bad JSON, bad timestamp, bad content) are not
// dropped; they come back as system fallback events describing the problem.
func (p *Parser) mapRecord(path string, seq int64, raw []byte, fallbackTime time.Time) (wire.TurnEvent, bool) {
	fallback := func(reason string) (wire.TurnEvent, bool) {
		return p.systemFallback(path, seq, raw, fallbackTime, reason), true
	}

	var rec transcriptRecord
	if err := json.Unmarshal(raw, &rec); err != nil {
		return fallback(fmt.Sprintf("invalid claude json: %v", err))
	}
	msg := rec.Message
	if msg == nil {
		return wire.TurnEvent{}, false
	}
	ts, err := parseTimestamp(rec.Timestamp)
	if err != nil {
		return fallback(fmt.Sprintf("invalid claude timestamp: %v", err))
	}
	text, parts, err := parseContent(msg.Content)
	if err != nil {
		return fallback(fmt.Sprintf("invalid claude content: %v", err))
	}

	meta := wire.SessionMeta{
		WorkingDir: stringPtrOrNil(rec.CWD),
		SourceFile: path,
	}
	event := wire.TurnEvent{
		Tool:        toolName,
		Host:        p.host,
		SessionID:   sessionIDFor(path, rec.SessionID),
		TurnID:      turnIDFor(rec, seq, ts, raw),
		Seq:         seq,
		Timestamp:   ts,
		SessionMeta: meta,
		Metadata:    cloneRaw(raw),
	}

	switch msg.Role {
	case "user":
		// Text wins: a user record that carries text is emitted as a user turn
		// even if it also contains tool results.
		if text != "" {
			event.Role = "user"
			event.Content = text
			return event, true
		}
		if results := filterParts(parts, "tool_result"); len(results) > 0 {
			event.Role = "tool"
			event.Content = renderToolResult(rec.ToolUseResult, results)
			event.ToolCalls = toolResultPayload(rec.ToolUseResult, results)
			return event, true
		}
	case "assistant":
		calls := filterParts(parts, "tool_use")
		if text == "" && len(calls) == 0 {
			break
		}
		// Model, token usage and tool calls are attached the same way whether
		// the record is emitted as an assistant turn or as a tool turn.
		event.Model = stringPtrOrNil(msg.Model)
		event.TokensIn = int64PtrOrNil(msg.Usage, func(u *usageRecord) int64 { return u.InputTokens })
		event.TokensOut = int64PtrOrNil(msg.Usage, func(u *usageRecord) int64 { return u.OutputTokens })
		if len(calls) > 0 {
			event.ToolCalls = marshalParts(calls)
		}
		if text != "" {
			event.Role = "assistant"
			event.Content = text
		} else {
			event.Role = "tool"
			event.Content = renderToolUse(calls)
		}
		return event, true
	case "system":
		if text != "" {
			event.Role = "system"
			event.Content = text
			return event, true
		}
	}
	return wire.TurnEvent{}, false
}
// systemFallback builds a system-role event for a line that could not be
// mapped normally. content is the human-readable reason; the raw line is kept
// in Metadata. The session ID is derived from the file path and the timestamp
// from fallbackTime, since the line itself could not be relied on for either.
func (p *Parser) systemFallback(path string, seq int64, raw []byte, fallbackTime time.Time, content string) wire.TurnEvent {
	session := sessionIDFor(path, "")
	unix := fallbackTime.Unix()
	return wire.TurnEvent{
		Tool:        toolName,
		Host:        p.host,
		Role:        "system",
		SessionID:   session,
		TurnID:      synthesizedTurnID(session, seq, unix, raw),
		Seq:         seq,
		Timestamp:   unix,
		Content:     content,
		SessionMeta: wire.SessionMeta{SourceFile: path},
		Metadata:    cloneRaw(raw),
	}
}
// parseContent decodes a message's raw "content" value.
//
// Empty or null content yields ("", nil, nil). A JSON string yields its
// trimmed text and no parts. Anything else is decoded as an array of content
// parts; the returned text is then the trimmed, non-empty text of every "text"
// part joined by a blank line, and all decoded parts are returned alongside.
func parseContent(raw json.RawMessage) (string, []contentPart, error) {
	switch trimmed := strings.TrimSpace(string(raw)); {
	case trimmed == "", trimmed == "null":
		return "", nil, nil
	case strings.HasPrefix(trimmed, `"`):
		var text string
		if err := json.Unmarshal(raw, &text); err != nil {
			return "", nil, err
		}
		return strings.TrimSpace(text), nil, nil
	}

	parts := []contentPart{}
	if err := json.Unmarshal(raw, &parts); err != nil {
		return "", nil, err
	}

	var joined strings.Builder
	for i := range parts {
		if parts[i].Type != "text" {
			continue
		}
		body := strings.TrimSpace(parts[i].Text)
		if body == "" {
			continue
		}
		if joined.Len() > 0 {
			joined.WriteString("\n\n")
		}
		joined.WriteString(body)
	}
	return joined.String(), parts, nil
}
// filterParts returns, in order, the parts whose Type equals want. The result
// is always a non-nil slice, empty when nothing matches.
func filterParts(parts []contentPart, want string) []contentPart {
	matched := []contentPart{}
	for i := range parts {
		if parts[i].Type != want {
			continue
		}
		matched = append(matched, parts[i])
	}
	return matched
}
// renderToolUse produces a one-line placeholder describing a tool invocation.
// Only the first part is described: its name (or "tool" when unnamed) and,
// when the part's input decodes to an object with a non-blank "description",
// that description as well. With no parts it returns a bare "<tool_use>".
func renderToolUse(parts []contentPart) string {
	if len(parts) == 0 {
		return "<tool_use>"
	}
	first := parts[0]

	name := first.Name
	if name == "" {
		name = "tool"
	}

	var args struct {
		Description string `json:"description"`
	}
	if json.Unmarshal(first.Input, &args) != nil {
		// Input that is absent or not an object simply has no description.
		return fmt.Sprintf("<tool_use: %s>", name)
	}
	if desc := strings.TrimSpace(args.Description); desc != "" {
		return fmt.Sprintf("<tool_use: %s - %s>", name, desc)
	}
	return fmt.Sprintf("<tool_use: %s>", name)
}
// renderToolResult produces a one-line placeholder for a tool result,
// embedding the first available summary line, or a bare "<tool_result>" when
// no summary can be found.
func renderToolResult(toolUseResult json.RawMessage, parts []contentPart) string {
	if s := firstToolResultSummary(toolUseResult, parts); s != "" {
		return fmt.Sprintf("<tool_result: %s>", s)
	}
	return "<tool_result>"
}
// firstToolResultSummary returns the first non-empty summary line found, in
// this order of preference: the "stdout" field of toolUseResult, its "stderr"
// field, then the Content of each part in turn. toolUseResult is ignored when
// it is empty or does not decode as an object with those fields. It returns ""
// when no candidate has any text.
func firstToolResultSummary(toolUseResult json.RawMessage, parts []contentPart) string {
	candidates := make([]string, 0, len(parts)+2)
	if len(toolUseResult) > 0 {
		var out struct {
			Stdout string `json:"stdout"`
			Stderr string `json:"stderr"`
		}
		if err := json.Unmarshal(toolUseResult, &out); err == nil {
			candidates = append(candidates, out.Stdout, out.Stderr)
		}
	}
	for i := range parts {
		candidates = append(candidates, parts[i].Content)
	}

	for _, candidate := range candidates {
		if s := summarizeText(candidate); s != "" {
			return s
		}
	}
	return ""
}
// summarizeText reduces text to a short single-line summary: the first line
// of the trimmed input, itself trimmed, capped at 120 bytes. Longer lines are
// cut to at most 117 bytes and suffixed with "...". Blank input yields "".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte 117 is a
// continuation byte the cut moves back to the start of that rune, so valid
// input always produces valid UTF-8. Cutting at a fixed byte index would
// otherwise leave a dangling partial rune at the end of non-ASCII summaries.
//
// NOTE(review): synthesizedTurnID hashes this function's output, so for the
// lines where the old byte cut split a rune the synthesized ID changes.
func summarizeText(text string) string {
	const (
		maxLen  = 120 // longest summary returned untouched, in bytes
		keepLen = 117 // bytes kept before the "..." suffix
		// A UTF-8 rune is at most 4 bytes, so its start is at most 3 bytes
		// before any of its continuation bytes.
		minCut = keepLen - 3
	)

	first, _, _ := strings.Cut(strings.TrimSpace(text), "\n")
	line := strings.TrimSpace(first)
	if len(line) <= maxLen {
		return line
	}

	cut := keepLen
	// 0b10xxxxxx marks a UTF-8 continuation byte; back up until the byte at
	// the cut position starts a rune. The bound keeps invalid input (long runs
	// of continuation bytes) from shrinking the summary further.
	for cut > minCut && line[cut]&0xC0 == 0x80 {
		cut--
	}
	return line[:cut] + "..."
}
// toolResultPayload picks the JSON stored as an event's tool payload: a copy
// of toolUseResult when it is non-empty, otherwise the marshalled parts.
func toolResultPayload(toolUseResult json.RawMessage, parts []contentPart) json.RawMessage {
	if len(toolUseResult) == 0 {
		return marshalParts(parts)
	}
	return cloneRaw(toolUseResult)
}
// marshalParts encodes parts as a JSON array. It returns nil for an empty
// slice, and also nil if encoding fails, so callers never see an error.
func marshalParts(parts []contentPart) json.RawMessage {
	if len(parts) > 0 {
		if encoded, err := json.Marshal(parts); err == nil {
			return encoded
		}
	}
	return nil
}
// parseTimestamp converts an RFC 3339 timestamp (fractional seconds allowed)
// into Unix seconds. A blank value is rejected with "missing timestamp"; any
// other unparsable value returns the error from time.Parse. The value is
// parsed as given, so surrounding whitespace makes it invalid.
func parseTimestamp(raw string) (int64, error) {
	if len(strings.TrimSpace(raw)) == 0 {
		return 0, errors.New("missing timestamp")
	}
	parsed, parseErr := time.Parse(time.RFC3339Nano, raw)
	if parseErr != nil {
		return 0, parseErr
	}
	seconds := parsed.Unix()
	return seconds, nil
}
// sessionIDFor returns sessionID unchanged when it contains any non-space
// character; otherwise it falls back to the base name of path with its final
// extension removed.
func sessionIDFor(path, sessionID string) string {
	if strings.TrimSpace(sessionID) == "" {
		name := filepath.Base(path)
		// Ext is always a suffix of name, so slicing it off is safe.
		return name[:len(name)-len(filepath.Ext(name))]
	}
	return sessionID
}
// turnIDFor returns the record's own UUID when it is non-blank, and otherwise
// a synthesized ID derived from the session ID, seq, timestamp and raw line.
//
// NOTE(review): the fallback calls sessionIDFor with an empty path, which
// yields "" when the record has no session ID — so the transcript file name
// never contributes to the synthesized ID here. Confirm that is intended.
func turnIDFor(record transcriptRecord, seq, timestamp int64, raw []byte) string {
	if id := record.UUID; strings.TrimSpace(id) != "" {
		return id
	}
	session := sessionIDFor("", record.SessionID)
	return synthesizedTurnID(session, seq, timestamp, raw)
}
// synthesizedTurnID derives a deterministic 16-hex-character ID for a record
// that has no UUID of its own. It hashes the session ID, seq, timestamp and
// the summarized form of the raw line with SHA-256 and keeps the first 8 bytes.
func synthesizedTurnID(sessionID string, seq, timestamp int64, raw []byte) string {
	hasher := sha256.New()
	// hash.Hash.Write never returns an error, so Fprintf's result is not checked.
	fmt.Fprintf(hasher, "%s|%d|%d|%s", sessionID, seq, timestamp, summarizeText(string(raw)))
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:8])
}
// cloneRaw returns an independent copy of raw so the result does not alias
// the caller's buffer. Empty or nil input yields nil.
func cloneRaw(raw json.RawMessage) json.RawMessage {
	if len(raw) == 0 {
		return nil
	}
	return append(json.RawMessage(nil), raw...)
}
// stringPtrOrNil returns a pointer to the whitespace-trimmed value, or nil
// when nothing remains after trimming.
func stringPtrOrNil(value string) *string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return &trimmed
	}
	return nil
}
// int64PtrOrNil extracts an int64 from value via get and returns a pointer to
// it. It returns nil when value is nil (get is not called) or when the
// extracted number is zero, so zero is treated as "not present".
func int64PtrOrNil[T any](value *T, get func(*T) int64) *int64 {
	if value != nil {
		if n := get(value); n != 0 {
			return &n
		}
	}
	return nil
}