~bigbes/lethe

57c0d49169c3d3cdc6da22b536fb19a4e1433a16 — Eugene Blikh 24 days ago da1827c
collector: persist skipped-only parser progress
M docs/tasks/lethe-collector-claude-code.md => docs/tasks/lethe-collector-claude-code.md +1 -0
@@ 322,6 322,7 @@ Smoke: `go run ./cmd/lethe-collector --config ./tmp/collector-smoke.yaml status`
- ureview (final): backfill offset-0 semantics are implemented as `RunBackfillOnce` instead of a mode flag on `RunOnce` — explicit call sites are safer than a boolean parameter that could be misused in daemon loops.
- ureview (final): enforced the outbox size cap before replay and normalized trailing slashes in `server_url` — keeps IV5 and IV9 true for preexisting state and valid-looking URLs.
- ureview (final): normalized sender `serverURL` and enforced outbox cap before every replay to fix IV5/IV9 violations found in review.
- ureview (final): skipped-only parse results (no events but `newOffset > startOffset`) now persist the new offset so the file is not re-parsed forever and status lag clears.

### Deferred (needs user input)


M internal/collector/ingest/runner.go => internal/collector/ingest/runner.go +5 -0
@@ 186,6 186,11 @@ func runFileFromOffset(ctx context.Context, cfg config.Config, src config.Source
		return culpa.Wrap(err, "parse source file")
	}
	if len(events) == 0 {
		if newOffset > startOffset {
			if err := store.SaveOffset(ctx, src.Tool, path, newOffset); err != nil {
				return culpa.Wrap(err, "save skipped-only offset")
			}
		}
		return nil
	}
	stampHost(events, cfg.Host)

M internal/collector/ingest/runner_test.go => internal/collector/ingest/runner_test.go +49 -0
@@ 487,6 487,55 @@ func TestRunBackfillOnce_InterruptedProgressIsResumable(t *testing.T) {
	assertOffset(t, ctx, store, "claude-code", file, 300)
}

// TestRunOnce_SkippedOnlyParseResultPersistsNewOffsetAndDoesNotPost feeds
// RunOnce a fake parser that yields zero events but reports newOffset 250 for
// the file. It expects RunOnce to succeed, the offset checked by assertOffset
// to be 250, and the test HTTP server to have received no requests.
func TestRunOnce_SkippedOnlyParseResultPersistsNewOffsetAndDoesNotPost(t *testing.T) {
	ctx := context.Background()
	store := openTestStore(t, ctx)
	source := testSource(t, "claude-code", 10, 4096)
	file := filepath.Join(source.Path, "one.jsonl")

	// Every request that reaches the server bumps this counter.
	// NOTE(review): the handler runs on the server's goroutine and the counter
	// is read below without explicit synchronization — confirm this is
	// acceptable under -race if a POST ever does occur.
	var requests int
	countRequest := func(w http.ResponseWriter, r *http.Request) {
		requests++
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(resultJSON(Result{Accepted: 0})))
	}
	srv := httptest.NewServer(http.HandlerFunc(countRequest))
	defer srv.Close()
	sender := NewSender(srv.URL, srv.Client())

	// No events, but the parser claims it consumed input up to offset 250.
	files := []parser.SourceFile{{Path: file}}
	results := map[string]parseResult{
		file: {events: []wire.TurnEvent{}, newOffset: 250},
	}
	p := newFakeParser("claude-code", files, results)

	if err := RunOnce(ctx, testConfig(), source, p, store, sender); err != nil {
		t.Fatalf("RunOnce: %v", err)
	}

	assertOffset(t, ctx, store, "claude-code", file, 250)
	if requests != 0 {
		t.Errorf("POST count = %d, want 0", requests)
	}
}

// TestRunOnce_EmptyParseResultNoProgressDoesNotSaveOffset feeds RunOnce a fake
// parser that yields zero events and reports newOffset 0 for the file. It
// expects RunOnce to succeed and the offset checked by assertOffset to be 0.
func TestRunOnce_EmptyParseResultNoProgressDoesNotSaveOffset(t *testing.T) {
	ctx := context.Background()
	store := openTestStore(t, ctx)
	source := testSource(t, "claude-code", 10, 4096)
	file := filepath.Join(source.Path, "one.jsonl")

	// Empty result with no forward progress: newOffset stays at zero.
	files := []parser.SourceFile{{Path: file}}
	results := map[string]parseResult{
		file: {events: []wire.TurnEvent{}, newOffset: 0},
	}
	p := newFakeParser("claude-code", files, results)
	sender := acceptingSender(t, nil)

	if err := RunOnce(ctx, testConfig(), source, p, store, sender); err != nil {
		t.Fatalf("RunOnce: %v", err)
	}

	assertOffset(t, ctx, store, "claude-code", file, 0)
}

type parseResult struct {
	events    []wire.TurnEvent
	newOffset int64