~bigbes/lethe

ref: 2d9d2b8ec08ee09cc64c5d925ab85716b1d7d1fb lethe/cmd/lethe/main.go -rw-r--r-- 7.9 KiB
2d9d2b8e — Eugene Blikh search: add /api/v1/search API and opencode collector parser 23 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Command lethe is the lethe server binary. main.go is a thin shell: it loads
// the configuration, registers every steward asset that makes up the running
// server, and orchestrates the lifecycle (Inject -> Init -> Start -> wait for
// signal -> Stop -> Destroy). All business logic lives in the assets.
//
// Steward unwind compensation: per the Phase 4 finding, steward.Manager does
// NOT call Destroy on already-init'd siblings when a later component's Init
// returns an error. main keeps a parallel slice of the registered services in
// registration order; on Inject/Init/Start failure it walks the slice in
// reverse and calls Destroy directly, swallowing individual errors so the rest
// of the cleanup proceeds.
package main

import (
	"context"
	"flag"
	"fmt"
	"log/slog"
	"os"
	"os/signal"
	"syscall"
	"time"

	"go.bigb.es/auxilia/scribe"
	"go.bigb.es/auxilia/steward"

	"sourcecraft.dev/bigbes/lethe/internal/config"
	"sourcecraft.dev/bigbes/lethe/internal/domain/ingest"
	"sourcecraft.dev/bigbes/lethe/internal/domain/project"
	"sourcecraft.dev/bigbes/lethe/internal/domain/savedsearch"
	"sourcecraft.dev/bigbes/lethe/internal/domain/search"
	"sourcecraft.dev/bigbes/lethe/internal/domain/session"
	"sourcecraft.dev/bigbes/lethe/internal/domain/stats"
	"sourcecraft.dev/bigbes/lethe/internal/platform/database"
	"sourcecraft.dev/bigbes/lethe/internal/platform/health"
	"sourcecraft.dev/bigbes/lethe/internal/platform/observability"
	"sourcecraft.dev/bigbes/lethe/internal/server"
	authpkg "sourcecraft.dev/bigbes/lethe/internal/server/auth"
)

// Build identity reported in the startup log line.
const version = "0.1.0-dev"

// Shutdown and unwind time budgets.
const (
	// shutdownGrace is the deadline applied to the shutdown context created
	// in run once a termination signal has been received.
	shutdownGrace = 15 * time.Second
	// perDestroyTimeout is the deadline given to each individual Destroy
	// call made by unwindOnError.
	perDestroyTimeout = 5 * time.Second
)

// Process exit codes returned by run.
const (
	// exitOK is returned after a clean signal-driven shutdown.
	exitOK = 0
	// exitConfigError is returned when the configuration file fails to load.
	exitConfigError = 1
	// exitLifecycleError is returned when Inject, Init or Start fails.
	exitLifecycleError = 2
)

// destroyer is the narrow, package-local view of a registered service that
// unwindOnError needs: anything exposing Destroy. Registered services are
// type-asserted against it, and those that do not implement it are skipped
// during the unwind.
type destroyer interface {
	// Destroy releases whatever the service holds. ctx carries the deadline
	// the caller allots to this single call.
	Destroy(ctx context.Context) error
}

// main delegates all work to run and turns its return value into the process
// exit status.
func main() {
	code := run()
	os.Exit(code)
}

// run is the testable entry point. It returns an exit code rather than
// calling os.Exit directly so future tests (or a smoke harness) can exercise
// startup and shutdown without tearing down the test binary.
//
// Sequence: parse flags, install a bootstrap logger, load the config, register
// every asset with the steward manager, then Inject -> Init -> Start, block
// until SIGINT/SIGTERM, and finally Stop -> Destroy under a shared deadline.
//
// Returns exitConfigError when the config cannot be loaded, exitLifecycleError
// when Inject, Init or Start fails (after unwinding the registered services),
// and exitOK after a signal-driven shutdown. Errors from Stop and Destroy are
// logged but do not change the exit code.
func run() int {
	configPath := flag.String("config", "config.yaml", "path to YAML config file")
	flag.Parse()

	// Bootstrap a stderr logger before anything else so the unwind path always
	// has a working slog.Default(). The Logger asset's Init (Phase 4) replaces
	// this with the configured handler once it runs.
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})))

	slog.Info("lethe starting", slog.String("version", version), slog.String("config", *configPath))

	cfg, err := config.Load(*configPath)
	if err != nil {
		slog.Error("load config", scribe.Err(err))
		return exitConfigError
	}

	// signal.NotifyContext converts SIGINT/SIGTERM into ctx cancellation. The
	// deferred stop covers the early-return paths; it is also called
	// explicitly once the first signal arrives (see below) to release the OS
	// signal handlers before the shutdown phase.
	ctx, stopSignals := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stopSignals()

	mgr := steward.NewManager()

	// Underlying service struct pointers are tracked here in registration
	// order so the unwind compensator can walk them in reverse on failure.
	var (
		loggerSvc       = &observability.Logger{}
		metricsSvc      = &observability.Metrics{}
		dbSvc           = &database.Database{}
		dbCheckSvc      = &health.DBCheck{}
		healthSetSvc    = &health.Set{}
		authSvc         = &authpkg.Authenticator{}
		ingestRepo      = &ingest.Repository{}
		ingestSvc       = &ingest.Service{}
		ingestHnd       = &ingest.Handler{}
		sessionRepo     = &session.Repository{}
		sessionHnd      = &session.Handler{}
		projectRepo     = &project.Repository{}
		projectHnd      = &project.Handler{}
		statsRepo       = &stats.Repository{}
		statsHnd        = &stats.Handler{}
		savedSearchRepo = &savedsearch.Repository{}
		savedSearchHnd  = &savedsearch.Handler{}
		searchRepo      = &search.Repository{}
		searchHnd       = &search.Handler{}
		serverSvc       = &server.Server{}
	)

	registered := []any{
		loggerSvc, metricsSvc, dbSvc, dbCheckSvc, healthSetSvc,
		authSvc, ingestRepo, ingestSvc, ingestHnd,
		sessionRepo, sessionHnd, projectRepo, projectHnd, statsRepo, statsHnd,
		savedSearchRepo, savedSearchHnd, searchRepo, searchHnd, serverSvc,
	}

	mgr.AddComponent(ctx,
		steward.MustConfigurationAsset(cfg),
		steward.MustServiceAsset(loggerSvc),
		steward.MustServiceAsset(metricsSvc),
		steward.MustServiceAsset(dbSvc),
		steward.MustServiceAsset(dbCheckSvc),
		steward.MustServiceAsset(healthSetSvc),
		steward.MustServiceAsset(authSvc),
		steward.MustServiceAsset(ingestRepo),
		steward.MustServiceAsset(ingestSvc),
		steward.MustServiceAsset(ingestHnd),
		steward.MustServiceAsset(sessionRepo),
		steward.MustServiceAsset(sessionHnd),
		steward.MustServiceAsset(projectRepo),
		steward.MustServiceAsset(projectHnd),
		steward.MustServiceAsset(statsRepo),
		steward.MustServiceAsset(statsHnd),
		steward.MustServiceAsset(savedSearchRepo),
		steward.MustServiceAsset(savedSearchHnd),
		steward.MustServiceAsset(searchRepo),
		steward.MustServiceAsset(searchHnd),
		steward.MustServiceAsset(serverSvc, steward.Root()),
	)

	if cfg.Auth.OIDC.Enabled && cfg.Auth.OIDC.DevStub.Enabled {
		devStubSvc := &authpkg.OIDCDevStub{}
		registered = append(registered, devStubSvc)
		// Root: OIDCDevStub has no in-process dependents (it's a side listener),
		// so without explicit Root attachment steward treats it as orphan and
		// logs an ERR/WRN about graph quality.
		mgr.AddComponent(ctx, steward.MustServiceAsset(devStubSvc, steward.Root()))
	}
	if cfg.Auth.OIDC.Enabled {
		oidcSvc := &authpkg.OIDCVerifier{}
		registered = append(registered, oidcSvc)
		mgr.AddComponent(ctx, steward.MustServiceAsset(oidcSvc))
	}

	if err := mgr.Inject(ctx); err != nil {
		slog.Error("steward inject failed", scribe.Err(err))
		unwindOnError(registered)
		return exitLifecycleError
	}

	if err := mgr.Init(ctx); err != nil {
		slog.Error("steward init failed", scribe.Err(err))
		unwindOnError(registered)
		return exitLifecycleError
	}

	if err := mgr.Start(ctx); err != nil {
		slog.Error("steward start failed", scribe.Err(err))
		unwindOnError(registered)
		return exitLifecycleError
	}

	slog.Info("lethe ready", slog.String("bind", cfg.Server.Bind))

	// Block until SIGINT/SIGTERM (or a parent cancellation if main is ever
	// embedded). After this point we own the shutdown sequence.
	<-ctx.Done()

	// Release the signal handlers now rather than at function return: while
	// they stay registered, further SIGINT/SIGTERM deliveries are absorbed by
	// the already-cancelled ctx. Unregistering restores the default
	// disposition, so a second signal can terminate a shutdown that hangs.
	stopSignals()
	slog.Info("signal received; shutting down")

	// Use a fresh context for shutdown — ctx is already cancelled. The
	// shutdownGrace budget is shared by Stop and Destroy so a stuck
	// dependency cannot keep the process alive indefinitely.
	stopCtx, cancel := context.WithTimeout(context.Background(), shutdownGrace)
	defer cancel()

	if err := mgr.Stop(stopCtx); err != nil {
		slog.Error("steward stop returned error", scribe.Err(err))
	}
	// Destroy runs under the same deadline as Stop; whatever Stop consumed is
	// no longer available here.
	if err := mgr.Destroy(stopCtx); err != nil {
		slog.Error("steward destroy returned error", scribe.Err(err))
	}

	slog.Info("lethe stopped")
	return exitOK
}

// unwindOnError tears down the registered services after a failed startup
// step. It visits the slice from last to first and, for every entry that
// satisfies destroyer, calls Destroy under a fresh context limited to
// perDestroyTimeout, so each call carries its own deadline instead of sharing
// one budget. A Destroy error is logged at warn level together with the
// component's dynamic type and then dropped, so the remaining entries are
// still visited. Entries without a Destroy method are skipped.
//
// This compensates for the Phase 4 finding that steward.Manager does not
// unwind on its own when Init/Start fails.
func unwindOnError(registered []any) {
	// destroyOne handles a single entry; the closure scope lets the context
	// be released with defer once that entry is done.
	destroyOne := func(component any) {
		target, isDestroyer := component.(destroyer)
		if !isDestroyer {
			return
		}

		ctx, cancel := context.WithTimeout(context.Background(), perDestroyTimeout)
		defer cancel()

		err := target.Destroy(ctx)
		if err == nil {
			return
		}
		slog.Warn("destroy on unwind failed",
			slog.String("component", fmt.Sprintf("%T", component)),
			scribe.Err(err),
		)
	}

	// Reverse registration order: the most recently registered entry first.
	for remaining := len(registered); remaining > 0; remaining-- {
		destroyOne(registered[remaining-1])
	}
}