M docs/tasks/lethe-server.md => docs/tasks/lethe-server.md +4 -1
@@ 406,6 406,9 @@ Greenfield — no compat surface. Wire format is the only forward-compat concern
### Notes carried forward
-- Phase 3 should add `migrate-up`, `migrate-down`, `migrate-create` to the Justfile alongside the migration runner so the targets aren't dead.
+- Phase 3 should add `migrate-up`, `migrate-down`, `migrate-create` to the Justfile alongside the migration runner so the targets aren't dead. (Done in Phase 3.)
- Each phase from 2 onward must remove the dep it adopts from `internal/deps/deps.go`; Phase 9 deletes the file.
- README's Caddy/Authelia snippets use `auth.example.com` placeholders; replace with phoebe-specific values when the production deploy lands (out of scope for this task).
+- **Phase 4 finding (steward unwind gap)**: `steward.Manager.Init` returns on the first failing `CallInit` and does **not** iterate back over previously-initialized assets to call `Destroy`. The canary test `TestStewardUnwindsOnInitFailure` (in `internal/platform/health/steward_unwind_test.go`) is intentionally red on master to document this. **Phase 9 main must compensate**: track each component as it init's and, on `Init` error, walk the list in reverse calling `Destroy` directly on each (don't try `mgr.Stop`/`mgr.Destroy` — those panic unless the manager has reached Started). Once Phase 9 lands the explicit unwind, either delete the canary test or convert it to assert the new compensating behavior.
+- **Phase 5 consistency fix (folded into commit `1b215bc` via amend)**: chi's default 404/405 handlers wrote text/plain, violating the invariant "errors leaving any HTTP handler are rendered as RFC 7807". Added explicit `chi.Router.NotFound`/`MethodNotAllowed` handlers that call `apierror.Render` with `NOT_FOUND` / `METHOD_NOT_ALLOWED` codes. Added `METHOD_NOT_ALLOWED → 405` entry to the apierror code-status map. Added two regression tests (`TestRouter_NotFoundReturnsProblemJSON`, `TestRouter_MethodNotAllowedReturnsProblemJSON`).
+- **Phase 3 → Phase 7 contract pin**: `INSERT … ON CONFLICT … DO UPDATE` fires the UPDATE trigger on SQLite (verified by `TestUpsertFiresUpdateTriggerAndKeepsFTSCoherent`). Regular FTS5 (not contentless / external content) was chosen so `WHERE owner = ?` works on the FTS table without a join — accepted the storage cost (`content` duplicated in real table + FTS shadow). Composite key order is `(owner, tool, host, session_id[, turn_id])` everywhere; ingest INSERT/UPDATE/ON CONFLICT clauses must match. `started_at`/`ended_at`/`source_file` are `NOT NULL` — ingest derives `started_at` from `MIN(turn.timestamp)` when `SessionMeta.StartedAt` is absent.
M go.sum => go.sum +4 -0
@@ 46,10 46,14 @@ github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
M internal/deps/deps.go => internal/deps/deps.go +5 -5
@@ 1,6 1,6 @@
// Package deps records the locked set of direct dependencies for the lethe
// server during early scaffolding. Real packages adopt these as they come
-// online (server — chi; auth — go-oidc).
+// online (auth — go-oidc).
//
// Phase 2 promoted viper, validator/v10, and culpa to real imports under
// internal/config. Phase 3 promoted sqlx, modernc.org/sqlite, and
@@ 8,12 8,12 @@
// under internal/platform/database, and culpa is now used there too. Phase 4
// promoted prometheus/client_golang and auxilia/scribe (under
// internal/platform/observability) and auxilia/steward (canary test under
-// internal/platform/health) to real imports. Once every dep below has at
-// least one real importer, this file is expected to disappear in the same
-// commit that completes the migration.
+// internal/platform/health) to real imports. Phase 5 promoted chi/v5 to a
+// real import under internal/server. Once every dep below has at least one
+// real importer, this file is expected to disappear in the same commit that
+// completes the migration.
package deps
import (
_ "github.com/coreos/go-oidc/v3/oidc"
- _ "github.com/go-chi/chi/v5"
)
A internal/domain/ingest/handler.go => internal/domain/ingest/handler.go +22 -0
@@ 0,0 1,22 @@
+// Phase-5 stub. Replaced by real implementation in Phase 7.
+//
+// Mount is a no-op so the /api/v1 group registers no ingest routes yet;
+// Server.Init still wires it in so the full router topology compiles.
+package ingest
+
+import (
+ "context"
+
+ "github.com/go-chi/chi/v5"
+)
+
+// Handler is a no-op steward service. Phase 7 supplies the real ingest
+// pipeline (NDJSON streaming, validation, chunked commit).
+type Handler struct{}
+
+// Init satisfies the steward Service contract.
+func (h *Handler) Init(_ context.Context) error { return nil }
+
+// Mount registers no routes. Phase 7 replaces this with the ingest endpoints
+// rooted at the /api/v1 group passed in by Server.Init.
+func (h *Handler) Mount(_ chi.Router) {}
A internal/domain/session/handler.go => internal/domain/session/handler.go +22 -0
@@ 0,0 1,22 @@
+// Phase-5 stub. Replaced by real implementation in Phase 8.
+//
+// Mount is a no-op so the /api/v1 group registers no session routes yet;
+// Server.Init still wires it in so the full router topology compiles.
+package session
+
+import (
+ "context"
+
+ "github.com/go-chi/chi/v5"
+)
+
+// Handler is a no-op steward service. Phase 8 supplies the real session
+// read API.
+type Handler struct{}
+
+// Init satisfies the steward Service contract.
+func (h *Handler) Init(_ context.Context) error { return nil }
+
+// Mount registers no routes. Phase 8 replaces this with the sessions list /
+// detail endpoints rooted at the /api/v1 group passed in by Server.Init.
+func (h *Handler) Mount(_ chi.Router) {}
A internal/pkg/apierror/apierror.go => internal/pkg/apierror/apierror.go +112 -0
@@ 0,0 1,112 @@
+// Package apierror is the single boundary between culpa-coded errors that
+// flow through the lethe server and the RFC 7807 `application/problem+json`
+// documents the HTTP layer hands back to clients.
+//
+// Render is the only exported entry point: it inspects the error's culpa
+// CodeDetail, maps the code to an HTTP status, builds a Problem document
+// from any attached PublicDetail, and writes it. 5xx responses log the full
+// stack trace via scribe.Err first, then redact the body so internal details
+// never leak to the client.
+package apierror
+
+import (
+ "encoding/json"
+ "log/slog"
+ "net/http"
+
+ "go.bigb.es/auxilia/culpa"
+ "go.bigb.es/auxilia/scribe"
+)
+
+// Problem is the RFC 7807 representation. The custom `code` extension holds
+// the machine-readable culpa code for client-side error handling, and
+// `errors` is reserved for per-line / per-field validation errors that the
+// ingest handler (Phase 7) produces.
+type Problem struct {
+ Type string `json:"type,omitempty"`
+ Title string `json:"title"`
+ Status int `json:"status"`
+ Detail string `json:"detail,omitempty"`
+ Code string `json:"code,omitempty"`
+ Instance string `json:"instance,omitempty"`
+ Errors []ProblemError `json:"errors,omitempty"`
+}
+
+// ProblemError is one element of the `errors` extension, used for batched
+// validation reports (e.g. NDJSON ingest line failures).
+type ProblemError struct {
+ Line int `json:"line,omitempty"`
+ Field string `json:"field,omitempty"`
+ Code string `json:"code,omitempty"`
+ Message string `json:"message"`
+}
+
+// codeStatus is the authoritative culpa-code → HTTP-status map. Any code
+// not listed here falls through to 500 and gets the sanitized treatment.
+var codeStatus = map[string]int{
+ "NOT_FOUND": http.StatusNotFound,
+ "METHOD_NOT_ALLOWED": http.StatusMethodNotAllowed,
+ "INVALID": http.StatusBadRequest,
+ "VALIDATION": http.StatusBadRequest,
+ "UNAUTHORIZED": http.StatusUnauthorized,
+ "FORBIDDEN": http.StatusForbidden,
+ "CONFLICT": http.StatusConflict,
+ "TOO_LARGE": http.StatusRequestEntityTooLarge,
+}
+
+// Render is the single boundary that translates a culpa-wrapped error into
+// an RFC 7807 response. Callers must not mutate w after invoking Render.
+//
+// 5xx responses log the full error (with stack trace) via scribe.Err before
+// redacting the body, so operators can diagnose without leaking specifics
+// to clients. 4xx responses surface the culpa PublicDetail as `detail` and
+// the CodeDetail as `code`.
+func Render(w http.ResponseWriter, r *http.Request, err error) {
+ status, code := lookup(err)
+
+ p := Problem{
+ Title: http.StatusText(status),
+ Status: status,
+ Instance: r.URL.Path,
+ }
+
+ if status >= 500 {
+ // Log the full chain (with stack) before sanitizing the response.
+ slog.Default().ErrorContext(r.Context(), "internal error", scribe.Err(err))
+ p.Detail = "internal server error"
+ } else {
+ p.Code = code
+ var pub culpa.PublicDetail
+ if culpa.FindDetail(err, &pub) {
+ p.Detail = pub.Message
+ }
+ }
+
+ w.Header().Set("Content-Type", "application/problem+json")
+ w.WriteHeader(status)
+ // Best-effort encode; if the client connection is gone there is nothing
+ // useful to do beyond logging.
+ if encErr := json.NewEncoder(w).Encode(p); encErr != nil {
+ slog.Default().ErrorContext(r.Context(), "encode problem document",
+ scribe.Err(encErr),
+ slog.Int("status", status),
+ )
+ }
+}
+
+// lookup walks the culpa chain for a CodeDetail and returns the matched
+// (status, code) pair. Missing or unknown codes default to (500, "").
+func lookup(err error) (int, string) {
+ var cd culpa.CodeDetail
+ if !culpa.FindDetail(err, &cd) {
+ return http.StatusInternalServerError, ""
+ }
+ codeStr, ok := cd.Code.(string)
+ if !ok {
+ return http.StatusInternalServerError, ""
+ }
+ if status, ok := codeStatus[codeStr]; ok {
+ return status, codeStr
+ }
+ return http.StatusInternalServerError, codeStr
+}
A internal/pkg/apierror/apierror_test.go => internal/pkg/apierror/apierror_test.go +152 -0
@@ 0,0 1,152 @@
+package apierror
+
+import (
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+
+ "go.bigb.es/auxilia/culpa"
+)
+
+// TestRender_CodeToStatusMapping verifies each documented culpa code maps to
+// the expected HTTP status, and the response body has the RFC 7807 shape with
+// the code echoed under the "code" extension field.
+func TestRender_CodeToStatusMapping(t *testing.T) {
+ cases := []struct {
+ name string
+ code string
+ wantSt int
+ wantTit string
+ }{
+ {"not-found", "NOT_FOUND", http.StatusNotFound, http.StatusText(http.StatusNotFound)},
+ {"invalid", "INVALID", http.StatusBadRequest, http.StatusText(http.StatusBadRequest)},
+ {"validation", "VALIDATION", http.StatusBadRequest, http.StatusText(http.StatusBadRequest)},
+ {"unauthorized", "UNAUTHORIZED", http.StatusUnauthorized, http.StatusText(http.StatusUnauthorized)},
+ {"forbidden", "FORBIDDEN", http.StatusForbidden, http.StatusText(http.StatusForbidden)},
+ {"conflict", "CONFLICT", http.StatusConflict, http.StatusText(http.StatusConflict)},
+ {"too-large", "TOO_LARGE", http.StatusRequestEntityTooLarge, http.StatusText(http.StatusRequestEntityTooLarge)},
+ {"unknown-defaults-500", "WHO_KNOWS", http.StatusInternalServerError, http.StatusText(http.StatusInternalServerError)},
+ }
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ err := culpa.WithCode(culpa.WithPublic(culpa.New("boom"), "user-facing"), tc.code)
+ rec := httptest.NewRecorder()
+ req := httptest.NewRequest(http.MethodGet, "/api/v1/things/42", nil)
+
+ Render(rec, req, err)
+
+ if got := rec.Code; got != tc.wantSt {
+ t.Fatalf("status = %d; want %d", got, tc.wantSt)
+ }
+ if ct := rec.Header().Get("Content-Type"); ct != "application/problem+json" {
+ t.Errorf("Content-Type = %q; want application/problem+json", ct)
+ }
+ var p Problem
+ if err := json.Unmarshal(rec.Body.Bytes(), &p); err != nil {
+ t.Fatalf("unmarshal body: %v", err)
+ }
+ if p.Status != tc.wantSt {
+ t.Errorf("body.status = %d; want %d", p.Status, tc.wantSt)
+ }
+ if p.Title != tc.wantTit {
+ t.Errorf("body.title = %q; want %q", p.Title, tc.wantTit)
+ }
+ if p.Instance != "/api/v1/things/42" {
+ t.Errorf("body.instance = %q; want /api/v1/things/42", p.Instance)
+ }
+ // 5xx codes have their detail sanitized to the constant message.
+ if tc.wantSt >= 500 {
+ if p.Detail != "internal server error" {
+ t.Errorf("body.detail = %q; want %q", p.Detail, "internal server error")
+ }
+ // Unknown code is not echoed as the public error code.
+ if p.Code == "WHO_KNOWS" {
+ t.Errorf("body.code = WHO_KNOWS leaked for 5xx response")
+ }
+ } else {
+ if p.Code != tc.code {
+ t.Errorf("body.code = %q; want %q", p.Code, tc.code)
+ }
+ if p.Detail != "user-facing" {
+ t.Errorf("body.detail = %q; want %q", p.Detail, "user-facing")
+ }
+ }
+ })
+ }
+}
+
+// TestRender_5xxSanitizesDetail proves an internal error never leaks the
+// underlying message (which may contain credentials or stack-like text)
+// through the response body.
+func TestRender_5xxSanitizesDetail(t *testing.T) {
+ sensitive := "DB password=hunter2 connection refused"
+ err := culpa.Wrap(culpa.New(sensitive), "open db")
+
+ rec := httptest.NewRecorder()
+ req := httptest.NewRequest(http.MethodGet, "/whatever", nil)
+ Render(rec, req, err)
+
+ if rec.Code != http.StatusInternalServerError {
+ t.Fatalf("status = %d; want 500", rec.Code)
+ }
+ body := rec.Body.String()
+ if strings.Contains(body, "hunter2") || strings.Contains(body, "password") {
+ t.Fatalf("response body leaked sensitive substring; body=%q", body)
+ }
+ var p Problem
+ if err := json.Unmarshal([]byte(body), &p); err != nil {
+ t.Fatalf("unmarshal: %v", err)
+ }
+ if p.Detail != "internal server error" {
+ t.Errorf("detail = %q; want sanitized", p.Detail)
+ }
+}
+
+// TestRender_NoCodeDefaultsTo500 ensures errors without a CodeDetail still
+// produce a valid problem document at 500.
+func TestRender_NoCodeDefaultsTo500(t *testing.T) {
+ err := culpa.New("plain")
+
+ rec := httptest.NewRecorder()
+ req := httptest.NewRequest(http.MethodGet, "/x", nil)
+ Render(rec, req, err)
+
+ if rec.Code != http.StatusInternalServerError {
+ t.Fatalf("status = %d; want 500", rec.Code)
+ }
+ var p Problem
+ if err := json.Unmarshal(rec.Body.Bytes(), &p); err != nil {
+ t.Fatalf("unmarshal: %v", err)
+ }
+ if p.Status != 500 {
+ t.Errorf("body.status = %d; want 500", p.Status)
+ }
+ if p.Detail != "internal server error" {
+ t.Errorf("detail = %q; want sanitized", p.Detail)
+ }
+}
+
+// TestRender_PublicDetailUsedForClientErrors confirms PublicDetail is what
+// shows up in the client-facing detail for non-5xx responses.
+func TestRender_PublicDetailUsedForClientErrors(t *testing.T) {
+ err := culpa.WithCode(culpa.WithPublic(culpa.New("internal"), "session not found"), "NOT_FOUND")
+ rec := httptest.NewRecorder()
+ req := httptest.NewRequest(http.MethodGet, "/api/v1/sessions/abc", nil)
+ Render(rec, req, err)
+
+ if rec.Code != http.StatusNotFound {
+ t.Fatalf("status = %d; want 404", rec.Code)
+ }
+ var p Problem
+ if err := json.Unmarshal(rec.Body.Bytes(), &p); err != nil {
+ t.Fatalf("unmarshal: %v", err)
+ }
+ if p.Detail != "session not found" {
+ t.Errorf("detail = %q; want public message", p.Detail)
+ }
+ if p.Code != "NOT_FOUND" {
+ t.Errorf("code = %q; want NOT_FOUND", p.Code)
+ }
+}
A internal/pkg/httputil/httputil.go => internal/pkg/httputil/httputil.go +89 -0
@@ 0,0 1,89 @@
+// Package httputil holds the small set of request/response helpers shared
+// across HTTP handlers in lethe: JSON read with body cap and strict
+// decoding, JSON write, and an iterator-based NDJSON line reader for the
+// ingest endpoint.
+//
+// The helpers return culpa-coded errors so the apierror.Render boundary can
+// translate them into RFC 7807 responses without losing the cause.
+package httputil
+
+import (
+ "bufio"
+ "encoding/json"
+ "errors"
+ "io"
+ "iter"
+ "net/http"
+
+ "go.bigb.es/auxilia/culpa"
+)
+
+// ReadJSON limits the request body to maxBytes, decodes it into dst with
+// DisallowUnknownFields, and rejects trailing tokens. Body-cap exceedance
+// is reported with code TOO_LARGE; any other parse failure is INVALID.
+func ReadJSON[T any](r *http.Request, dst *T, maxBytes int64) error {
+ r.Body = http.MaxBytesReader(nil, r.Body, maxBytes)
+ dec := json.NewDecoder(r.Body)
+ dec.DisallowUnknownFields()
+ if err := dec.Decode(dst); err != nil {
+ var maxErr *http.MaxBytesError
+ if errors.As(err, &maxErr) {
+ return culpa.WithCode(culpa.Wrap(err, "request body exceeds limit"), "TOO_LARGE")
+ }
+ return culpa.WithCode(culpa.Wrap(err, "decode json body"), "INVALID")
+ }
+ // Reject trailing data after the first JSON value.
+ if dec.More() {
+ return culpa.WithCode(culpa.New("unexpected data after JSON body"), "INVALID")
+ }
+ return nil
+}
+
+// WriteJSON encodes v as JSON with the given status code and the standard
+// application/json content type. Encoder errors are returned so the caller
+// can decide how to surface them; in practice they only fire on a dropped
+// client connection.
+func WriteJSON(w http.ResponseWriter, status int, v any) error {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(status)
+ return json.NewEncoder(w).Encode(v)
+}
+
+// ReadNDJSONLines streams NDJSON lines from r, yielding the raw bytes of
+// each non-empty line. The yielded slice is owned by an internal buffer
+// and is reused on the next iteration: consumers that retain a line must
+// copy it themselves.
+//
+// Body-cap exceedance and oversized single lines surface as a (nil, err)
+// terminal yield with code TOO_LARGE; other scanner failures use INVALID.
+func ReadNDJSONLines(r io.Reader, maxBytes int64) iter.Seq2[[]byte, error] {
+ return func(yield func([]byte, error) bool) {
+ limited := io.LimitReader(r, maxBytes+1)
+ sc := bufio.NewScanner(limited)
+ // Allow the scanner to grow the token buffer up to the body cap.
+ // The starting buffer of 64 KiB is plenty for typical lines.
+ sc.Buffer(make([]byte, 0, 64*1024), int(maxBytes)+1)
+ var read int64
+ for sc.Scan() {
+ line := sc.Bytes()
+ read += int64(len(line)) + 1 // +1 for the consumed newline
+ if read > maxBytes {
+ yield(nil, culpa.WithCode(culpa.New("ndjson body exceeds limit"), "TOO_LARGE"))
+ return
+ }
+ if len(line) == 0 {
+ continue
+ }
+ if !yield(line, nil) {
+ return
+ }
+ }
+ if err := sc.Err(); err != nil {
+ if errors.Is(err, bufio.ErrTooLong) {
+ yield(nil, culpa.WithCode(culpa.Wrap(err, "ndjson line too long"), "TOO_LARGE"))
+ return
+ }
+ yield(nil, culpa.WithCode(culpa.Wrap(err, "scan ndjson"), "INVALID"))
+ }
+ }
+}
A internal/server/auth/authenticator.go => internal/server/auth/authenticator.go +22 -0
@@ 0,0 1,22 @@
+// Phase-5 stub. Replaced by real implementation in Phase 6.
+//
+// This stub only satisfies the compile-time surface that Server depends on:
+// an Init that does nothing and a Middleware that is the identity passthrough.
+// Phase 6 replaces this file with the forward-auth + OIDC implementation.
+package auth
+
+import (
+ "context"
+ "net/http"
+)
+
+// Authenticator is a no-op steward service. The real Phase-6 version will
+// hold OIDC verifier state and forward-auth config.
+type Authenticator struct{}
+
+// Init satisfies the steward Service contract.
+func (a *Authenticator) Init(_ context.Context) error { return nil }
+
+// Middleware returns next unchanged. Phase 6 replaces this with the
+// authentication chain.
+func (a *Authenticator) Middleware(next http.Handler) http.Handler { return next }
A internal/server/server.go => internal/server/server.go +279 -0
@@ 0,0 1,279 @@
+// Package server is the steward-managed HTTP layer for lethe. It owns the
+// chi router, the standard middleware stack (request-id, logging, metrics,
+// recovery), the unauthenticated probes (/healthz, /readyz, /metrics), and
+// the authenticated /api/v1/* group inside which the ingest and session
+// handlers register their routes.
+//
+// The Server is a steward.Root() service: it has no consumers and steward
+// would otherwise prune it. main.go (Phase 9) registers it as the root.
+//
+// Bind validation is fail-fast: only loopback IPs are accepted. Phase 5
+// keeps the system reachable by the local collector; exposing it on a
+// public interface is a config error.
+package server
+
+import (
+ "context"
+ "crypto/rand"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "log/slog"
+ "net"
+ "net/http"
+ "time"
+
+ "github.com/go-chi/chi/v5"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+ "go.bigb.es/auxilia/culpa"
+ "go.bigb.es/auxilia/scribe"
+
+ "sourcecraft.dev/bigbes/lethe/internal/config"
+ "sourcecraft.dev/bigbes/lethe/internal/domain/ingest"
+ "sourcecraft.dev/bigbes/lethe/internal/domain/session"
+ "sourcecraft.dev/bigbes/lethe/internal/pkg/apierror"
+ "sourcecraft.dev/bigbes/lethe/internal/pkg/httputil"
+ "sourcecraft.dev/bigbes/lethe/internal/platform/health"
+ "sourcecraft.dev/bigbes/lethe/internal/platform/observability"
+ authpkg "sourcecraft.dev/bigbes/lethe/internal/server/auth"
+)
+
+// readyzTimeout caps the time /readyz spends running every health check.
+// Long enough to absorb a transient slow probe, short enough that orchestrators
+// do not wait too long on a stuck dependency.
+const readyzTimeout = 5 * time.Second
+
+// Server is the HTTP steward service. Steward injects the shared platform
+// services and the auth/handler stubs (which Phases 6/7/8 replace).
+type Server struct {
+ Cfg config.ServerConfig `config:""`
+
+ Log *observability.Logger `inject:""`
+ Metrics *observability.Metrics `inject:""`
+ Health *health.Set `inject:""`
+
+ Auth *authpkg.Authenticator `inject:""`
+ Ingest *ingest.Handler `inject:""`
+ Sessions *session.Handler `inject:""`
+
+ router *chi.Mux
+ httpSrv *http.Server
+}
+
+// Init validates the bind address and constructs the chi router with the
+// standard middleware stack and all routes mounted. It does not start the
+// listener — that is Start's job.
+func (s *Server) Init(_ context.Context) error {
+ if err := validateLoopbackBind(s.Cfg.Bind); err != nil {
+ return err
+ }
+
+ r := chi.NewRouter()
+ r.Use(s.requestIDMiddleware)
+ r.Use(s.loggingMiddleware)
+ r.Use(s.metricsMiddleware)
+ r.Use(s.recoveryMiddleware)
+
+ r.NotFound(notFoundHandler)
+ r.MethodNotAllowed(methodNotAllowedHandler)
+
+ r.Get("/healthz", s.healthzHandler)
+ r.Get("/readyz", s.readyzHandler)
+ r.Handle("/metrics", promhttp.HandlerFor(s.Metrics.Registry, promhttp.HandlerOpts{}))
+
+ r.Route("/api/v1", func(r chi.Router) {
+ r.Use(s.Auth.Middleware)
+ s.Ingest.Mount(r)
+ s.Sessions.Mount(r)
+ })
+
+ s.router = r
+ return nil
+}
+
+// Start spawns ListenAndServe in the background and returns immediately.
+// Errors are logged; steward observes lifecycle via Stop.
+func (s *Server) Start(_ context.Context) error {
+ s.httpSrv = &http.Server{
+ Addr: s.Cfg.Bind,
+ Handler: s.router,
+ ReadHeaderTimeout: 10 * time.Second,
+ }
+ go func() {
+ if err := s.httpSrv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
+ slog.Default().Error("server crashed", scribe.Err(err))
+ }
+ }()
+ return nil
+}
+
+// Stop gracefully drains in-flight requests within ShutdownGrace, then
+// closes the listener. Idempotent if Start was never called.
+func (s *Server) Stop(ctx context.Context) error {
+ if s.httpSrv == nil {
+ return nil
+ }
+ grace := s.Cfg.ShutdownGrace
+ if grace <= 0 {
+ grace = 10 * time.Second
+ }
+ ctx2, cancel := context.WithTimeout(ctx, grace)
+ defer cancel()
+ return s.httpSrv.Shutdown(ctx2)
+}
+
+// validateLoopbackBind ensures s.Cfg.Bind resolves to a loopback IP.
+// Anything else is rejected at startup with code CONFIG_INVALID.
+func validateLoopbackBind(bind string) error {
+ host, _, err := net.SplitHostPort(bind)
+ if err != nil {
+ return culpa.WithCode(culpa.Wrap(err, "parse bind address"), "CONFIG_INVALID")
+ }
+ ip := net.ParseIP(host)
+ if ip == nil {
+ return culpa.WithCode(culpa.Errorf("bind host %q is not an IP literal", host), "CONFIG_INVALID")
+ }
+ if !ip.IsLoopback() {
+ return culpa.WithCode(culpa.Errorf("bind %q must resolve to a loopback IP", bind), "CONFIG_INVALID")
+ }
+ return nil
+}
+
+// healthzHandler is a constant-200 process-up probe.
+func (s *Server) healthzHandler(w http.ResponseWriter, _ *http.Request) {
+ w.Header().Set("Content-Type", "text/plain; charset=utf-8")
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write([]byte("ok"))
+}
+
+// readyzResponse is the JSON body returned by /readyz.
+type readyzResponse struct {
+ Checks map[string]string `json:"checks"`
+}
+
+// readyzHandler runs every registered Checker with a 5s ceiling. Any
+// failure flips the status to 503; the body always lists each check.
+func (s *Server) readyzHandler(w http.ResponseWriter, r *http.Request) {
+ ctx, cancel := context.WithTimeout(r.Context(), readyzTimeout)
+ defer cancel()
+
+ results, allOK := s.Health.Run(ctx)
+ body := readyzResponse{Checks: make(map[string]string, len(results))}
+ for name, err := range results {
+ if err == nil {
+ body.Checks[name] = "ok"
+ } else {
+ body.Checks[name] = err.Error()
+ }
+ }
+ status := http.StatusOK
+ if !allOK {
+ status = http.StatusServiceUnavailable
+ }
+ if err := httputil.WriteJSON(w, status, body); err != nil {
+ slog.Default().ErrorContext(r.Context(), "write readyz response", scribe.Err(err))
+ }
+}
+
+// requestIDMiddleware mints a 32-char hex id, attaches it to the request
+// context for the logger to pick up, and echoes it as X-Request-ID. The
+// header is set before next.ServeHTTP so even slow handlers expose it.
+func (s *Server) requestIDMiddleware(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ var buf [16]byte
+ // crypto/rand.Read in Go 1.24+ is documented infallible.
+ _, _ = rand.Read(buf[:])
+ id := hex.EncodeToString(buf[:])
+ w.Header().Set("X-Request-ID", id)
+ ctx := observability.WithRequestID(r.Context(), id)
+ next.ServeHTTP(w, r.WithContext(ctx))
+ })
+}
+
+// responseRecorder is a minimal ResponseWriter wrapper that captures the
+// final status code so the logging and metrics middlewares can label their
+// records correctly.
+type responseRecorder struct {
+ http.ResponseWriter
+ status int
+ wroteHeader bool
+}
+
+// WriteHeader records the status before delegating.
+func (rr *responseRecorder) WriteHeader(code int) {
+ if rr.wroteHeader {
+ return
+ }
+ rr.status = code
+ rr.wroteHeader = true
+ rr.ResponseWriter.WriteHeader(code)
+}
+
+// Write triggers an implicit 200 if the handler wrote without WriteHeader.
+func (rr *responseRecorder) Write(b []byte) (int, error) {
+ if !rr.wroteHeader {
+ rr.status = http.StatusOK
+ rr.wroteHeader = true
+ }
+ return rr.ResponseWriter.Write(b)
+}
+
+// loggingMiddleware emits one structured access log per request. The
+// request_id attribute is added automatically by contextHandler.
+func (s *Server) loggingMiddleware(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ start := time.Now()
+ rr := &responseRecorder{ResponseWriter: w, status: http.StatusOK}
+ next.ServeHTTP(rr, r)
+ slog.Default().InfoContext(r.Context(), "request",
+ slog.String("method", r.Method),
+ slog.String("path", r.URL.Path),
+ slog.Int("status", rr.status),
+ slog.Int64("duration_ms", time.Since(start).Milliseconds()),
+ )
+ })
+}
+
+// metricsMiddleware increments the request counter and observes the
+// duration histogram. The route label comes from chi's RoutePattern so
+// cardinality stays bounded; unmatched paths fall back to "unknown".
+func (s *Server) metricsMiddleware(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ start := time.Now()
+ rr := &responseRecorder{ResponseWriter: w, status: http.StatusOK}
+ next.ServeHTTP(rr, r)
+
+ route := "unknown"
+ if rc := chi.RouteContext(r.Context()); rc != nil {
+ if pat := rc.RoutePattern(); pat != "" {
+ route = pat
+ }
+ }
+ statusStr := fmt.Sprintf("%d", rr.status)
+ s.Metrics.HTTPRequests.WithLabelValues(r.Method, route, statusStr).Inc()
+ s.Metrics.HTTPDuration.WithLabelValues(r.Method, route).Observe(time.Since(start).Seconds())
+ })
+}
+
+// recoveryMiddleware turns a panic into a 500 problem document. The
+// stacktrace logging is handled by apierror.Render's 5xx branch, which
+// receives the panic-as-error.
+func (s *Server) recoveryMiddleware(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ defer func() {
+ if rec := recover(); rec != nil {
+ err := culpa.WithCode(culpa.Errorf("panic: %v", rec), "INTERNAL")
+ apierror.Render(w, r, err)
+ }
+ }()
+ next.ServeHTTP(w, r)
+ })
+}
+
+func notFoundHandler(w http.ResponseWriter, r *http.Request) {
+ apierror.Render(w, r, culpa.WithCode(culpa.New("route not found"), "NOT_FOUND"))
+}
+
+func methodNotAllowedHandler(w http.ResponseWriter, r *http.Request) {
+ apierror.Render(w, r, culpa.WithCode(culpa.New("method not allowed"), "METHOD_NOT_ALLOWED"))
+}
A internal/server/server_test.go => internal/server/server_test.go +253 -0
@@ 0,0 1,253 @@
+package server
+
+import (
+ "context"
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+
+ "github.com/go-chi/chi/v5"
+ "go.bigb.es/auxilia/culpa"
+
+ "sourcecraft.dev/bigbes/lethe/internal/config"
+ "sourcecraft.dev/bigbes/lethe/internal/domain/ingest"
+ "sourcecraft.dev/bigbes/lethe/internal/domain/session"
+ "sourcecraft.dev/bigbes/lethe/internal/platform/health"
+ "sourcecraft.dev/bigbes/lethe/internal/platform/observability"
+ authpkg "sourcecraft.dev/bigbes/lethe/internal/server/auth"
+)
+
+// newTestServer wires a Server with hand-constructed dependencies so unit
+// tests do not need to spin up the steward graph.
+func newTestServer(t *testing.T, bind string) *Server {
+ t.Helper()
+
+ logger := &observability.Logger{Cfg: config.LoggingConfig{Level: "info", Format: "json"}}
+ if err := logger.Init(context.Background()); err != nil {
+ t.Fatalf("logger.Init: %v", err)
+ }
+ metrics := &observability.Metrics{}
+ if err := metrics.Init(context.Background()); err != nil {
+ t.Fatalf("metrics.Init: %v", err)
+ }
+ return &Server{
+ Cfg: config.ServerConfig{
+ Bind: bind,
+ ShutdownGrace: 5 * time.Second,
+ },
+ Log: logger,
+ Metrics: metrics,
+ Health: &health.Set{},
+ Auth: &authpkg.Authenticator{},
+ Ingest: &ingest.Handler{},
+ Sessions: &session.Handler{},
+ }
+}
+
+func TestServerInit_RejectsNonLoopbackBind(t *testing.T) {
+ s := newTestServer(t, "0.0.0.0:8080")
+ err := s.Init(context.Background())
+ if err == nil {
+ t.Fatalf("Init: expected error for non-loopback bind")
+ }
+ var cd culpa.CodeDetail
+ if !culpa.FindDetail(err, &cd) {
+ t.Fatalf("Init: expected culpa CodeDetail; got %v", err)
+ }
+ if cd.Code != "CONFIG_INVALID" {
+ t.Errorf("Init: code = %v; want CONFIG_INVALID", cd.Code)
+ }
+}
+
+func TestServerInit_AcceptsLoopback(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ if s.router == nil {
+ t.Fatalf("Init: router not built")
+ }
+}
+
+func TestRouter_RecoveryTurnsPanicInto500Problem(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ // Mount a panicking route on the router *after* Init wired the
+ // middleware chain so the recovery middleware sits in front of it.
+ s.router.Get("/boom", func(http.ResponseWriter, *http.Request) {
+ panic("kaboom")
+ })
+
+ req := httptest.NewRequest(http.MethodGet, "/boom", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+
+ if rec.Code != http.StatusInternalServerError {
+ t.Fatalf("status = %d; want 500", rec.Code)
+ }
+ if ct := rec.Header().Get("Content-Type"); ct != "application/problem+json" {
+ t.Errorf("Content-Type = %q; want application/problem+json", ct)
+ }
+ var body map[string]any
+ if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+ t.Fatalf("unmarshal body: %v", err)
+ }
+ if body["status"].(float64) != 500 {
+ t.Errorf("body.status = %v; want 500", body["status"])
+ }
+ if body["detail"] != "internal server error" {
+ t.Errorf("body.detail = %v; want sanitized message", body["detail"])
+ }
+}
+
+func TestRouter_RequestIDInResponseAndContext(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ var fromCtx string
+ s.router.Get("/probe", func(w http.ResponseWriter, r *http.Request) {
+ fromCtx = observability.RequestIDFrom(r.Context())
+ w.WriteHeader(http.StatusOK)
+ })
+
+ req := httptest.NewRequest(http.MethodGet, "/probe", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+
+ if rec.Code != http.StatusOK {
+ t.Fatalf("status = %d; want 200", rec.Code)
+ }
+ headerID := rec.Header().Get("X-Request-ID")
+ if headerID == "" {
+ t.Fatalf("X-Request-ID header missing")
+ }
+ if fromCtx == "" {
+ t.Fatalf("request id missing from context")
+ }
+ if headerID != fromCtx {
+ t.Errorf("ctx id %q != header id %q", fromCtx, headerID)
+ }
+}
+
+func TestRouter_HealthzReturnsOK(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+ if rec.Code != http.StatusOK {
+ t.Fatalf("status = %d; want 200", rec.Code)
+ }
+ if rec.Body.String() != "ok" {
+ t.Errorf("body = %q; want ok", rec.Body.String())
+ }
+}
+
+func TestRouter_ReadyzAllOKWithEmptyChecks(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+ if rec.Code != http.StatusOK {
+ t.Fatalf("status = %d; want 200", rec.Code)
+ }
+}
+
+func TestRouter_MetricsExposesPrometheus(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+ if rec.Code != http.StatusOK {
+ t.Fatalf("status = %d; want 200", rec.Code)
+ }
+ if got := rec.Body.String(); got == "" {
+ t.Errorf("metrics body empty")
+ }
+}
+
+// TestRouter_APIv1MountsAuthMiddleware is a smoke test: the auth middleware
+// is the identity passthrough in Phase 5, but the /api/v1 group must still
+// wire it in so Phase 6's replacement immediately takes effect on every
+// route registered by the ingest/session handlers.
+func TestRouter_APIv1MountsAuthMiddleware(t *testing.T) {
+ s := newTestServer(t, "127.0.0.1:0")
+ if err := s.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+ // Hang a probe route off the /api/v1 group via the same chi router.
+ s.router.Route("/api/v1/probe", func(r chi.Router) {
+ r.Get("/", func(w http.ResponseWriter, _ *http.Request) {
+ w.WriteHeader(http.StatusTeapot)
+ })
+ })
+ req := httptest.NewRequest(http.MethodGet, "/api/v1/probe", nil)
+ rec := httptest.NewRecorder()
+ s.router.ServeHTTP(rec, req)
+ if rec.Code != http.StatusTeapot {
+ t.Fatalf("status = %d; want 418", rec.Code)
+ }
+}
+
+func TestRouter_NotFoundReturnsProblemJSON(t *testing.T) {
+ srv := newTestServer(t, "127.0.0.1:0")
+ if err := srv.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+
+ req := httptest.NewRequest(http.MethodGet, "/no-such-route", nil)
+ rec := httptest.NewRecorder()
+ srv.router.ServeHTTP(rec, req)
+
+ if rec.Code != http.StatusNotFound {
+ t.Fatalf("status: got %d, want 404", rec.Code)
+ }
+ if ct := rec.Header().Get("Content-Type"); ct != "application/problem+json" {
+ t.Fatalf("Content-Type: got %q, want application/problem+json", ct)
+ }
+ var p map[string]any
+ if err := json.Unmarshal(rec.Body.Bytes(), &p); err != nil {
+ t.Fatalf("body is not JSON: %v", err)
+ }
+ if got, _ := p["code"].(string); got != "NOT_FOUND" {
+ t.Fatalf("code: got %q, want NOT_FOUND", got)
+ }
+}
+
+func TestRouter_MethodNotAllowedReturnsProblemJSON(t *testing.T) {
+ srv := newTestServer(t, "127.0.0.1:0")
+ if err := srv.Init(context.Background()); err != nil {
+ t.Fatalf("Init: %v", err)
+ }
+
+ req := httptest.NewRequest(http.MethodPost, "/healthz", nil)
+ rec := httptest.NewRecorder()
+ srv.router.ServeHTTP(rec, req)
+
+ if rec.Code != http.StatusMethodNotAllowed {
+ t.Fatalf("status: got %d, want 405", rec.Code)
+ }
+ if ct := rec.Header().Get("Content-Type"); ct != "application/problem+json" {
+ t.Fatalf("Content-Type: got %q, want application/problem+json", ct)
+ }
+ var p map[string]any
+ if err := json.Unmarshal(rec.Body.Bytes(), &p); err != nil {
+ t.Fatalf("body is not JSON: %v", err)
+ }
+ if got, _ := p["code"].(string); got != "METHOD_NOT_ALLOWED" {
+ t.Fatalf("code: got %q, want METHOD_NOT_ALLOWED", got)
+ }
+}