package search
import (
"context"
"crypto/sha256"
"database/sql"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"strings"
"go.bigb.es/auxilia/culpa"
"sourcecraft.dev/bigbes/lethe/internal/domain/session"
"sourcecraft.dev/bigbes/lethe/internal/platform/database"
)
const (
SourceTurn = "turn"
SourceToolOutput = "tool_output"
cursorVersion = 1
)
// Result is the repository response shape for paginated search output.
type Result struct {
Results []Row `json:"results"`
Limit int `json:"limit"`
NextCursor string `json:"next_cursor,omitempty"`
Rows []Row `json:"-"`
}
// Row is one matched turn. Snippet is generated by SQLite FTS5 and uses marker
// runes around hits; it never contains HTML markup added by the repository.
type Row struct {
Owner string `db:"owner" json:"owner"`
Tool string `db:"tool" json:"tool"`
Host string `db:"host" json:"host"`
SessionID string `db:"session_id" json:"session_id"`
WorkingDir *string `db:"working_dir" json:"working_dir,omitempty"`
TurnID string `db:"turn_id" json:"turn_id"`
Seq int64 `db:"seq" json:"seq"`
Role string `db:"role" json:"role"`
Timestamp int64 `db:"timestamp" json:"timestamp"`
Rank float64 `db:"rank" json:"rank"`
MatchSource string `db:"match_source" json:"match_source"`
Snippet string `db:"snippet" json:"snippet"`
RowID int64 `db:"rowid" json:"-"`
}
// Filter controls query text, owner scoping, optional filters, and pagination.
type Filter struct {
Owner session.OwnerScope
Query string
IncludeToolOutputs bool
Tool *string
Host *string
Since *int64
Until *int64
Limit int
Cursor string
}
// Cursor is the stable keyset position for the last row returned by a page.
type Cursor struct {
Rank float64 `json:"rank"`
Timestamp int64 `json:"timestamp"`
TurnID string `json:"turn_id"`
MatchSource string `json:"match_source"`
RowID int64 `json:"rowid,omitempty"`
}
// Repository is the SQL steward for FTS search. It is read-only: Search only
// builds SELECT statements over the existing FTS and turns tables.
type Repository struct {
Database *database.Database `inject:""`
}
func (r *Repository) Init(_ context.Context) error { return nil }
// Search executes an FTS5 query against prose turns by default and unions tool
// output matches only when requested. Duplicate hits for the same turn are
// collapsed by best rank, with deterministic tie-breakers.
func (r *Repository) Search(ctx context.Context, f Filter) (*Result, error) {
if strings.TrimSpace(f.Query) == "" {
return nil, invalid("search query is empty")
}
if f.Limit <= 0 {
return &Result{Results: []Row{}, Limit: f.Limit, Rows: []Row{}}, nil
}
var after *Cursor
if f.Cursor != "" {
c, err := DecodeCursor(f.Cursor, f)
if err != nil {
return nil, err
}
after = &c
}
query, args := buildSearchSQL(f, after)
rows := make([]Row, 0)
if err := r.Database.DB.SelectContext(ctx, &rows, query, args...); err != nil {
if isFTSError(err) {
return nil, invalid("invalid search query")
}
return nil, culpa.WithCode(culpa.Wrap(err, "search"), "DB_QUERY")
}
res := &Result{Results: rows, Limit: f.Limit, Rows: rows}
if len(rows) > f.Limit {
last := rows[f.Limit-1]
res.Results = rows[:f.Limit]
res.Rows = res.Results
next, err := EncodeCursor(Cursor{Rank: last.Rank, Timestamp: last.Timestamp, TurnID: last.TurnID, MatchSource: last.MatchSource, RowID: last.RowID}, f)
if err != nil {
return nil, err
}
res.NextCursor = next
}
return res, nil
}
func buildSearchSQL(f Filter, after *Cursor) (string, []any) {
limit := f.Limit + 1
var sb strings.Builder
args := make([]any, 0, 12)
sb.WriteString(`WITH candidates AS (`)
appendFTSSelect(&sb, &args, "turns_fts", SourceTurn, 0, f)
if f.IncludeToolOutputs {
sb.WriteString(` UNION ALL `)
appendFTSSelect(&sb, &args, "tool_outputs_fts", SourceToolOutput, 1, f)
}
sb.WriteString(`), ranked AS (`)
sb.WriteString(`SELECT *, ROW_NUMBER() OVER (PARTITION BY rowid ORDER BY rank ASC, match_source ASC, rowid ASC) AS rn FROM candidates`)
sb.WriteString(`) SELECT t.owner, t.tool, t.host, t.session_id, s.working_dir, t.turn_id, t.seq, t.role, t.timestamp, ranked.rank, ranked.match_source, ranked.snippet, ranked.rowid`)
sb.WriteString(` FROM ranked JOIN turns t ON t.rowid = ranked.rowid`)
sb.WriteString(` JOIN sessions s ON s.owner = t.owner AND s.tool = t.tool AND s.host = t.host AND s.session_id = t.session_id`)
sb.WriteString(` WHERE ranked.rn = 1`)
if f.Since != nil {
sb.WriteString(` AND t.timestamp >= ?`)
args = append(args, *f.Since)
}
if f.Until != nil {
sb.WriteString(` AND t.timestamp <= ?`)
args = append(args, *f.Until)
}
if after != nil {
sb.WriteString(` AND (`)
sb.WriteString(`ranked.rank > ?`)
sb.WriteString(` OR (ranked.rank = ? AND t.timestamp < ?)`)
sb.WriteString(` OR (ranked.rank = ? AND t.timestamp = ? AND t.turn_id > ?)`)
sb.WriteString(` OR (ranked.rank = ? AND t.timestamp = ? AND t.turn_id = ? AND ranked.match_source > ?)`)
sb.WriteString(` OR (ranked.rank = ? AND t.timestamp = ? AND t.turn_id = ? AND ranked.match_source = ? AND ranked.rowid > ?)`)
sb.WriteString(`)`)
args = append(args,
after.Rank,
after.Rank, after.Timestamp,
after.Rank, after.Timestamp, after.TurnID,
after.Rank, after.Timestamp, after.TurnID, after.MatchSource,
after.Rank, after.Timestamp, after.TurnID, after.MatchSource, after.RowID,
)
}
sb.WriteString(` ORDER BY ranked.rank ASC, t.timestamp DESC, t.turn_id ASC, ranked.match_source ASC, ranked.rowid ASC LIMIT ?`)
args = append(args, limit)
return sb.String(), args
}
func appendFTSSelect(sb *strings.Builder, args *[]any, table, source string, priority int, f Filter) {
fmt.Fprintf(sb, `SELECT rowid, %d AS source_priority, '%s' AS match_source, bm25(%s) AS rank, snippet(%s, 0, char(2), char(3), '…', 12) AS snippet FROM %s WHERE %s MATCH ?`, priority, source, table, table, table, table)
*args = append(*args, f.Query)
appendFTSFilters(sb, args, table, f)
}
func appendFTSFilters(sb *strings.Builder, args *[]any, table string, f Filter) {
switch {
case f.Owner.AllOwners:
case f.Owner.SpecificOwner != nil:
fmt.Fprintf(sb, ` AND %s.owner = ?`, table)
*args = append(*args, *f.Owner.SpecificOwner)
default:
fmt.Fprintf(sb, ` AND %s.owner = ?`, table)
*args = append(*args, f.Owner.User)
}
if f.Tool != nil {
fmt.Fprintf(sb, ` AND %s.tool = ?`, table)
*args = append(*args, *f.Tool)
}
if f.Host != nil {
fmt.Fprintf(sb, ` AND %s.host = ?`, table)
*args = append(*args, *f.Host)
}
}
// EncodeCursor returns an opaque cursor bound to the normalized search tuple.
func EncodeCursor(c Cursor, f Filter) (string, error) {
p := cursorPayload{Version: cursorVersion, Cursor: c, FilterHash: filterHash(f)}
b, err := json.Marshal(p)
if err != nil {
return "", culpa.WithCode(culpa.Wrap(err, "encode search cursor"), "INTERNAL")
}
return base64.RawURLEncoding.EncodeToString(b), nil
}
// DecodeCursor parses an opaque cursor and rejects cursors created for another
// normalized query/filter tuple.
func DecodeCursor(raw string, f Filter) (Cursor, error) {
b, err := base64.RawURLEncoding.DecodeString(raw)
if err != nil {
return Cursor{}, invalid("invalid search cursor")
}
var p cursorPayload
if err := json.Unmarshal(b, &p); err != nil {
return Cursor{}, invalid("invalid search cursor")
}
if p.Version != cursorVersion || p.FilterHash != filterHash(f) || p.Cursor.TurnID == "" || p.Cursor.MatchSource == "" || p.Cursor.RowID <= 0 {
return Cursor{}, invalid("invalid search cursor")
}
return p.Cursor, nil
}
type cursorPayload struct {
Version int `json:"v"`
FilterHash string `json:"h"`
Cursor Cursor `json:"c"`
}
type normalizedFilter struct {
OwnerUser string `json:"owner_user"`
OwnerAllOwners bool `json:"owner_all_owners"`
OwnerSpecificOwner *string `json:"owner_specific_owner"`
Query string `json:"query"`
IncludeToolOutputs bool `json:"include_tool_outputs"`
Tool *string `json:"tool"`
Host *string `json:"host"`
Since *int64 `json:"since"`
Until *int64 `json:"until"`
}
func filterHash(f Filter) string {
n := normalizedFilter{
OwnerUser: f.Owner.User,
OwnerAllOwners: f.Owner.AllOwners,
OwnerSpecificOwner: f.Owner.SpecificOwner,
Query: strings.TrimSpace(f.Query),
IncludeToolOutputs: f.IncludeToolOutputs,
Tool: f.Tool,
Host: f.Host,
Since: f.Since,
Until: f.Until,
}
b, err := json.Marshal(n)
if err != nil {
panic(err)
}
sum := sha256.Sum256(b)
return hex.EncodeToString(sum[:])
}
func invalid(msg string) error {
return culpa.WithCode(culpa.New(msg), "INVALID")
}
func isFTSError(err error) bool {
if errors.Is(err, sql.ErrNoRows) {
return false
}
s := strings.ToLower(err.Error())
return strings.Contains(s, "fts5") || strings.Contains(s, "fts syntax") || strings.Contains(s, "malformed match") || strings.Contains(s, "unterminated string")
}