~bigbes/sourcehut-root

ref: dac87f392932225a36061838adbea0d8fa3cee44 sourcehut-root/.claude/scripts/build-index.sh -rwxr-xr-x 7.1 KiB
dac87f39 — Eugene Blikh Add BSD 2-Clause license for original workspace files 6 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env bash
# Regenerate .claude/INDEX.md — a per-service inventory of GraphQL types, SQL
# tables, Python blueprints, and notable Go packages. Read by the
# sourcehut-lookup skill as the first step of any documentation lookup.
#
# Runs in seconds. Safe to re-run. Called as the final step of sourcehut-refresh.

# Treat references to unset variables as errors, then make the directory two
# levels above this script the working directory so that every relative path
# used below resolves from there. Bail out if that directory is unreachable.
set -o nounset
if ! cd "$(dirname "$0")/../.."; then
  exit 1
fi
ROOT=$PWD
OUT="${ROOT}/.claude/INDEX.md"

# Collapse the lines on stdin into a single line: sort and de-duplicate them,
# then join the survivors with ", " (comma followed by a space).
join_csv() {
  sort -u \
    | paste -s -d ',' - \
    | sed 's/,/, /g'
}

# Collect the names of the immediate subdirectories that are git checkouts.
# A checkout is recognised by a `.git` entry that is either a directory
# (standalone clone) or a regular file (submodule gitlink, which is what this
# workspace has after `git submodule absorbgitdirs`). The bare directory
# names are stored, sorted, in the REPOS array.
readarray -t REPOS < <(
  find . -mindepth 2 -maxdepth 2 -name .git '(' -type d -o -type f ')' 2>/dev/null |
    sed -E 's:^\./([^/]+)/\.git$:\1:' |
    sort
)

# Build the index in a scratch file first. Stop immediately if the scratch
# file cannot be created: otherwise `tmp` would be empty and every redirect
# below (and the final move) would fail one by one while the script carried on.
tmp=$(mktemp) || {
  echo "build-index: mktemp failed" >&2
  exit 1
}
# Remove the scratch file on any exit path that happens before it is moved
# into place.
trap 'rm -f "$tmp"' EXIT

# Preamble: title, provenance notice, generation timestamp (UTC), then one
# bullet per repo showing the tag HEAD sits exactly on, falling back to the
# output of `git rev-parse --abbrev-ref HEAD` when there is no such tag.
{
  printf '%s\n' \
    '# SourceHut documentation mirror — index' \
    '' \
    'Auto-generated by `.claude/scripts/build-index.sh`. Regenerated at the end of every `sourcehut-refresh` run. Do not edit by hand — changes will be overwritten.' \
    '' \
    'Read this **before** grepping. Each section gives you the canonical place to look for symbols in that repo.' \
    '' \
    "_Generated: $(date -u +'%Y-%m-%d %H:%M:%S UTC')_" \
    '' \
    '## Repos' \
    ''
  for repo in "${REPOS[@]}"; do
    tag=$(git -C "$repo" describe --tags --exact-match 2>/dev/null \
          || git -C "$repo" rev-parse --abbrev-ref HEAD 2>/dev/null)
    echo "- \`$repo\` — $tag"
  done
  echo
} > "$tmp"

# Reduce each path on stdin to its final component. Done as a plain text
# filter so names containing spaces or quotes pass through intact and no
# process is spawned per file.
_strip_dirs() { sed 's|.*/||'; }

#######################################
# Print the Markdown section for a single repo: the checked-out tag (or
# branch), GraphQL schema types, SQL tables and migration count, Python
# blueprints and GraphQL client queries, Go binaries / root files / packages,
# and, for a repo that only ships a bare `srht/` package, its modules and
# subpackages. Subsections with nothing to report are omitted.
# Globals:   none written (calls join_csv)
# Arguments: $1 - repo directory, as a path relative to the current directory
# Outputs:   Markdown on stdout, terminated by a `---` rule and a blank line
# Returns:   0
#######################################
extract_one() {
  local repo="$1"
  local tag
  # Exact tag if HEAD sits on one, otherwise "(branch: <abbrev-ref of HEAD>)".
  tag=$(git -C "$repo" describe --tags --exact-match 2>/dev/null \
        || echo "(branch: $(git -C "$repo" rev-parse --abbrev-ref HEAD 2>/dev/null))")

  echo "## $repo"
  echo
  echo "_Tag:_ \`$tag\`"
  echo

  # GraphQL types — typically <repo>/api/graph/schema.graphqls.
  # Schema paths are kept in an array so a path containing whitespace is still
  # handed to grep as one argument.
  local -a graphqls=()
  mapfile -t graphqls < <(
    find "$repo" -name '*.graphqls' -not -path '*/node_modules/*' 2>/dev/null
  )
  if [ "${#graphqls[@]}" -gt 0 ]; then
    local gqlpath kind names
    # Show the first schema file, relative to the repo. The prefix is removed
    # literally (no regex), so dots in names like `git.sr.ht` are not wildcards.
    gqlpath=${graphqls[0]#"$repo"/}
    echo "**GraphQL types** (\`$gqlpath\`)"
    echo
    for kind in type input enum interface union scalar; do
      # Second word of each definition line, minus any `(`/`{` glued onto it.
      names=$(grep -hE "^${kind} [A-Z][A-Za-z0-9_]*" -- "${graphqls[@]}" 2>/dev/null \
              | awk '{print $2}' \
              | sed 's/[({].*//' \
              | join_csv)
      if [ -n "$names" ]; then
        echo "- \`${kind}\`: $names"
      fi
    done
    echo
  fi

  # SQL tables — <repo>/schema.sql
  if [ -f "$repo/schema.sql" ]; then
    local tables
    tables=$(grep -hE '^CREATE TABLE' "$repo/schema.sql" 2>/dev/null \
             | sed -E 's/CREATE TABLE (IF NOT EXISTS )?"?([A-Za-z0-9_]+)"?.*/\2/' \
             | join_csv)
    if [ -n "$tables" ]; then
      echo "**SQL tables** (\`schema.sql\`)"
      echo
      echo "$tables"
      echo
    fi
    # Migrations count, plus the file that sorts last.
    if [ -d "$repo/migrations" ]; then
      local mcount latest
      mcount=$(find "$repo/migrations" -name '*.sql' 2>/dev/null | wc -l | tr -d ' ')
      latest=$(find "$repo/migrations" -name '*.sql' 2>/dev/null | sort | tail -n1)
      latest=${latest#"$repo"/}
      if [ "$mcount" -gt 0 ]; then
        echo "_Migrations: $mcount files, latest \`$latest\`_"
        echo
      fi
    fi
  fi

  # Python blueprints — <repo>/<svc>srht/blueprints/*.py (require a prefix so we
  # don't match core.sr.ht's bare `srht/` — that's handled separately below)
  local py_pkg
  py_pkg=$(find "$repo" -maxdepth 2 -type d -name '?*srht' -not -path '*/contrib/*' 2>/dev/null | head -n1)
  if [ -n "$py_pkg" ] && [ -d "$py_pkg/blueprints" ]; then
    local bps
    bps=$(find "$py_pkg/blueprints" -maxdepth 1 -name '*.py' -not -name '__init__.py' 2>/dev/null \
          | _strip_dirs \
          | sed 's/\.py$//' \
          | join_csv)
    if [ -n "$bps" ]; then
      echo "**Python blueprints** (\`${py_pkg##*/}/blueprints/\`): $bps"
      echo
    fi
  fi
  # Python GraphQL client queries
  if [ -n "$py_pkg" ] && [ -d "$py_pkg/graphql" ]; then
    local pyql
    pyql=$(find "$py_pkg/graphql" -maxdepth 1 -name '*.graphql' 2>/dev/null \
           | _strip_dirs \
           | sed 's/\.graphql$//' \
           | join_csv)
    if [ -n "$pyql" ]; then
      echo "**Python GraphQL queries** (\`${py_pkg##*/}/graphql/\`): $pyql"
      echo
    fi
  fi

  # Notable Go subdirs — cmd/*, top-level dirs with *.go, plus root-level .go files
  if [ -f "$repo/go.mod" ]; then
    local cmds gopkgs rootgo
    cmds=$(find "$repo/cmd" -mindepth 1 -maxdepth 1 -type d 2>/dev/null \
           | _strip_dirs | join_csv)
    if [ -n "$cmds" ]; then
      echo "**Go binaries** (\`cmd/\`): $cmds"
      echo
    fi
    # Repo-root .go files (e.g. api.sr.ht has main.go + auth.go at top)
    rootgo=$(find "$repo" -maxdepth 1 -name '*.go' 2>/dev/null \
             | _strip_dirs | join_csv)
    if [ -n "$rootgo" ]; then
      echo "**Go files (root)**: $rootgo"
      echo
    fi
    # Top-level Go packages: the directory of every <repo>/<dir>/*.go file.
    # The exclusion is anchored at both ends so only the directories named
    # exactly cmd / migrations / node_modules are dropped — a package such as
    # `cmdutil` stays in the list. At most 20 packages are shown.
    gopkgs=$(find "$repo" -mindepth 2 -maxdepth 2 -name '*.go' -not -path '*/generated*' 2>/dev/null \
             | while IFS= read -r gofile; do
                 gofile=${gofile#"$repo"/}
                 printf '%s\n' "${gofile%/*}"
               done \
             | sort -u \
             | grep -vE '^(cmd|migrations|node_modules)$' \
             | head -n 20 \
             | join_csv)
    if [ -n "$gopkgs" ]; then
      echo "**Go packages**: \`$gopkgs\`"
      echo
    fi
  fi

  # Plain Python package (no `<svc>srht` directory) — e.g. core.sr.ht ships `srht/`
  if [ -z "$py_pkg" ] && [ -d "$repo/srht" ]; then
    local srht_mods srht_subs
    srht_mods=$(find "$repo/srht" -maxdepth 1 -name '*.py' -not -name '__init__.py' 2>/dev/null \
                | _strip_dirs | sed 's/\.py$//' | join_csv)
    srht_subs=$(find "$repo/srht" -mindepth 1 -maxdepth 1 -type d -not -name '__pycache__' 2>/dev/null \
                | _strip_dirs | join_csv)
    if [ -n "$srht_mods" ]; then
      echo "**Python modules** (\`srht/\`): $srht_mods"
      echo
    fi
    if [ -n "$srht_subs" ]; then
      echo "**Python subpackages** (\`srht/\`): $srht_subs"
      echo
    fi
  fi

  echo "---"
  echo
}

# Append one Markdown section per discovered repo to the scratch file.
for repo in "${REPOS[@]}"; do
  extract_one "$repo"
done >> "$tmp"

# Cross-repo symbol map: which repos define each GraphQL `type`, `input`, or
# `enum` name? Only names defined in two or more repos are printed, sorted by
# name. Schemas under node_modules are skipped, matching the per-repo sections.
{
  echo "## Cross-repo GraphQL type map"
  echo
  echo "When the same type name appears in multiple repos, it is a *different* type in each — they are not federated as one. Use this map to pick the right service before reading."
  echo
  # type_to_repos: name -> ", "-joined list of the repos defining it
  # type_counts:   name -> how many repos are in that list
  declare -A type_to_repos=() type_counts=()
  for repo in "${REPOS[@]}"; do
    while IFS= read -r name; do
      [ -z "$name" ] && continue
      # `:+` only inserts the separator once the entry already has a value,
      # and is safe under `set -u` for a key that does not exist yet.
      type_to_repos[$name]="${type_to_repos[$name]:+${type_to_repos[$name]}, }$repo"
      type_counts[$name]=$(( ${type_counts[$name]:-0} + 1 ))
    done < <(
      # `sort -u` guarantees each name is reported at most once per repo, so
      # the counter above counts repos, not definitions.
      find "$repo" -name '*.graphqls' -not -path '*/node_modules/*' \
        -exec grep -hE '^(type|input|enum) [A-Z][A-Za-z0-9_]*' {} + 2>/dev/null \
        | awk '{print $2}' | sed 's/[({].*//' | sort -u
    )
  done
  # Print only types defined in >=2 repos (the interesting cases).
  while IFS= read -r name; do
    [ -z "$name" ] && continue
    if [ "${type_counts[$name]}" -ge 2 ]; then
      echo "- \`$name\` → ${type_to_repos[$name]}"
    fi
  done < <(printf '%s\n' "${!type_to_repos[@]}" | sort)
} >> "$tmp"

# Publish the finished index.
# mktemp creates its file readable by the owner only and mv keeps that mode,
# so first widen it to what a normally-created file would get: `=rw` with no
# "who" part sets rw for everyone except the bits masked out by the umask.
chmod =rw "$tmp"
# Disarm the cleanup trap only after the move has succeeded. If the move
# fails, the trap still removes the scratch file and the script exits
# non-zero instead of printing a misleading "Wrote" line.
if ! mv "$tmp" "$OUT"; then
  echo "build-index: could not move $tmp to $OUT" >&2
  exit 1
fi
trap - EXIT
echo "Wrote $OUT ($(wc -l < "$OUT" | tr -d ' ') lines, $(wc -c < "$OUT" | tr -d ' ') bytes)"