#!/usr/bin/env python3
"""Generate llms.txt and llms-full.txt for the Hermes docs site.

Outputs:
  website/static/llms.txt      — short curated index of the docs, one link per page,
                                 grouped by section. Conforms to https://llmstxt.org.
  website/static/llms-full.txt — every `.md` file under `website/docs/` concatenated
                                 (except the auto-generated per-skill pages under
                                 `user-guide/skills/bundled` and
                                 `user-guide/skills/optional`), with `# <title>`
                                 headings and `<!-- source: … -->` comments
                                 separating files.

Both publish at:
  https://hermes-agent.nousresearch.com/docs/llms.txt
  https://hermes-agent.nousresearch.com/docs/llms-full.txt

The `/docs/` prefix is not a mistake — Docusaurus serves `website/static/`
at the `docs/` base path. Clients and IDE plugins that probe the classic
`/llms.txt` root will miss these files, so document the canonical URLs in the
docs index and in the repo README.

Called from `website/scripts/prebuild.mjs` on every `npm run start` /
`npm run build` so the output stays in sync with the docs tree.
"""
| 23 | + |
| 24 | +from __future__ import annotations |
| 25 | + |
| 26 | +import re |
| 27 | +from pathlib import Path |
| 28 | + |
# Directory that contains this script (symlinks resolved).
SCRIPT_DIR = Path(__file__).resolve().parent
# Website root: the directory one level above the script directory.
WEBSITE = SCRIPT_DIR.parent
# Markdown sources that are read, and the static directory that is written.
DOCS = WEBSITE.joinpath("docs")
STATIC = WEBSITE.joinpath("static")

# Public URL prefix placed in front of every page link in llms.txt.
SITE_BASE = "https://hermes-agent.nousresearch.com/docs"
| 35 | + |
# Curated table of contents for llms.txt. The grouping follows the product
# story rather than the directory layout of the docs tree.
#
# Shape: a list of (section heading, entries). Every entry is a 3-tuple of
#   1. the page path relative to the docs root, without the `.md` suffix,
#   2. the link text shown in the index,
#   3. an optional short description; `None` means "use the `description:`
#      field from the page's frontmatter".
SECTIONS: list[tuple[str, list[tuple[str, str, str | None]]]] = [
    (
        "Getting Started",
        [
            ("getting-started/installation", "Installation", None),
            ("getting-started/quickstart", "Quickstart", None),
            ("getting-started/learning-path", "Learning Path", None),
            ("getting-started/updating", "Updating", None),
            ("getting-started/termux", "Termux (Android)", None),
            ("getting-started/nix-setup", "Nix Setup", None),
        ],
    ),
    (
        "Using Hermes",
        [
            ("user-guide/cli", "CLI", None),
            ("user-guide/tui", "TUI (Ink terminal UI)", None),
            ("user-guide/configuration", "Configuration", None),
            ("user-guide/configuring-models", "Configuring Models", None),
            ("user-guide/sessions", "Sessions", None),
            ("user-guide/profiles", "Profiles", None),
            ("user-guide/git-worktrees", "Git Worktrees", None),
            ("user-guide/docker", "Docker Backend", None),
            ("user-guide/security", "Security", None),
            ("user-guide/checkpoints-and-rollback", "Checkpoints & Rollback", None),
        ],
    ),
    (
        "Core Features",
        [
            ("user-guide/features/overview", "Features Overview", None),
            ("user-guide/features/tools", "Tools", None),
            ("user-guide/features/skills", "Skills System", None),
            ("user-guide/features/curator", "Curator", None),
            ("user-guide/features/memory", "Memory", None),
            ("user-guide/features/memory-providers", "Memory Providers", None),
            ("user-guide/features/context-files", "Context Files", None),
            ("user-guide/features/context-references", "Context References", None),
            ("user-guide/features/personality", "Personality & SOUL.md", None),
            ("user-guide/features/plugins", "Plugins", None),
            ("user-guide/features/built-in-plugins", "Built-in Plugins", None),
        ],
    ),
    (
        "Automation",
        [
            ("user-guide/features/cron", "Cron Jobs", None),
            ("user-guide/features/delegation", "Delegation", None),
            ("user-guide/features/kanban", "Kanban Multi-Agent", None),
            ("user-guide/features/kanban-tutorial", "Kanban Tutorial", None),
            ("user-guide/features/code-execution", "Code Execution", None),
            ("user-guide/features/hooks", "Hooks", None),
            ("user-guide/features/batch-processing", "Batch Processing", None),
        ],
    ),
    (
        "Media & Web",
        [
            ("user-guide/features/voice-mode", "Voice Mode", None),
            ("user-guide/features/browser", "Browser", None),
            ("user-guide/features/vision", "Vision", None),
            ("user-guide/features/image-generation", "Image Generation", None),
            ("user-guide/features/tts", "Text-to-Speech", None),
        ],
    ),
    (
        "Messaging Platforms",
        [
            ("user-guide/messaging/index", "Overview", None),
            ("user-guide/messaging/telegram", "Telegram", None),
            ("user-guide/messaging/discord", "Discord", None),
            ("user-guide/messaging/slack", "Slack", None),
            ("user-guide/messaging/whatsapp", "WhatsApp", None),
            ("user-guide/messaging/signal", "Signal", None),
            ("user-guide/messaging/email", "Email", None),
            ("user-guide/messaging/sms", "SMS", None),
            ("user-guide/messaging/matrix", "Matrix", None),
            ("user-guide/messaging/mattermost", "Mattermost", None),
            ("user-guide/messaging/homeassistant", "Home Assistant", None),
            ("user-guide/messaging/webhooks", "Webhooks", None),
        ],
    ),
    (
        "Integrations",
        [
            ("integrations/index", "Integrations Overview", None),
            ("integrations/providers", "Providers", None),
            ("user-guide/features/mcp", "MCP (Model Context Protocol)", None),
            ("user-guide/features/acp", "ACP (Agent Context Protocol)", None),
            ("user-guide/features/api-server", "API Server", None),
            ("user-guide/features/honcho", "Honcho Memory", None),
            ("user-guide/features/provider-routing", "Provider Routing", None),
            ("user-guide/features/fallback-providers", "Fallback Providers", None),
            ("user-guide/features/credential-pools", "Credential Pools", None),
        ],
    ),
    (
        "Guides & Tutorials",
        [
            ("guides/tips", "Tips & Best Practices", None),
            ("guides/local-llm-on-mac", "Local LLMs on Mac", None),
            ("guides/daily-briefing-bot", "Daily Briefing Bot", None),
            ("guides/team-telegram-assistant", "Team Telegram Assistant", None),
            ("guides/python-library", "Use Hermes as a Python Library", None),
            ("guides/use-mcp-with-hermes", "Use MCP with Hermes", None),
            ("guides/use-voice-mode-with-hermes", "Use Voice Mode with Hermes", None),
            ("guides/use-soul-with-hermes", "Use SOUL.md with Hermes", None),
            ("guides/build-a-hermes-plugin", "Build a Hermes Plugin", None),
            ("guides/automate-with-cron", "Automate with Cron", None),
            ("guides/work-with-skills", "Work with Skills", None),
            ("guides/delegation-patterns", "Delegation Patterns", None),
            ("guides/github-pr-review-agent", "GitHub PR Review Agent", None),
        ],
    ),
    (
        "Developer Guide",
        [
            ("developer-guide/contributing", "Contributing", None),
            ("developer-guide/architecture", "Architecture", None),
            ("developer-guide/agent-loop", "Agent Loop", None),
            ("developer-guide/prompt-assembly", "Prompt Assembly", None),
            ("developer-guide/context-compression-and-caching", "Context Compression & Caching", None),
            ("developer-guide/gateway-internals", "Gateway Internals", None),
            ("developer-guide/session-storage", "Session Storage", None),
            ("developer-guide/provider-runtime", "Provider Runtime", None),
            ("developer-guide/adding-tools", "Adding Tools", None),
            ("developer-guide/adding-providers", "Adding Providers", None),
            ("developer-guide/adding-platform-adapters", "Adding Platform Adapters", None),
            ("developer-guide/creating-skills", "Creating Skills", None),
            ("developer-guide/extending-the-cli", "Extending the CLI", None),
        ],
    ),
    (
        "Reference",
        [
            ("reference/cli-commands", "CLI Commands", None),
            ("reference/slash-commands", "Slash Commands", None),
            ("reference/profile-commands", "Profile Commands", None),
            ("reference/environment-variables", "Environment Variables", None),
            ("reference/tools-reference", "Tools Reference", None),
            ("reference/toolsets-reference", "Toolsets Reference", None),
            ("reference/mcp-config-reference", "MCP Config Reference", None),
            ("reference/model-catalog", "Model Catalog", None),
            (
                "reference/skills-catalog",
                "Bundled Skills Catalog",
                "Table of all ~90 skills bundled with Hermes",
            ),
            (
                "reference/optional-skills-catalog",
                "Optional Skills Catalog",
                "Table of ~60 additional installable skills",
            ),
            ("reference/faq", "FAQ & Troubleshooting", None),
        ],
    ),
]
| 158 | + |
| 159 | + |
# Leading frontmatter: an opening `---` line, the block contents (group 1) and
# a closing `---` line. Applied with `.match`, so it only fires at the very
# start of the file text.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)

# Quoted scalars, e.g. `description: "..."` or `title: '...'`.
# Group 1 is the value without the surrounding quotes.
DESC_RE = re.compile(r"^description:\s*[\"'](.+?)[\"']\s*$", re.MULTILINE)
TITLE_RE = re.compile(r"^title:\s*[\"'](.+?)[\"']\s*$", re.MULTILINE)

# Plain (unquoted) single-line scalars, e.g. `title: Installation`.
# The negative lookahead rejects values that start with a quote (those belong
# to the quoted patterns above), with `>` / `|` (YAML block-scalar indicators
# whose text lives on the following lines) or with `#` (a comment, i.e. an
# empty value). A trailing ` # comment` is not part of the captured value.
_PLAIN_VALUE = r"[ \t]*(?![ \t\"'>|#])(.+?)(?:[ \t]+#.*)?[ \t]*$"
_PLAIN_DESC_RE = re.compile(r"^description:" + _PLAIN_VALUE, re.MULTILINE)
_PLAIN_TITLE_RE = re.compile(r"^title:" + _PLAIN_VALUE, re.MULTILINE)


def read_frontmatter(path: Path) -> tuple[dict[str, str], str]:
    """Split a doc file into its frontmatter metadata and markdown body.

    Args:
        path: Markdown file to read (decoded as UTF-8).

    Returns:
        A ``(meta, body)`` pair. ``meta`` holds the ``description`` and/or
        ``title`` values found in the frontmatter; a key is absent when the
        field is missing or empty. ``body`` is the text after the closing
        ``---`` line, or the whole file when there is no frontmatter block.

    Both quoted and plain (unquoted) single-line values are recognised.
    This is a deliberately small line-based reader, not a YAML parser:
    block scalars and other multi-line forms are ignored.
    """
    text = path.read_text(encoding="utf-8")
    fence = FRONTMATTER_RE.match(text)
    if fence is None:
        return {}, text

    block = fence.group(1)
    meta: dict[str, str] = {}
    lookups = (
        ("description", DESC_RE, _PLAIN_DESC_RE),
        ("title", TITLE_RE, _PLAIN_TITLE_RE),
    )
    for key, quoted_re, plain_re in lookups:
        # Quoted form first so existing quoted frontmatter resolves exactly
        # as before; the plain form only fills in what that would miss.
        hit = quoted_re.search(block) or plain_re.search(block)
        if hit:
            meta[key] = hit.group(1)
    return meta, text[fence.end():]
| 181 | + |
| 182 | + |
def resolve_desc(slug: str, provided: str | None) -> str:
    """Pick the one-line description shown next to a page link in llms.txt.

    Args:
        slug: Page path relative to the docs root, without the ``.md`` suffix.
        provided: Hand-written description from the curated table, if any.

    Returns:
        ``provided`` when it is non-empty. Otherwise the ``description``
        frontmatter value of ``<slug>.md`` or, if that file does not exist,
        of ``<slug>/index.md``. An empty string when neither file exists or
        the file carries no description.
    """
    if provided:
        return provided

    # First existing candidate wins; the plain page is preferred over a
    # directory index.
    for candidate in (DOCS / f"{slug}.md", DOCS / slug / "index.md"):
        if candidate.exists():
            frontmatter = read_frontmatter(candidate)[0]
            return frontmatter.get("description", "")
    return ""
| 194 | + |
| 195 | + |
def emit_llms_index() -> str:
    """Build the short llms.txt index.

    The result is a markdown document: an H1 title, a blockquote summary,
    install and repo lines, then one H2 per curated section with a bullet
    link for every page in it. A bullet carries a ``: description`` suffix
    when a description can be resolved for the page.

    Returns:
        The full file text, ending in exactly one newline.
    """

    def page_url(slug: str) -> str:
        # Docusaurus publishes a folder's `index.md` at the folder route, so
        # a slug such as `integrations/index` must link to `.../integrations`;
        # the literal `.../integrations/index` is not a generated route.
        if slug == "index":
            return f"{SITE_BASE}/"
        if slug.endswith("/index"):
            slug = slug[: -len("/index")]
        return f"{SITE_BASE}/{slug}"

    lines: list[str] = [
        "# Hermes Agent",
        "",
        (
            "> The self-improving AI agent built by Nous Research. A terminal-native "
            "autonomous coding and task agent with persistent memory, agent-created skills, "
            "and a messaging gateway that lives on 15+ platforms (Telegram, Discord, Slack, "
            "SMS, Matrix, ...). Runs on local, Docker, SSH, Daytona, Modal, or Singularity "
            "backends. Works with Nous Portal, OpenRouter, OpenAI, Anthropic, Google, or any "
            "OpenAI-compatible endpoint."
        ),
        "",
        (
            "Install: `curl -fsSL https://raw.githubusercontent.com/NousResearch/"
            "hermes-agent/main/scripts/install.sh | bash` "
            "(Linux, macOS, WSL2, Termux)"
        ),
        "",
        "Repo: https://github.com/NousResearch/hermes-agent",
        "",
    ]

    for section, items in SECTIONS:
        lines.append(f"## {section}")
        lines.append("")
        for slug, title, desc_override in items:
            link = f"- [{title}]({page_url(slug)})"
            desc = resolve_desc(slug, desc_override)
            lines.append(f"{link}: {desc}" if desc else link)
        lines.append("")

    # Drop the blank line left after the last section, then end with a single
    # newline.
    return "\n".join(lines).rstrip() + "\n"
| 231 | + |
| 232 | + |
def emit_llms_full() -> str:
    """Concatenate the docs under website/docs/ into a single markdown file.

    Order: the curated SECTIONS list first (so the most important pages are
    front-loaded for agents that truncate on token budget), then every
    remaining ``.md`` file sorted by path. Auto-generated per-skill pages
    under ``user-guide/skills/bundled`` and ``user-guide/skills/optional``
    are left out of the second pass.

    Each page is emitted as a ``<!-- source: website/docs/<path> -->``
    comment, a ``# <title>`` heading (frontmatter title, else a path-based
    fallback), the page body with its frontmatter removed, and a ``---``
    separator.

    Returns:
        The full file text, ending in exactly one newline.
    """
    seen: set[Path] = set()
    chunks: list[str] = [
        "# Hermes Agent — Full Documentation\n",
        (
            "This file is the entire Hermes Agent documentation concatenated for LLM "
            "context ingestion. Section order reflects docs-site navigation: Getting "
            "Started, Using Hermes, Features, Messaging, Integrations, Guides, "
            "Developer Guide, Reference, then everything else.\n"
        ),
        "Canonical site: https://hermes-agent.nousresearch.com/docs\n",
        "Short index: https://hermes-agent.nousresearch.com/docs/llms.txt\n",
        "\n---\n\n",
    ]

    def append_doc(path: Path, fallback_title: str) -> None:
        """Render one page into ``chunks`` and mark it as emitted."""
        seen.add(path)
        meta, body = read_frontmatter(path)
        title = meta.get("title") or fallback_title
        # as_posix() keeps the generated file identical regardless of the
        # path separator of the OS running the build.
        source = path.relative_to(DOCS).as_posix()
        chunks.append(f"<!-- source: website/docs/{source} -->\n")
        chunks.append(f"# {title}\n\n")
        chunks.append(body.rstrip() + "\n\n---\n\n")

    # Pass 1: curated order. A slug names either `<slug>.md` or a directory
    # with an `index.md`; slugs that resolve to nothing are skipped silently.
    for _, items in SECTIONS:
        for slug, _title, _desc in items:
            path = DOCS / f"{slug}.md"
            if not path.exists():
                path = DOCS / slug / "index.md"
            if path.exists() and path not in seen:
                append_doc(path, slug)

    # Pass 2: everything else, sorted by path. Individual skill pages are
    # skipped — they are covered by the two catalog reference pages, and
    # emitting each one would add ~1.4 MB of largely duplicative material.
    for path in sorted(DOCS.rglob("*.md")):
        if path in seen:
            continue
        rel = path.relative_to(DOCS)
        parts = rel.parts
        is_skill_page = (
            len(parts) >= 3
            and parts[:2] == ("user-guide", "skills")
            and parts[2] in ("bundled", "optional")
        )
        if is_skill_page:
            continue
        append_doc(path, rel.as_posix())

    return "".join(chunks).rstrip() + "\n"
| 291 | + |
| 292 | + |
def main() -> None:
    """Generate both files and write them into the static directory.

    Both documents are built in memory before anything is written, so a
    failure while generating either one leaves the existing files untouched.
    Prints one confirmation line per file with its size on disk.
    """
    STATIC.mkdir(parents=True, exist_ok=True)

    outputs = (
        ("llms.txt", emit_llms_index()),
        ("llms-full.txt", emit_llms_full()),
    )
    for filename, text in outputs:
        target = STATIC / filename
        target.write_text(text, encoding="utf-8")
        # Report the real on-disk size: len(text) counts characters, which
        # under-reports bytes for non-ASCII content such as the em dash in
        # the llms-full.txt header.
        print(f"Wrote {target} ({target.stat().st_size:,} bytes)")


if __name__ == "__main__":
    main()
0 commit comments