Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions docs/hooks/llms_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""Generate llms.txt, llms-full.txt, and per-page markdown (https://llmstxt.org/).

The hook publishes three artifacts into the built site:

- `llms.txt`: a markdown index of the documentation, one link per page,
grouped by nav section.
- a `.md` rendition of every prose page next to its HTML (e.g.
`server/index.md`), which is what the llms.txt links point at.
- `llms-full.txt`: every prose page concatenated for single-fetch consumption.

Page markdown is the source markdown with `--8<--` snippet includes resolved
and relative links rewritten to absolute URLs. The API reference page
(`api.md`) is a mkdocstrings stub with no markdown source, so it is linked as
rendered HTML from an Optional section instead of being embedded.

Incremental builds (`mkdocs build --dirty`) are rejected: they skip unmodified
pages, which would silently truncate the generated artifacts.
"""

from __future__ import annotations

import posixpath
import re
from dataclasses import dataclass, field
from pathlib import Path

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.exceptions import PluginError
from mkdocs.structure.files import File, Files
from mkdocs.structure.nav import Navigation, Section
from mkdocs.structure.pages import Page

# Pages with no markdown source, linked as HTML under "## Optional".
# Each entry is (source URI, link title, link description); the source URI is
# looked up in the site's files and the link points at that file's rendered URL.
_OPTIONAL_PAGES = [
    ("api.md", "API reference", "Auto-generated API reference for the mcp package (rendered HTML)"),
]

# A whole line holding one single-line snippet include (`--8<-- "path"`). The
# leading whitespace is captured as `indent` so included text can be re-indented.
_SNIPPET_LINE = re.compile(r'^(?P<indent>[ \t]*)--8<-- "(?P<path>[^"\n]+)"$', flags=re.MULTILINE)
# The tail of a markdown link whose target ends in `.md`. Groups, in order: the
# opening `](`, the target, an optional `#anchor`, an optional quoted title, and
# the closing `)`.
_MD_LINK = re.compile(r'(\]\()([^)\s]+\.md)(#[^)\s]*)?( +"[^"]*")?(\))')


@dataclass
class _State:
    """State shared between the hook callbacks of this module during one build."""

    # Rewritten markdown for each processed page, keyed by the page's source URI.
    page_markdown: dict[str, str] = field(default_factory=dict)
    # Source URIs of the nav pages that get a markdown rendition.
    rendition_uris: set[str] = field(default_factory=set)
    # Navigation and file collection captured in `on_nav`, read in `on_post_build`.
    nav: Navigation | None = None
    files: Files | None = None


# Module-level singleton; `on_config` resets it at the start of every build.
_state = _State()


def _site_url(config: MkDocsConfig) -> str:
    """Return the configured `site_url` normalized to end in exactly one slash.

    Raises:
        PluginError: if `site_url` is not set in the MkDocs configuration.
    """
    # An explicit check rather than `assert`: the guard survives `python -O`, and
    # an unset `site_url` fails the build with a readable message.
    if config.site_url is None:
        raise PluginError("llms_txt: site_url must be set in mkdocs.yml to build absolute links")
    return config.site_url.rstrip("/") + "/"


def _md_uri(file: File) -> str:
    """Return the site-relative URI of the markdown rendition of `file`.

    This is the file's `dest_uri` with a trailing `.html` swapped for `.md`; a
    `dest_uri` that does not end in `.html` comes back unchanged.
    """
    html_suffix = re.compile(r"\.html$")
    return html_suffix.sub(".md", file.dest_uri)


def on_config(config: MkDocsConfig) -> None:
    """Reset the module-level build state before a build starts.

    `mkdocs serve` rebuilds reuse the already-imported module, so whatever the
    previous build left in `_state` has to be dropped here.
    """
    _state.nav = None
    _state.files = None
    _state.rendition_uris.clear()
    _state.page_markdown.clear()


def on_nav(nav: Navigation, config: MkDocsConfig, files: Files) -> None:
    """Capture the navigation and files, and select the pages that get a rendition.

    Every nav page gets a markdown rendition except the pages listed in
    `_OPTIONAL_PAGES`, which have no markdown source to publish.
    """
    _state.nav = nav
    _state.files = files
    # Derive the exclusions from `_OPTIONAL_PAGES` instead of repeating "api.md",
    # so a page added to that list is excluded here automatically.
    html_only = {src_uri for src_uri, _title, _description in _OPTIONAL_PAGES}
    _state.rendition_uris.update(page.file.src_uri for page in nav.pages if page.file.src_uri not in html_only)


def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, files: Files) -> str | None:
    """Record the LLM-facing markdown for `page` without modifying the page.

    The stored text is the page's source markdown with single-line `--8<--`
    snippet includes replaced by the referenced file's contents and relative
    `.md` links rewritten to absolute URLs. Pages that are not in
    `_state.rendition_uris` are ignored.

    Returns:
        Always `None`; the hook only records text in `_state.page_markdown`.

    Raises:
        PluginError: if a snippet path escapes the repo root or cannot be read,
            if a `--8<--` marker is still present after substitution, or if a
            relative link target does not resolve to a file in the site.
    """
    if page.file.src_uri not in _state.rendition_uris:
        return None

    # Same anchor as the pymdownx.snippets `base_path` in mkdocs.yml.
    repo_root = Path(config.config_file_path).parent
    # Resolved once here rather than once per included snippet.
    resolved_root = repo_root.resolve()

    def include(match: re.Match[str]) -> str:
        indent, path = match["indent"], match["path"]
        # Mirror the snippets extension's restrict_base_path: reject paths
        # that resolve outside the repo root.
        resolved_path = (repo_root / path).resolve()
        if not resolved_path.is_relative_to(resolved_root):
            raise PluginError(f"llms_txt: snippet path {path!r} in {page.file.src_uri} escapes the repo root")
        try:
            content = resolved_path.read_text(encoding="utf-8").rstrip("\n")
        except OSError as exc:
            raise PluginError(f"llms_txt: cannot read snippet {path!r} in {page.file.src_uri}") from exc
        # Keep a pointer to the embedded file so readers can find it on disk.
        if path.endswith(".py"):
            content = f"# {path}\n{content}"
        if indent:
            # Re-indent the included text to the marker's column; blank lines stay empty.
            content = "\n".join(indent + line if line else line for line in content.split("\n"))
        return content

    resolved = _SNIPPET_LINE.sub(include, markdown)
    # Check the substituted text, not the source page: this catches marker lines
    # the pattern did not match as well as markers brought in by an included
    # file. Neither may leak into the published artifacts.
    if "--8<--" in resolved:
        raise PluginError(f"llms_txt: unresolved snippet include in {page.file.src_uri}")

    site_url = _site_url(config)
    src_dir = posixpath.dirname(page.file.src_uri)

    def rewrite(match: re.Match[str]) -> str:
        opening, target, anchor, title, closing = match.groups()
        # Links that already carry a scheme are absolute; leave them untouched.
        if "://" in target:
            return match.group(0)
        linked = files.get_file_from_path(posixpath.normpath(posixpath.join(src_dir, target)))
        if linked is None:
            raise PluginError(f"llms_txt: cannot resolve link target {target!r} in {page.file.src_uri}")
        # Pages without a markdown rendition (the api.md stub) link to their HTML instead.
        url = _md_uri(linked) if linked.src_uri in _state.rendition_uris else linked.url
        return f"{opening}{site_url}{url}{anchor or ''}{title or ''}{closing}"

    _state.page_markdown[page.file.src_uri] = _MD_LINK.sub(rewrite, resolved)
    return None


def _section_pages(section: Section) -> list[Page]:
    """Collect, depth first and in nav order, the pages under `section` that have a rendition."""
    collected: list[Page] = []
    for entry in section.children:
        if isinstance(entry, Section):
            # Nested sections are flattened into their parent's page list.
            collected += _section_pages(entry)
            continue
        if isinstance(entry, Page) and entry.file.src_uri in _state.rendition_uris:
            collected.append(entry)
    return collected


def on_post_build(config: MkDocsConfig) -> None:
    """Write `llms.txt`, `llms-full.txt`, and the per-page `.md` renditions into the site.

    Top-level nav pages are grouped under a "Docs" heading, followed by one
    heading per top-level nav section (nested sections are flattened into it).
    The pages in `_OPTIONAL_PAGES` are linked as rendered HTML under a final
    "## Optional" heading.

    Raises:
        PluginError: if a page expected to have a rendition was not processed in
            this build, if a page does not start with an H1, or if an optional
            page is missing from the site's files.
    """
    assert _state.nav is not None and _state.files is not None
    # Every selected page must have passed through `on_page_markdown`; anything
    # missing would silently truncate the generated artifacts.
    missing = _state.rendition_uris - _state.page_markdown.keys()
    if missing:
        raise PluginError(f"llms_txt: pages skipped this build (is this a --dirty build?): {sorted(missing)}")

    site_dir = Path(config.site_dir)
    site_url = _site_url(config)

    top_level = [
        item for item in _state.nav.items if isinstance(item, Page) and item.file.src_uri in _state.rendition_uris
    ]
    sections: list[tuple[str, list[Page]]] = [("Docs", top_level)] if top_level else []
    for item in _state.nav.items:
        if isinstance(item, Section):
            pages = _section_pages(item)
            if pages:
                sections.append((item.title, pages))

    index = [f"# {config.site_name}", ""]
    # The summary blockquote is optional in llms.txt; leave it out instead of
    # publishing a literal "> None" when `site_description` is not configured.
    if config.site_description:
        index += [f"> {config.site_description}", ""]
    full: list[str] = []
    for title, pages in sections:
        index += [f"## {title}", ""]
        for page in pages:
            markdown = _state.page_markdown[page.file.src_uri]
            # Publish the rendition at the `.md` URI that the index links to.
            (site_dir / _md_uri(page.file)).write_text(markdown, encoding="utf-8")

            description = page.meta.get("description")
            tail = f": {description}" if description else ""
            index.append(f"- [{page.title}]({site_url}{_md_uri(page.file)}){tail}")

            # Drop the page's own H1; llms-full.txt gets a heading built from the page title instead.
            body, h1_found = re.subn(r"\A\s*# .+\n", "", markdown)
            if not h1_found:
                raise PluginError(f"llms_txt: page {page.file.src_uri} does not start with an H1")
            full += [f"# {page.title}", "", f"Source: {page.canonical_url}", "", body.strip(), ""]
        index.append("")

    index += ["## Optional", ""]
    for src_uri, title, description in _OPTIONAL_PAGES:
        linked = _state.files.get_file_from_path(src_uri)
        if linked is None:
            raise PluginError(f"llms_txt: optional page {src_uri} not found")
        index.append(f"- [{title}]({site_url}{linked.url}): {description}")
    index.append("")

    (site_dir / "llms.txt").write_text("\n".join(index), encoding="utf-8")
    (site_dir / "llms-full.txt").write_text("\n".join(full), encoding="utf-8")
6 changes: 6 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,9 @@ npx -y @modelcontextprotocol/inspector
## API Reference

Full API documentation is available in the [API Reference](api.md).

## llms.txt

Reading with an LLM? This documentation is also published in the [llms.txt](https://llmstxt.org/) format:
[llms.txt](https://py.sdk.modelcontextprotocol.io/llms.txt) is an index of the pages, and
[llms-full.txt](https://py.sdk.modelcontextprotocol.io/llms-full.txt) contains every page in a single file.
12 changes: 9 additions & 3 deletions mkdocs.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
site_name: MCP Server
site_description: MCP Server
site_name: MCP Python SDK
site_description: The official Python SDK for the Model Context Protocol
strict: true

repo_name: modelcontextprotocol/python-sdk
Expand Down Expand Up @@ -85,7 +85,10 @@
- pymdownx.critic
- pymdownx.mark
- pymdownx.superfences
- pymdownx.snippets
# Resolve snippet includes against the repo root regardless of the build's
# working directory (the extension's default base_path is the CWD).
- pymdownx.snippets:
base_path: !relative $config_dir
- pymdownx.tilde
- pymdownx.inlinehilite
- pymdownx.highlight:
Expand All @@ -111,6 +114,9 @@
watch:
- src/mcp

hooks:
- docs/hooks/llms_txt.py

Check warning on line 118 in mkdocs.yml

View check run for this annotation

Claude / Claude Code Review

Hook source published as static site asset

The hook source `docs/hooks/llms_txt.py` lives inside the default `docs_dir`, and since `mkdocs.yml` sets no `exclude_docs`, MkDocs copies it verbatim into the built site, publishing it at `/hooks/llms_txt.py` as a stray static asset. Harmless, but easy to avoid by adding `exclude_docs: hooks/` or moving the hook outside `docs/` — note the same layout exists on main from #3024, so any cleanup should land on both branches to preserve parity.
Comment on lines +117 to +118

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 The hook source docs/hooks/llms_txt.py lives inside the default docs_dir, and since mkdocs.yml sets no exclude_docs, MkDocs copies it verbatim into the built site, publishing it at /hooks/llms_txt.py as a stray static asset. Harmless, but easy to avoid by adding exclude_docs: hooks/ or moving the hook outside docs/ — note the same layout exists on main from #3024, so any cleanup should land on both branches to preserve parity.

Extended reasoning...

What happens: MkDocs collects every file under docs_dir (the default docs/ here — mkdocs.yml sets no docs_dir override). Files that are not markdown pages and don't match exclude_docs (which is unset; the built-in defaults only exclude dot-prefixed paths and /templates/) are treated as media files and copied verbatim into site_dir. Listing the script under the hooks: key (mkdocs.yml:117-118) only tells MkDocs to import it as a build hook — it does not exempt it from the file collection. The MkDocs documentation's own hooks example places such scripts outside docs_dir for exactly this reason.

Concrete walk-through:

  1. mkdocs build --strict runs; the file collector walks docs/ and finds docs/hooks/llms_txt.py.
  2. .py is not a recognized documentation page extension and the path matches no exclusion, so it's classified as a media file with dest_uri = hooks/llms_txt.py.
  3. During the build it is copied byte-for-byte into site/hooks/llms_txt.py.
  4. The deploy workflow publishes the v1.x build at the site root, so the hook source becomes reachable at https://py.sdk.modelcontextprotocol.io/hooks/llms_txt.py.
  5. Strict mode raises no warning — omitted_files/unrecognized_links validation only applies to markdown pages and links, so the artifact ships silently.

Why nothing prevents it: the new hook adds itself to hooks: but the config adds no corresponding exclude_docs entry, and the hook itself only filters what goes into llms.txt/llms-full.txt (nav pages with markdown sources) — it has no effect on which static files MkDocs copies.

Impact: negligible. The source is already public on GitHub, the file isn't linked from any page, doesn't appear in nav/search/llms.txt, and can't break the build. It's purely an unintended deployment artifact.

On the parity argument: it's true this layout mirrors the already-merged #3024 on main (which has the same issue and ships its hooks under docs/hooks/ with no exclude_docs), and the PR explicitly aims for byte-identical parity with that hook. That makes this not a defect introduced by the backport's logic, but the artifact is still unintended on both branches. The right move is to fix it consistently — either add to both branches' mkdocs.yml:

exclude_docs: |
  hooks/

or move the hooks to a top-level `hooks/` or `scripts/` directory (and update the `hooks:` paths) on main first, then carry that into this backport. Not blocking — flagging so it can be tidied whenever convenient.


plugins:
- search
- social
Expand Down
Loading