Coverage for mcp/tools/docs.py: 95%
53 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
"""Documentation discovery MCP tool.

Wraps two catalogs defined in ``mcp/resources/docs.py`` — ``DOC_METADATA``
(the ``docs/*.md`` guides) and ``PACKAGE_DOC_METADATA`` (the package-level
READMEs that live next to the code under ``mcp/``) — and exposes a single
``find_docs`` tool the LLM can call with a free-text query plus an optional
topic filter. The two catalogs are merged into one searchable view; each
result carries a ``resource_uri`` so the caller knows the exact resource to
fetch (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}``
for a package README). Scoring is a deterministic weighted sum of topic,
keyword, and summary/name substring matches; results are sorted by score
descending then name ascending so callers iterating with ``limit`` always see
a stable ordering.
"""
16from audit import audit_logged
17from resources.docs import DOC_METADATA, PACKAGE_DOC_METADATA
18from server import mcp
def _catalog() -> dict[str, dict[str, str | list[str]]]:
    """Build a fresh merged view of the guide and package-README catalogs.

    ``DOC_METADATA`` entries are copied in first and ``PACKAGE_DOC_METADATA``
    entries are layered on top, so a package entry would win if a key ever
    appeared in both. By convention the two key spaces do not overlap — guide
    keys are uppercase doc stems (``ARCHITECTURE``) and package keys are
    lowercase slugs (``mcp-mission``) — so no entry is lost in the merge.

    Returns:
        A new dict on every call. Only the outer mapping is new; the
        per-entry metadata dicts are the same objects held by the source
        catalogs.
    """
    merged = dict(DOC_METADATA)
    merged.update(PACKAGE_DOC_METADATA)
    return merged
def _resource_uri(name: str) -> str:
    """Return the ``docs://`` URI under which the content for *name* is served.

    Keys present in ``PACKAGE_DOC_METADATA`` resolve to the packages
    namespace. Every other key — including one found in neither catalog —
    resolves to the guides namespace.
    """
    is_package_readme = name in PACKAGE_DOC_METADATA
    return (
        f"docs://gco/packages/{name}"
        if is_package_readme
        else f"docs://gco/docs/{name}"
    )
def _search(query: str | None, topic: str | None) -> list[tuple[str, int]]:
    """Score every catalog entry against the filters and rank the survivors.

    Matching is case-insensitive substring containment throughout.

    Args:
        query: Free-text needle. Each keyword containing it adds 4 points;
            the summary and the entry name add 1 point each when they
            contain it. A falsy value disables query scoring.
        topic: Topic needle. Each topic containing it adds 3 points. When
            given, it is a hard filter: entries with no matching topic are
            dropped before the query is even considered. A falsy value
            disables the filter.

    Returns:
        ``(name, score)`` pairs ordered by score descending, then name
        ascending.
    """
    needle = query.lower() if query else None
    wanted_topic = topic.lower() if topic else None
    ranked: list[tuple[str, int]] = []

    for doc_name, info in _catalog().items():
        points = 0

        if wanted_topic:
            doc_topics = info.get("topics", [])
            # A non-list ``topics`` value contributes nothing, which means the
            # entry is then dropped by the hard filter just below.
            # NOTE(review): the coverage report this listing came from shows
            # the non-list case is never exercised by the test suite.
            if isinstance(doc_topics, list):
                points += 3 * sum(
                    1 for entry in doc_topics if wanted_topic in str(entry).lower()
                )
            # Hard constraint: without a topic hit the entry is out, even if
            # the query would have matched it.
            if points == 0:
                continue

        if needle:
            # Keywords carry the heaviest weight of the free-text signals.
            doc_keywords = info.get("keywords", [])
            if isinstance(doc_keywords, list):
                points += 4 * sum(
                    1 for entry in doc_keywords if needle in str(entry).lower()
                )
            if needle in str(info.get("summary", "")).lower():
                points += 1
            if needle in doc_name.lower():
                points += 1
            # Query-only search with no hit at all: nothing to report.
            # NOTE(review): nesting was reconstructed from a flattened
            # listing; this check is assumed to belong to the query branch —
            # confirm against the real module.
            if points == 0 and not wanted_topic:
                continue

        ranked.append((doc_name, points))

    return sorted(ranked, key=lambda pair: (-pair[1], pair[0]))
def _format(name: str) -> dict[str, object]:
    """Shape one catalog entry into the dict handed back to the tool caller.

    A name missing from the merged catalog is tolerated: the lookup falls
    back to an empty entry, so ``summary`` becomes ``""`` and the list-valued
    fields become empty lists.

    Returns:
        A dict with the keys ``name``, ``resource_uri``, ``summary``,
        ``topics``, ``keywords`` and ``related``, in that order.
    """
    entry = _catalog().get(name, {})
    payload: dict[str, object] = {
        "name": name,
        "resource_uri": _resource_uri(name),
    }
    payload["summary"] = entry.get("summary", "")
    # The remaining fields are all list-valued and share the same default.
    for field in ("topics", "keywords", "related"):
        payload[field] = entry.get(field, [])
    return payload
@mcp.tool(tags={"safe", "docs"})
@audit_logged
async def find_docs(
    query: str | None = None,
    topic: str | None = None,
    limit: int = 10,
) -> list[dict[str, object]]:
    """`find_docs` — search the docs catalog by topic and free-text query.

    Searches both the ``docs/*.md`` guides and the package-level READMEs that
    live next to the code under ``mcp/``. Each result carries a
    ``resource_uri`` naming the exact resource to fetch
    (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}``
    for a package README).

    Args:
        query: Free-text query matched against the doc's keywords, summary,
            and name (case-insensitive substring match).
        topic: Filter by topic substring (case-insensitive). Acts as a hard
            filter — entries without a topic match are dropped.
        limit: Maximum results (default 10). ``limit <= 0`` returns ``[]``.

    Scoring: topic substring matches contribute 3 pts each; keyword
    substring matches contribute 4 pts each; summary/name substring
    matches contribute 1 pt each. Returns the top ``limit`` matches
    sorted by score descending then name ascending.
    """
    # NOTE(review): the docstring above is kept word-for-word — presumably the
    # tool decorator surfaces it to callers as the tool description; confirm
    # before rewording it.
    if limit <= 0:
        return []

    if query or topic:
        # At least one filter: rank by relevance and keep the best `limit`.
        chosen = [doc_name for doc_name, _ in _search(query, topic)[:limit]]
    else:
        # No filters at all: plain alphabetical listing of the whole catalog.
        chosen = sorted(_catalog())[:limit]

    return [_format(doc_name) for doc_name in chosen]