Coverage for mcp/tools/docs.py: 95%

1"""Documentation discovery MCP tool.

3Wraps two catalogs defined in ``mcp/resources/docs.py`` — ``DOC_METADATA``

4(the ``docs/*.md`` guides) and ``PACKAGE_DOC_METADATA`` (the package-level

5READMEs that live next to the code under ``mcp/``) — and exposes a single

6``find_docs`` tool the LLM can call with a free-text query plus an optional

7topic filter. The two catalogs are merged into one searchable view; each

8result carries a ``resource_uri`` so the caller knows the exact resource to

9fetch (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}``

10for a package README). Scoring is a deterministic weighted sum of topic,

11keyword, and summary/name substring matches; results are sorted by score

12descending then name ascending so callers iterating with ``limit`` always

13see a stable ordering.

14"""

16from audit import audit_logged

17from resources.docs import DOC_METADATA, PACKAGE_DOC_METADATA

18from server import mcp

def _catalog() -> dict[str, dict[str, str | list[str]]]:
    """Build the combined catalog of ``docs/*.md`` guides and package READMEs.

    A fresh dict is returned on every call: it starts as a copy of
    ``DOC_METADATA`` and is then updated with ``PACKAGE_DOC_METADATA``.

    The two sources are designed to use disjoint key spaces —
    ``DOC_METADATA`` keys are uppercase doc stems (``ARCHITECTURE``) and
    ``PACKAGE_DOC_METADATA`` keys are lowercase slugs (``mcp-mission``) — so
    the update step is not expected to overwrite anything. Were a key ever
    shared, the package entry would take precedence.
    """
    merged = dict(DOC_METADATA)
    merged.update(PACKAGE_DOC_METADATA)
    return merged

def _resource_uri(name: str) -> str:
    """Return the ``docs://`` URI under which the content for *name* is served.

    Keys found in ``PACKAGE_DOC_METADATA`` resolve to the packages namespace;
    every other key (including one that is in neither catalog) resolves to
    the docs namespace.
    """
    is_package = name in PACKAGE_DOC_METADATA
    return f"docs://gco/packages/{name}" if is_package else f"docs://gco/docs/{name}"

def _search(query: str | None, topic: str | None) -> list[tuple[str, int]]:
    """Filter and score catalog entries; return ``[(name, score), ...]``.

    Both inputs are normalised by stripping surrounding whitespace and
    lower-casing. A value that is ``None``, empty, or whitespace-only is
    treated as "filter not supplied", so stray padding around an otherwise
    valid term (``"vllm "``) can no longer cause a miss, and a blank topic
    can no longer match every multi-word topic by accident.

    Args:
        query: Free-text needle matched as a case-insensitive substring
            against each entry's keywords (4 pts per matching keyword),
            summary (1 pt) and catalog name (1 pt).
        topic: Case-insensitive substring matched against each entry's
            topics (3 pts per matching topic). When supplied it is a hard
            filter: entries with no matching topic are dropped.

    Returns:
        ``(name, score)`` pairs sorted by score descending, then name
        ascending. With only a query active, entries the query does not hit
        are dropped. With both filters inactive every entry is returned
        with a score of 0.
    """
    q = (query or "").strip().lower() or None
    t = (topic or "").strip().lower() or None

    results: list[tuple[str, int]] = []
    for name, meta in _catalog().items():
        score = 0
        if t:
            topics = meta.get("topics", [])
            if isinstance(topics, list):
                score += 3 * sum(t in str(top).lower() for top in topics)
            # Topic filter is a hard constraint — no match means drop the
            # entry, even if a query string would have matched the summary.
            if score == 0:
                continue
        if q:
            # Keyword matches are the strongest free-text signal — every
            # entry's ``keywords`` list is curated to surface terms a
            # user is likely to search for (e.g. "vllm", "odcr",
            # "global accelerator") even when those phrases don't appear
            # verbatim in the summary.
            keywords = meta.get("keywords", [])
            if isinstance(keywords, list):
                score += 4 * sum(q in str(kw).lower() for kw in keywords)
            if q in str(meta.get("summary", "")).lower():
                score += 1
            if q in name.lower():
                score += 1
            # When the only signal is a query and it didn't hit, drop it.
            # NOTE(review): this check is kept inside the query branch, which
            # is what the comment above describes — confirm against the
            # original file's indentation.
            if score == 0 and not t:
                continue
        results.append((name, score))

    # Negated score gives descending order; name breaks ties ascending so the
    # ordering is stable for callers that truncate the list.
    results.sort(key=lambda item: (-item[1], item[0]))
    return results

def _format(name: str) -> dict[str, object]:
    """Shape the catalog entry for *name* into a tool-response dict.

    The result always contains ``name``, ``resource_uri``, ``summary``,
    ``topics``, ``keywords`` and ``related`` (in that order). A missing
    summary falls back to ``""`` and a missing list field to ``[]``; a name
    absent from the catalog therefore yields an entry with only defaults.
    """
    entry = _catalog().get(name, {})
    payload: dict[str, object] = {
        "name": name,
        "resource_uri": _resource_uri(name),
    }
    payload["summary"] = entry.get("summary", "")
    # The list-valued fields share the same "default to empty list" rule.
    for field in ("topics", "keywords", "related"):
        payload[field] = entry.get(field, [])
    return payload

@mcp.tool(tags={"safe", "docs"})
@audit_logged
async def find_docs(
    query: str | None = None,
    topic: str | None = None,
    limit: int = 10,
) -> list[dict[str, object]]:
    """`find_docs` — search the docs catalog by topic and free-text query.

    Searches both the ``docs/*.md`` guides and the package-level READMEs that
    live next to the code under ``mcp/``. Each result carries a
    ``resource_uri`` naming the exact resource to fetch
    (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}``
    for a package README).

    Args:
        query: Free-text query matched against the doc's keywords, summary,
            and name (case-insensitive substring match).
        topic: Filter by topic substring (case-insensitive). Acts as a hard
            filter — entries without a topic match are dropped.
        limit: Maximum results (default 10). ``limit <= 0`` returns ``[]``.

    Scoring: topic substring matches contribute 3 pts each; keyword
    substring matches contribute 4 pts each; summary/name substring
    matches contribute 1 pt each. Returns the top ``limit`` matches
    sorted by score descending then name ascending.
    """
    # A non-positive limit short-circuits before any catalog work.
    if limit <= 0:
        return []

    if query or topic:
        # At least one filter is active: take names in ranked order.
        selected = [name for name, _score in _search(query, topic)]
    else:
        # No filters at all: fall back to a stable alphabetical listing.
        selected = sorted(_catalog())

    return [_format(name) for name in selected[:limit]]

127 matches = _search(query, topic)

128 return [_format(name) for name, _score in matches[:limit]]