Coverage for mcp/tools/docs.py: 95%

53 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Documentation discovery MCP tool. 

2 

3Wraps two catalogs defined in ``mcp/resources/docs.py`` — ``DOC_METADATA`` 

4(the ``docs/*.md`` guides) and ``PACKAGE_DOC_METADATA`` (the package-level 

5READMEs that live next to the code under ``mcp/``) — and exposes a single 

6``find_docs`` tool the LLM can call with a free-text query plus an optional 

7topic filter. The two catalogs are merged into one searchable view; each 

8result carries a ``resource_uri`` so the caller knows the exact resource to 

9fetch (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}`` 

10for a package README). Scoring is a deterministic weighted sum of topic and 

11summary/name substring matches; results are sorted by score descending then 

12name ascending so callers iterating with ``limit`` always see a stable 

13ordering. 

14""" 

15 

16from audit import audit_logged 

17from resources.docs import DOC_METADATA, PACKAGE_DOC_METADATA 

18from server import mcp 

19 

20 

def _catalog() -> dict[str, dict[str, str | list[str]]]:
    """Build one searchable view over both documentation catalogs.

    Starts from a copy of ``DOC_METADATA`` (the ``docs/*.md`` guides) and
    layers ``PACKAGE_DOC_METADATA`` (the package READMEs) on top. Guide keys
    are uppercase doc stems (``ARCHITECTURE``) while package keys are
    lowercase slugs (``mcp-mission``), so the two key spaces are disjoint by
    construction and no entry is lost in the merge. Should a key ever
    collide, the package entry takes precedence because it is applied last.

    Returns:
        A fresh dict on every call; neither source catalog is mutated.
    """
    merged = dict(DOC_METADATA)
    merged.update(PACKAGE_DOC_METADATA)
    return merged

30 

31 

def _resource_uri(name: str) -> str:
    """Return the resource URI under which the entry ``name`` is served.

    Keys present in ``PACKAGE_DOC_METADATA`` resolve to the packages
    namespace; every other name (including one found in neither catalog)
    resolves to the guides namespace.
    """
    namespace = "packages" if name in PACKAGE_DOC_METADATA else "docs"
    return f"docs://gco/{namespace}/{name}"

37 

38 

def _search(query: str | None, topic: str | None) -> list[tuple[str, int]]:
    """Score every catalog entry against the filters and rank the survivors.

    Matching is case-insensitive substring matching throughout.

    Args:
        query: Free-text needle checked against each entry's keywords,
            summary and name. ``None`` or empty disables query scoring.
        topic: Topic needle. When given it is a hard filter: an entry with
            no matching topic is discarded regardless of the query.

    Returns:
        ``(name, score)`` pairs ordered by score descending, then name
        ascending. Each matching topic adds 3, each matching keyword adds 4,
        and a summary hit and a name hit add 1 apiece.
    """
    needle = query.lower() if query else None
    wanted_topic = topic.lower() if topic else None

    ranked: list[tuple[str, int]] = []
    for doc_name, entry in _catalog().items():
        points = 0

        if wanted_topic is not None:
            doc_topics = entry.get("topics", [])
            if isinstance(doc_topics, list):
                points += 3 * sum(
                    wanted_topic in str(item).lower() for item in doc_topics
                )
            # The topic filter is a hard constraint: without a topic hit the
            # entry is dropped even if the query would have matched it.
            if not points:
                continue

        if needle is not None:
            # Keywords are the strongest free-text signal: each entry's
            # ``keywords`` list is curated to surface likely search terms
            # even when they do not appear verbatim in the summary.
            doc_keywords = entry.get("keywords", [])
            if isinstance(doc_keywords, list):
                points += 4 * sum(
                    needle in str(item).lower() for item in doc_keywords
                )
            if needle in str(entry.get("summary", "")).lower():
                points += 1
            if needle in doc_name.lower():
                points += 1

        # Keep the entry if it scored, or if it already passed the topic
        # filter; a query-only search that missed is dropped.
        if points or wanted_topic is not None:
            ranked.append((doc_name, points))

    return sorted(ranked, key=lambda pair: (-pair[1], pair[0]))

78 

79 

def _format(name: str) -> dict[str, object]:
    """Render the catalog entry ``name`` as a tool-response record.

    The record always carries ``name``, ``resource_uri``, ``summary``,
    ``topics``, ``keywords`` and ``related``, in that order. Fields missing
    from the metadata (or a name absent from the catalog altogether) fall
    back to an empty string for ``summary`` and an empty list for the rest.
    """
    entry = _catalog().get(name, {})
    record: dict[str, object] = {
        "name": name,
        "resource_uri": _resource_uri(name),
    }
    record["summary"] = entry.get("summary", "")
    for list_field in ("topics", "keywords", "related"):
        record[list_field] = entry.get(list_field, [])
    return record

91 

92 

@mcp.tool(tags={"safe", "docs"})
@audit_logged
async def find_docs(
    query: str | None = None,
    topic: str | None = None,
    limit: int = 10,
) -> list[dict[str, object]]:
    """`find_docs` — search the docs catalog by topic and free-text query.

    Searches both the ``docs/*.md`` guides and the package-level READMEs that
    live next to the code under ``mcp/``. Each result carries a
    ``resource_uri`` naming the exact resource to fetch
    (``docs://gco/docs/{name}`` for a guide, ``docs://gco/packages/{name}``
    for a package README).

    Args:
        query: Free-text query matched against the doc's keywords, summary,
            and name (case-insensitive substring match). Leading/trailing
            whitespace is ignored; a blank query counts as no query.
        topic: Filter by topic substring (case-insensitive). Acts as a hard
            filter — entries without a topic match are dropped.
            Leading/trailing whitespace is ignored; a blank topic counts as
            no topic.
        limit: Maximum results (default 10). ``limit <= 0`` returns ``[]``.

    Scoring: topic substring matches contribute 3 pts each; keyword
    substring matches contribute 4 pts each; summary/name substring
    matches contribute 1 pt each. Returns the top ``limit`` matches
    sorted by score descending then name ascending. With neither a query
    nor a topic, returns the first ``limit`` entries sorted by name.
    """
    if limit <= 0:
        return []

    # Normalize before choosing a code path. Without this, a blank value
    # such as " " is truthy: it skips the plain listing and then matches
    # every multi-word keyword/topic/summary, while a padded value such as
    # "vllm " misses entries it should hit.
    query = query.strip() if query else None
    topic = topic.strip() if topic else None

    if not query and not topic:
        # Stable alpha-sorted listing for the no-filter case.
        names = sorted(_catalog())[:limit]
        return [_format(name) for name in names]

    matches = _search(query, topic)
    return [_format(name) for name, _score in matches[:limit]]

128 return [_format(name) for name, _score in matches[:limit]]