Coverage for mcp/metric_readers/logs.py: 97%

52 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Pull a scalar out of a job's log lines. 

2 

3A job often prints its progress to stdout — sometimes as a structured JSON 

4object per line, sometimes as free text a regular expression can pick apart. 

5This module turns either of those into a list of candidate values that a 

6later reduction step collapses into one number. 

7 

8It offers three pure helpers: 

9 

10* :func:`extract_by_json_key` — parse each line as a JSON object and pull the 

11 value at a dotted key path. 

12* :func:`extract_by_regex` — match each line against a compiled pattern and 

13 pull its first capture group. 

14* :func:`coerce_scalar` — turn one extracted value (a parsed JSON value or a 

15 captured string) into a real, finite number, or fail with a structured 

16 error. 

17 

18Everything here is pure: no log fetching, no I/O, no environment lookups. The 

19caller supplies the already-retrieved lines and (for the regex path) an 

20already-compiled pattern, which keeps these functions trivial to test in 

21isolation. 

22""" 

23 

24from __future__ import annotations 

25 

26import json 

27import math 

28import re 

29from typing import Any, cast 

30 

31from . import shape 

32 

33 

def extract_by_json_key(lines: list[str], key: str) -> list[object]:
    """Collect the value at ``key`` from every line that is a JSON object.

    Each line is parsed on its own. A line contributes nothing — and never
    raises — when it is not valid JSON, when it nests so deeply that the
    parser gives up with :class:`RecursionError`, or when it parses to
    anything other than a JSON object (a bare number, string, list,
    ``true``/``false``/``null``).

    ``key`` is a dotted path resolved one segment at a time against the
    parsed object: ``"metrics.loss"`` walks ``obj["metrics"]["loss"]``. Every
    intermediate value must itself be an object containing the next segment;
    otherwise the line is skipped. A path that resolves to JSON ``null`` is
    still a hit and yields ``None``.

    The resolved values are returned in the order their lines appeared and
    are left exactly as parsed; turning them into numbers is a later step.
    """
    path = key.split(".")
    found: list[object] = []
    for line in lines:
        try:
            document = json.loads(line)
        except (ValueError, RecursionError):
            # ValueError covers JSONDecodeError (its subclass), including
            # empty and whitespace-only lines. RecursionError is what the
            # parser raises for pathologically nested input; such a line is
            # just another unparseable line, not a reason to abort the scan.
            continue
        if not isinstance(document, dict):
            # Valid JSON, but not an object, so there is nothing to index.
            continue
        node: Any = document
        for part in path:
            if not isinstance(node, dict) or part not in node:
                # The path dead-ends on this line; skip it.
                break
            node = node[part]
        else:
            # The loop ran to completion, so the full path resolved.
            found.append(node)
    return found

71 

72 

def extract_by_regex(lines: list[str], pattern: re.Pattern[str]) -> list[str]:
    """Return group 1 of ``pattern`` for each line it matches, in line order.

    ``pattern.search`` is used, so the match may start anywhere in the line
    and only the first match per line is considered. Lines without a match
    are dropped. The pattern must define at least one capture group; this
    function does not check that and relies on the caller having done so.

    Nothing is converted here: the captured substrings are handed back as-is
    for later coercion. If the first group exists but took no part in a
    match (for example ``(a)|b`` matching ``"b"``), the entry for that line
    is ``None``.
    """
    hits = map(pattern.search, lines)
    return [hit.group(1) for hit in hits if hit is not None]

88 

89 

90def _parse_numeric_string(raw: str) -> float: 

91 """Parse a string into a real, finite number or raise. 

92 

93 Integers are recognized first so a whole number keeps its integer form; 

94 anything else falls back to a float parse. A value that does not parse, 

95 or that parses to NaN or an infinity, raises 

96 :class:`~.shape.MetricReaderError` with code 

97 :attr:`~.shape.ErrorCode.NON_NUMERIC_VALUE` and the offending raw string 

98 in the details. 

99 """ 

100 text = raw.strip() 

101 try: 

102 return int(text) 

103 except ValueError: 

104 pass 

105 try: 

106 parsed = float(text) 

107 except ValueError, OverflowError: 

108 raise shape.MetricReaderError( 

109 shape.ErrorCode.NON_NUMERIC_VALUE, 

110 {"raw": raw, "type": "str"}, 

111 ) from None 

112 if not math.isfinite(parsed): 112 ↛ 113line 112 didn't jump to line 113 because the condition on line 112 was never true

113 raise shape.MetricReaderError( 

114 shape.ErrorCode.NON_NUMERIC_VALUE, 

115 {"raw": raw, "type": "str"}, 

116 ) 

117 return parsed 

118 

119 

def coerce_scalar(raw: object) -> float:
    """Convert a single extracted value to a number, or raise.

    The checks run in this order:

    1. If :func:`shape.is_numeric_value` accepts ``raw``, it is returned
       unchanged. That helper is defined elsewhere; it is expected to accept
       real, finite, non-boolean numbers only.
    2. If ``raw`` is a string, it is handed to
       :func:`_parse_numeric_string`, which returns the parsed number or
       raises for text that is not a finite number.
    3. Anything else raises :class:`~.shape.MetricReaderError` with code
       :attr:`~.shape.ErrorCode.NON_NUMERIC_VALUE`; the details record the
       offending value and the name of its type.
    """
    if shape.is_numeric_value(raw):
        # The cast only informs the type checker; the object is untouched.
        number = cast(float, raw)
        return number
    if not isinstance(raw, str):
        raise shape.MetricReaderError(
            shape.ErrorCode.NON_NUMERIC_VALUE,
            {"raw": raw, "type": type(raw).__name__},
        )
    return _parse_numeric_string(raw)

138 )