Coverage for mcp/metric_readers/logs.py: 97%
52 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Pull a scalar out of a job's log lines.
3A job often prints its progress to stdout — sometimes as a structured JSON
4object per line, sometimes as free text a regular expression can pick apart.
5This module turns either of those into a list of candidate values that a
6later reduction step collapses into one number.
8It offers three pure helpers:
10* :func:`extract_by_json_key` — parse each line as a JSON object and pull the
11 value at a dotted key path.
12* :func:`extract_by_regex` — match each line against a compiled pattern and
13 pull its first capture group.
14* :func:`coerce_scalar` — turn one extracted value (a parsed JSON value or a
15 captured string) into a real, finite number, or fail with a structured
16 error.
18Everything here is pure: no log fetching, no I/O, no environment lookups. The
19caller supplies the already-retrieved lines and (for the regex path) an
20already-compiled pattern, which keeps these functions trivial to test in
21isolation.
22"""
24from __future__ import annotations
26import json
27import math
28import re
29from typing import Any, cast
31from . import shape
def extract_by_json_key(lines: list[str], key: str) -> list[object]:
    """Collect the value at ``key`` from every line that is a JSON object.

    Each line is parsed independently. A line that cannot be parsed as JSON
    (including empty or whitespace-only lines, and lines nested so deeply
    that the parser gives up), or that parses to something other than a JSON
    object (a bare number, string, list, ``true``/``false``/``null``), is
    skipped — it contributes nothing and never raises.

    ``key`` is treated as a dotted path and resolved segment by segment
    against the parsed object: ``"metrics.loss"`` walks
    ``obj["metrics"]["loss"]``. A line whose object does not contain the
    full path is skipped too.

    :param lines: The already-retrieved log lines, one candidate per entry.
    :param key: Dotted key path to resolve inside each parsed object.
    :returns: The resolved values in the order their lines appeared. Values
        are returned exactly as parsed; no numeric coercion happens here.
    """
    segments = key.split(".")
    collected: list[object] = []
    for line in lines:
        try:
            parsed = json.loads(line)
        except (ValueError, RecursionError):
            # ValueError covers JSONDecodeError (its subclass) and blank
            # lines. RecursionError is what the decoder raises for a line
            # nested beyond the interpreter's recursion limit; one such line
            # must not abort the scan of every other line.
            continue
        if not isinstance(parsed, dict):
            # Valid JSON, but not an object, so there is nothing to look up.
            continue
        node: Any = parsed
        for segment in segments:
            if not isinstance(node, dict) or segment not in node:
                # The path dead-ends here; skip this line.
                break
            node = node[segment]
        else:
            # Every segment resolved without hitting ``break``.
            collected.append(node)
    return collected
def extract_by_regex(lines: list[str], pattern: re.Pattern[str]) -> list[str]:
    """Collect the first capture group from every line the pattern matches.

    ``pattern`` is applied with ``search``, so it may match anywhere in a
    line rather than only at the start. Each matching line contributes the
    text of group 1 from its first match; non-matching lines contribute
    nothing. The pattern is expected to define at least one capture group —
    the caller validates that before compiling.

    :param lines: The already-retrieved log lines.
    :param pattern: A compiled pattern with at least one capture group.
    :returns: The group-1 captures, in line order, for later coercion.
    """
    # Lazily search each line once; the comprehension drops the misses.
    searches = (pattern.search(line) for line in lines)
    return [found.group(1) for found in searches if found is not None]
def _parse_numeric_string(raw: str) -> float:
    """Parse a string into a real, finite number or raise.

    Surrounding whitespace is stripped first. An integer parse is attempted
    before the float parse so a whole number keeps its integer form.

    :param raw: The extracted string to interpret.
    :returns: An ``int`` when the text is an integer literal, otherwise a
        finite ``float``.
    :raises shape.MetricReaderError: With code
        :attr:`~.shape.ErrorCode.NON_NUMERIC_VALUE` when the text does not
        parse, or parses to NaN or an infinity. The details carry the
        original (unstripped) string under ``"raw"`` and ``"type": "str"``.
    """
    text = raw.strip()
    try:
        return int(text)
    except ValueError:
        # Not an integer literal; fall through to the float parse.
        pass
    try:
        parsed = float(text)
    except (ValueError, OverflowError):
        # The tuple is parenthesized on purpose: the bare ``except A, B:``
        # spelling only parses on Python 3.14+ and is a SyntaxError before.
        raise shape.MetricReaderError(
            shape.ErrorCode.NON_NUMERIC_VALUE,
            {"raw": raw, "type": "str"},
        ) from None
    if not math.isfinite(parsed):
        # float() accepts "nan" and "inf", and turns an overflowing literal
        # such as "1e999" into an infinity; none of those are usable values.
        raise shape.MetricReaderError(
            shape.ErrorCode.NON_NUMERIC_VALUE,
            {"raw": raw, "type": "str"},
        )
    return parsed
def coerce_scalar(raw: object) -> float:
    """Coerce one extracted value to a real, finite number, or fail.

    The value is first offered to :func:`shape.is_numeric_value`; when that
    accepts it, the value is returned unchanged. Otherwise a string is handed
    to :func:`_parse_numeric_string`, which yields an integer form when
    possible and a float otherwise, and rejects text that is not a finite
    number. Any remaining input (for example a boolean, ``None``, NaN, an
    infinity, a list, or an object) is rejected.

    :param raw: A parsed JSON value or a captured string.
    :returns: The numeric value.
    :raises shape.MetricReaderError: With code
        :attr:`~.shape.ErrorCode.NON_NUMERIC_VALUE`; the details record the
        offending value under ``"raw"`` and its type name under ``"type"``.
    """
    if not shape.is_numeric_value(raw):
        if not isinstance(raw, str):
            # Neither a usable number nor text that could be parsed as one.
            raise shape.MetricReaderError(
                shape.ErrorCode.NON_NUMERIC_VALUE,
                {"raw": raw, "type": type(raw).__name__},
            )
        return _parse_numeric_string(raw)
    # ``cast`` only narrows the static type; the value passes through as is.
    return cast(float, raw)