Skip to content

OpenAI response

openai_response

OpenAIEndpointBase

OpenAIEndpointBase(endpoint_name, model_id, api_key=None, provider='openai', **kwargs)

Bases: Endpoint[TOpenAIResponseBase], Generic[TOpenAIResponseBase]

Base class for OpenAI Responses API endpoints (streaming and non-streaming)

Parameters:

Name Type Description Default
endpoint_name str

Name of the endpoint

required
model_id str

ID of the OpenAI model to use

required
api_key str | None

OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)

None
provider str

Provider name (default: "openai")

'openai'
**kwargs Any

Additional arguments passed to OpenAI client

{}
Source code in llmeter/endpoints/openai_response.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    endpoint_name: str,
    model_id: str,
    api_key: str | None = None,
    provider: str = "openai",
    **kwargs: Any,
):
    """Initialize Response API endpoint.

    Args:
        endpoint_name: Name of the endpoint
        model_id: ID of the OpenAI model to use
        api_key: OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)
        provider: Provider name (default: "openai")
        **kwargs: Additional arguments passed to OpenAI client
    """
    # api_key is deliberately NOT forwarded to the base Endpoint: it belongs
    # to the OpenAI client only, which falls back to the OPENAI_API_KEY
    # environment variable when api_key is None.
    super().__init__(endpoint_name, model_id, provider=provider)
    self._client = OpenAI(api_key=api_key, **kwargs)

create_payload staticmethod

create_payload(user_message, max_output_tokens=256, instructions=None, **kwargs)

Create a payload for the Responses API request.

This is a convenience helper. You can also build the payload directly using openai.types.responses.ResponseCreateParams.

Parameters:

Name Type Description Default
user_message str | Sequence[str]

User message(s) to send (can be string or array of messages)

required
max_output_tokens int

Maximum tokens in response (default: 256)

256
instructions str | None

Optional system-level instructions

None
**kwargs

Additional payload parameters (temperature, top_p, text.format, etc.)

{}

Returns:

Type Description
ResponseCreateParams

ResponseCreateParams formatted for Responses API

Source code in llmeter/endpoints/openai_response.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@staticmethod
def create_payload(
    user_message: str | Sequence[str],
    max_output_tokens: int = 256,
    instructions: str | None = None,
    **kwargs,
) -> ResponseCreateParams:
    """Build a request payload for the Responses API.

    This is a convenience helper; payloads can also be constructed directly
    as ``openai.types.responses.ResponseCreateParams``.

    Args:
        user_message: User message(s) to send (a single string, or a sequence
            that is expanded into one user-role message per element)
        max_output_tokens: Maximum tokens in response (default: 256)
        instructions: Optional system-level instructions
        **kwargs: Additional payload parameters (temperature, top_p,
            text.format, etc.); these override the keys set by this helper

    Returns:
        ResponseCreateParams formatted for Responses API
    """
    # A plain string is passed through; any other sequence becomes a list
    # of user-role message dicts.
    payload: dict = {
        "input": (
            user_message
            if isinstance(user_message, str)
            else [{"role": "user", "content": msg} for msg in user_message]
        ),
        "max_output_tokens": max_output_tokens,
    }

    # Only include instructions when a non-empty value was supplied.
    if instructions:
        payload["instructions"] = instructions

    # Caller-provided extras take precedence over the defaults above.
    payload.update(kwargs)
    return cast(ResponseCreateParams, payload)

OpenAIResponseEndpoint

OpenAIResponseEndpoint(model_id, endpoint_name='openai-response', api_key=None, provider='openai', **kwargs)

Bases: OpenAIEndpointBase[Response]

Endpoint for OpenAI Responses API (non-streaming).

This endpoint provides access to OpenAI's newer Responses API which offers structured outputs, better response format control, and improved multi-turn conversation handling.

Parameters:

Name Type Description Default
model_id str

ID of the OpenAI model to use

required
endpoint_name str

Name of the endpoint (default: "openai-response")

'openai-response'
api_key str | None

OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)

None
provider str

Provider name (default: "openai")

'openai'
**kwargs Any

Additional arguments passed to OpenAI client

{}
Source code in llmeter/endpoints/openai_response.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def __init__(
    self,
    model_id: str,
    endpoint_name: str = "openai-response",
    api_key: str | None = None,
    provider: str = "openai",
    **kwargs: Any,
):
    """Initialize Response API endpoint.

    Args:
        model_id: ID of the OpenAI model to use
        endpoint_name: Name of the endpoint (default: "openai-response")
        api_key: OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)
        provider: Provider name (default: "openai")
        **kwargs: Additional arguments passed to OpenAI client
    """
    # Pure pass-through: the base class creates the OpenAI client from
    # api_key/**kwargs; this subclass only changes the default endpoint name.
    super().__init__(
        endpoint_name, model_id, api_key=api_key, provider=provider, **kwargs
    )

invoke

invoke(payload)

Invoke the Responses API.

Source code in llmeter/endpoints/openai_response.py
147
148
149
150
151
@OpenAIEndpointBase.llmeter_invoke
def invoke(self, payload: ResponseCreateParamsNonStreaming) -> Response:
    """Send a non-streaming request to the OpenAI Responses API and return the Response."""
    return self._client.responses.create(**payload)

prepare_payload

prepare_payload(payload)

Ensure payload specifies correct model ID and streaming disabled

Source code in llmeter/endpoints/openai_response.py
153
154
155
156
157
158
159
def prepare_payload(self, payload):
    """Return a copy of the payload pinned to this endpoint's model, streaming off.

    Args:
        payload: Caller-supplied request payload (mapping).

    Returns:
        A new dict with ``model`` set to ``self.model_id`` and ``stream``
        forced to ``False``; the input payload is not mutated.
    """
    prepared = dict(payload)
    prepared["model"] = self.model_id
    prepared["stream"] = False
    return prepared

OpenAIResponseStreamEndpoint

OpenAIResponseStreamEndpoint(model_id, endpoint_name='openai-response-stream', api_key=None, provider='openai', ttft_visible_tokens_only=True, **kwargs)

Bases: OpenAIEndpointBase[Iterable[ResponseStreamEvent]]

Endpoint for OpenAI Responses API (streaming).

This endpoint provides streaming access to OpenAI's Responses API, enabling time-to-first-token measurements and incremental response processing.

Parameters:

Name Type Description Default
ttft_visible_tokens_only bool

Controls how time_to_first_token is measured for reasoning models. When True (default), TTFT records the time to the first visible text token (response.output_text.delta), ignoring reasoning events. When False, TTFT records the time to the first token of any kind — including reasoning summary or reasoning text deltas — giving a measure of when the model first started producing output. Has no effect for non-reasoning models.

True

Parameters:

Name Type Description Default
model_id str

ID of the OpenAI model to use

required
endpoint_name str

Name of the endpoint (default: "openai-response-stream")

'openai-response-stream'
api_key str | None

OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)

None
provider str

Provider name (default: "openai")

'openai'
ttft_visible_tokens_only bool

When True (default), TTFT measures time to first visible text token. When False, TTFT includes reasoning token events.

True
**kwargs

Additional arguments passed to OpenAI client

{}
Source code in llmeter/endpoints/openai_response.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def __init__(
    self,
    model_id: str,
    endpoint_name: str = "openai-response-stream",
    api_key: str | None = None,
    provider: str = "openai",
    ttft_visible_tokens_only: bool = True,
    **kwargs: Any,
):
    """Initialize streaming Response API endpoint.

    Args:
        model_id: ID of the OpenAI model to use
        endpoint_name: Name of the endpoint (default: "openai-response-stream")
        api_key: OpenAI API key (optional, uses OPENAI_API_KEY env var if not provided)
        provider: Provider name (default: "openai")
        ttft_visible_tokens_only: When True (default), TTFT measures time to first visible text
            token. When False, TTFT includes reasoning token events.
        **kwargs: Additional arguments passed to OpenAI client
    """
    super().__init__(
        model_id=model_id,
        endpoint_name=endpoint_name,
        api_key=api_key,
        provider=provider,
        **kwargs,
    )
    # Consumed by process_raw_response to decide whether reasoning-delta
    # events may set time_to_first_token.
    self.ttft_visible_tokens_only = ttft_visible_tokens_only

invoke

invoke(payload)

Invoke the Responses API with streaming.

Source code in llmeter/endpoints/openai_response.py
241
242
243
244
245
@OpenAIEndpointBase.llmeter_invoke
def invoke(self, payload: ResponseCreateParamsStreaming):
    """Send a streaming request to the OpenAI Responses API and return the event stream."""
    return self._client.responses.create(**payload)

prepare_payload

prepare_payload(payload)

Ensure payload specifies correct model ID and streaming options

Source code in llmeter/endpoints/openai_response.py
247
248
249
250
251
252
253
def prepare_payload(self, payload):
    """Ensure payload specifies the correct model ID and streaming enabled.

    Args:
        payload: Caller-supplied request payload (mapping).

    Returns:
        A new dict with ``model`` set to ``self.model_id`` and ``stream``
        forced to ``True``; the input payload is not mutated.
    """
    # Fix: previously this also injected `stream_options={"include_usage": True}`,
    # but `stream_options` is a Chat Completions parameter — the Responses API
    # `responses.create` does not accept it and would reject the request.
    # Token usage is already delivered on the `response.completed` stream
    # event, which process_raw_response reads.
    return {**payload, "model": self.model_id, "stream": True}

process_raw_response

process_raw_response(raw_response, start_t, response)

Parse streaming Response API output into InvocationResponse.

Processes typed events from the stream:

  • ResponseCreatedEvent: captures response.id
  • ResponseTextDeltaEvent: accumulates text deltas, records TTFT
  • ResponseCompletedEvent: extracts usage from response.usage
  • ResponseFailedEvent: captures API-level errors
  • Reasoning events (response.reasoning_summary_text.delta, response.reasoning_text.delta): when ttft_visible_tokens_only is False, these set TTFT on the first reasoning token.
Source code in llmeter/endpoints/openai_response.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def process_raw_response(
    self,
    raw_response: Iterable[ResponseStreamEvent],
    start_t: float,
    response: InvocationResponse,
) -> None:
    """Parse streaming Response API output into InvocationResponse.

    Mutates `response` in place; returns None.

    Processes typed events from the stream:

    - `ResponseCreatedEvent`: captures `response.id`
    - `ResponseTextDeltaEvent`: accumulates text deltas, records TTFT
    - `ResponseCompletedEvent`: extracts usage from `response.usage`
    - `ResponseFailedEvent`: captures API-level errors
    - Reasoning events (`response.reasoning_summary_text.delta`,
      `response.reasoning_text.delta`): when `ttft_visible_tokens_only` is ``False``, these set
      TTFT on the first reasoning token.

    Args:
        raw_response: Iterable of stream events returned by the Responses API.
        start_t: `time.perf_counter()` timestamp taken when the request started;
            all latency measurements are relative to this.
        response: The InvocationResponse to populate in place.
    """
    # Event types that carry reasoning-token deltas; only consulted when
    # ttft_visible_tokens_only is False.
    _REASONING_DELTA_TYPES = frozenset((
        "response.reasoning_summary_text.delta",
        "response.reasoning_text.delta",
    ))

    for event in raw_response:
        # Timestamp each event once so TTFT and TTLT derived from the same
        # event are consistent.
        now = time.perf_counter()
        if event.type == "response.created":
            response.id = event.response.id

        elif event.type == "response.output_text.delta":
            if response.response_text is None:
                # First visible text token: start accumulating and record
                # TTFT (unless a reasoning event already set it).
                response.response_text = event.delta
                if response.time_to_first_token is None:
                    response.time_to_first_token = now - start_t
            else:
                response.response_text += event.delta
            # TTLT advances with every visible text delta.
            response.time_to_last_token = now - start_t

        elif (
            not self.ttft_visible_tokens_only
            and event.type in _REASONING_DELTA_TYPES
        ):
            # Reasoning tokens count toward TTFT only in "any token" mode,
            # and only the first one.
            if response.time_to_first_token is None:
                response.time_to_first_token = now - start_t

        elif event.type == "response.completed":
            # Final event carries token usage; detail fields are read via
            # getattr in case the SDK omits them for some models.
            usage = event.response.usage
            if usage is not None:
                response.num_tokens_input = usage.input_tokens
                response.num_tokens_output = usage.output_tokens
                details = getattr(usage, "input_tokens_details", None)
                if details:
                    response.num_tokens_input_cached = getattr(
                        details, "cached_tokens", None
                    )
                output_details = getattr(usage, "output_tokens_details", None)
                if output_details:
                    response.num_tokens_output_reasoning = getattr(
                        output_details, "reasoning_tokens", None
                    )

        elif event.type == "response.failed":
            # API-level failure: surface "<code>: <message>" when both are
            # available, falling back to a generic message.
            error_obj = getattr(event.response, "error", None)
            if error_obj is not None:
                error_msg = (
                    getattr(error_obj, "message", None) or str(error_obj)
                )
                error_code = getattr(error_obj, "code", None)
                if error_code:
                    error_msg = f"{error_code}: {error_msg}"
            else:
                error_msg = "Response API request failed"
            response.error = error_msg
            response.time_to_last_token = now - start_t