Skip to content

Bedrock

bedrock

LLMeter targets for testing the Amazon Bedrock Converse and ConverseStream APIs

Alternatively, see:

BedrockBase

BedrockBase(model_id, endpoint_name=None, region=None, inference_config=None, bedrock_boto3_client=None, max_attempts=3)

Bases: Endpoint

Base class for interacting with Amazon Bedrock endpoints.

This class provides core functionality for making requests to Amazon Bedrock endpoints, handling configuration and client initialization.

Parameters:

Name Type Description Default
model_id str

The identifier for the model to use

required
endpoint_name str | None

Name of the endpoint. Defaults to None.

None
region str | None

AWS region to use. Defaults to None.

None
inference_config dict | None

Configuration for inference. Defaults to None.

None
bedrock_boto3_client client | None

Pre-configured boto3 client. Defaults to None.

None
max_attempts int

Maximum number of retry attempts. Defaults to 3.

3
Source code in llmeter/endpoints/bedrock.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    model_id: str,
    endpoint_name: str | None = None,
    region: str | None = None,
    inference_config: dict | None = None,
    bedrock_boto3_client=None,
    max_attempts: int = 3,
):
    """Record endpoint metadata and prepare a Bedrock runtime client.

    A caller-supplied boto3 client is used verbatim; otherwise one is
    created with standard-mode retries capped at ``max_attempts``.
    """
    super().__init__(
        model_id=model_id,
        endpoint_name=endpoint_name or "amazon bedrock",
        provider="bedrock",
    )

    # Fall back to the default boto3 session's region when none is given.
    self.region = region or boto3.session.Session().region_name
    logger.info(f"Using AWS region: {self.region}")

    if bedrock_boto3_client is not None:
        self._bedrock_client = bedrock_boto3_client
    else:
        retry_config = Config(
            retries={"max_attempts": max_attempts, "mode": "standard"}
        )
        self._bedrock_client = boto3.client(
            "bedrock-runtime", region_name=self.region, config=retry_config
        )
    self._inference_config = inference_config

create_payload staticmethod

create_payload(user_message, max_tokens=256, **kwargs)

Create a payload for the Bedrock Converse API request.

Parameters:

Name Type Description Default
user_message str | list[str]

The user's message or a sequence of messages.

required
max_tokens int

The maximum number of tokens to generate. Defaults to 256.

256
**kwargs Any

Additional keyword arguments to include in the payload.

{}

Returns:

Name Type Description
dict dict

The formatted payload for the Bedrock API request.

Raises:

Type Description
TypeError

If user_message is not a string or list of strings

ValueError

If max_tokens is not a positive integer

Source code in llmeter/endpoints/bedrock.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
@staticmethod
def create_payload(
    user_message: str | list[str], max_tokens: int = 256, **kwargs: Any
) -> dict:
    """
    Create a payload for the Bedrock Converse API request.

    Args:
        user_message (str | Sequence[str]): The user's message or a sequence of messages.
        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 256.
        **kwargs: Additional keyword arguments to include in the payload.

    Returns:
        dict: The formatted payload for the Bedrock API request.

    Raises:
        TypeError: If user_message is not a string or list of strings
        ValueError: If max_tokens is not a positive integer
    """
    if not isinstance(user_message, (str, list)):
        raise TypeError("user_message must be a string or list of strings")

    if isinstance(user_message, list):
        if not all(isinstance(msg, str) for msg in user_message):
            raise TypeError("All messages must be strings")
        if not user_message:
            raise ValueError("user_message list cannot be empty")

    if not isinstance(max_tokens, int) or max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")

    if isinstance(user_message, str):
        user_message = [user_message]

    try:
        payload: dict = {
            "messages": [
                {"role": "user", "content": [{"text": k}]} for k in user_message
            ],
        }
        payload.update(kwargs)
        if payload.get("inferenceConfig") is None:
            payload["inferenceConfig"] = {}

        payload["inferenceConfig"] = {
            **payload["inferenceConfig"],
            "maxTokens": max_tokens,
        }
        return payload

    except Exception as e:
        logger.error(f"Error creating payload: {e}")
        raise RuntimeError(f"Failed to create payload: {str(e)}")

BedrockConverse

BedrockConverse(model_id, endpoint_name=None, region=None, inference_config=None, bedrock_boto3_client=None, max_attempts=3)

Bases: BedrockBase

Source code in llmeter/endpoints/bedrock.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    model_id: str,
    endpoint_name: str | None = None,
    region: str | None = None,
    inference_config: dict | None = None,
    bedrock_boto3_client=None,
    max_attempts: int = 3,
):
    """Record endpoint metadata and prepare a Bedrock runtime client.

    A caller-supplied boto3 client is used verbatim; otherwise one is
    created with standard-mode retries capped at ``max_attempts``.
    """
    super().__init__(
        model_id=model_id,
        endpoint_name=endpoint_name or "amazon bedrock",
        provider="bedrock",
    )

    # Fall back to the default boto3 session's region when none is given.
    self.region = region or boto3.session.Session().region_name
    logger.info(f"Using AWS region: {self.region}")

    if bedrock_boto3_client is not None:
        self._bedrock_client = bedrock_boto3_client
    else:
        retry_config = Config(
            retries={"max_attempts": max_attempts, "mode": "standard"}
        )
        self._bedrock_client = boto3.client(
            "bedrock-runtime", region_name=self.region, config=retry_config
        )
    self._inference_config = inference_config

invoke

invoke(payload, **kwargs)

Invoke the Bedrock converse API with the given payload.

Parameters:

Name Type Description Default
payload dict

The payload containing the request parameters

required
**kwargs Any

Additional keyword arguments to include in the payload

{}

Returns:

Name Type Description
InvocationResponse InvocationResponse

Response object containing generated text and metadata

Raises:

Type Description
ClientError

If there is an error calling the Bedrock API

ValueError

If payload is invalid

TypeError

If payload is not a dictionary

Source code in llmeter/endpoints/bedrock.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def invoke(self, payload: dict, **kwargs: Any) -> InvocationResponse:
    """
    Invoke the Bedrock converse API with the given payload.

    Args:
        payload (dict): The payload containing the request parameters.
        **kwargs: Additional keyword arguments merged into the payload;
            keys already present in ``payload`` take precedence.

    Returns:
        InvocationResponse: Response object containing generated text and
            metadata, or an error response if the API call or response
            parsing failed.

    Raises:
        TypeError: If payload is not a dictionary.
    """
    if not isinstance(payload, dict):
        raise TypeError("Payload must be a dictionary")

    try:
        # Explicit payload entries win over the extra keyword arguments.
        merged = dict(kwargs)
        merged.update(payload)
        payload = merged

        if payload.get("inferenceConfig") is None:
            payload["inferenceConfig"] = self._inference_config or {}
        payload["modelId"] = self.model_id

        start = time.perf_counter()
        try:
            raw_response = self._bedrock_client.converse(**payload)  # type: ignore
        except ClientError as e:
            logger.error(f"Bedrock API error: {e}")
            return InvocationResponse.error_output(
                input_payload=payload, id=uuid4().hex, error=str(e)
            )
        except Exception as e:
            logger.error(f"Unexpected error during API call: {e}")
            return InvocationResponse.error_output(
                input_payload=payload, id=uuid4().hex, error=str(e)
            )
        elapsed = time.perf_counter() - start

        parsed = self._parse_converse_response(raw_response)  # type: ignore
        parsed.input_payload = payload
        parsed.input_prompt = self._parse_payload(payload)
        parsed.time_to_last_token = elapsed
        return parsed

    except Exception as e:
        # Best-effort boundary: report failures as an error response
        # instead of raising.
        logger.error(f"Error in invoke method: {e}")
        return InvocationResponse.error_output(
            input_payload=payload, id=uuid4().hex, error=str(e)
        )

BedrockConverseStream

BedrockConverseStream(model_id, endpoint_name=None, region=None, inference_config=None, bedrock_boto3_client=None, max_attempts=3)

Bases: BedrockConverse

Source code in llmeter/endpoints/bedrock.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    model_id: str,
    endpoint_name: str | None = None,
    region: str | None = None,
    inference_config: dict | None = None,
    bedrock_boto3_client=None,
    max_attempts: int = 3,
):
    """Record endpoint metadata and prepare a Bedrock runtime client.

    A caller-supplied boto3 client is used verbatim; otherwise one is
    created with standard-mode retries capped at ``max_attempts``.
    """
    super().__init__(
        model_id=model_id,
        endpoint_name=endpoint_name or "amazon bedrock",
        provider="bedrock",
    )

    # Fall back to the default boto3 session's region when none is given.
    self.region = region or boto3.session.Session().region_name
    logger.info(f"Using AWS region: {self.region}")

    if bedrock_boto3_client is not None:
        self._bedrock_client = bedrock_boto3_client
    else:
        retry_config = Config(
            retries={"max_attempts": max_attempts, "mode": "standard"}
        )
        self._bedrock_client = boto3.client(
            "bedrock-runtime", region_name=self.region, config=retry_config
        )
    self._inference_config = inference_config