Skip to content

Litellm

litellm

LiteLLMBase

LiteLLMBase(litellm_model, model_id=None)

Bases: Endpoint

Source code in llmeter/endpoints/litellm.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def __init__(
    self,
    litellm_model: str,
    model_id: str | None = None,
):
    """Set up the endpoint for a LiteLLM model string.

    The provider and a model identifier are resolved from ``litellm_model``
    via ``get_llm_provider``; the resolved identifier is also used as the
    endpoint name.

    Args:
        litellm_model: Model string handed to ``get_llm_provider``.
        model_id: Optional explicit model ID. When falsy, the identifier
            resolved from ``litellm_model`` is used instead.
    """
    self.litellm_model = litellm_model
    # Only the first two fields of the lookup result are used here.
    model_id_inferred, provider, _, _ = get_llm_provider(litellm_model)
    logger.info(f"Using model {model_id_inferred} from provider {provider}")

    base_kwargs = {
        "model_id": model_id or model_id_inferred,
        "provider": provider,
        "endpoint_name": model_id_inferred,
    }
    super().__init__(**base_kwargs)

create_payload staticmethod

create_payload(user_message, max_tokens=256, system_message=None, **kwargs)

Create a payload for the LiteLLM completion() request.

Parameters:

Name Type Description Default
user_message str | Sequence[str]

The user's message or a sequence of messages.

required
max_tokens int

The maximum number of tokens to generate. Defaults to 256.

256
**kwargs Any

Additional keyword arguments to include in the payload.

{}

Returns:

Name Type Description
dict dict

The formatted payload for the LiteLLM completion() request.

Source code in llmeter/endpoints/litellm.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@staticmethod
def create_payload(
    user_message: str | Sequence[str],
    max_tokens: int = 256,
    system_message: str | None = None,
    **kwargs: Any,
) -> dict:
    """
    Create a payload for the LiteLLM `completion()` request.

    Args:
        user_message (str | Sequence[str]): The user's message or a sequence of messages.
        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 256.
        **kwargs: Additional keyword arguments to include in the payload.

    Returns:
        dict: The formatted payload for the Bedrock API request.
    """

    if isinstance(user_message, str):
        user_message = [user_message]
    payload = {
        "messages": [{"role": "user", "content": k} for k in user_message],
        "max_tokens": max_tokens,
    }
    payload.update(kwargs)
    if system_message:
        payload["messages"].append({"role": "system", "content": system_message})
    return payload