Source code for notdiamond.toolkit.litellm.litellm_notdiamond

# flake8: noqa

# This file is a modified version of the original module provided by BerriAI.
# We have modified the file to add support for Not Diamond, and include the following
# license to comply with their license requirements:

# MIT License

# Copyright (c) 2023 Berri AI

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import types
from typing import Callable, Dict, List, Optional

import httpx
import litellm
import requests
from litellm._version import version
from litellm.utils import ModelResponse

# Maps Not Diamond "provider/model" identifiers to their litellm model strings
ND2LITELLM = {
    # openai
    "openai/gpt-3.5-turbo": "gpt-3.5-turbo-0125",
    "openai/gpt-3.5-turbo-0125": "gpt-3.5-turbo-0125",
    "openai/gpt-4": "gpt-4",
    "openai/gpt-4-0613": "gpt-4-0613",
    "openai/gpt-4o": "gpt-4o",
    "openai/gpt-4o-2024-05-13": "gpt-4o-2024-05-13",
    "openai/gpt-4-turbo": "gpt-4-turbo",
    "openai/gpt-4-turbo-2024-04-09": "gpt-4-turbo-2024-04-09",
    "openai/gpt-4-turbo-preview": "gpt-4-turbo-preview",
    "openai/gpt-4-0125-preview": "gpt-4-0125-preview",
    "openai/gpt-4-1106-preview": "gpt-4-1106-preview",
    "openai/gpt-4-1106-preview": "gpt-4-1106-preview",
    "openai/gpt-4o-mini": "gpt-4o-mini",
    "openai/gpt-4o-mini-2024-07-18": "gpt-4o-mini-2024-07-18",
    "openai/o1-preview-2024-09-12": "o1-preview-2024-09-12",
    "openai/o1-preview": "o1-preview",
    "openai/o1-mini-2024-09-12": "o1-mini-2024-09-12",
    "openai/o1-mini": "o1-mini",
    # anthropic
    "anthropic/claude-2.1": "claude-2.1",
    "anthropic/claude-3-opus-20240229": "claude-3-opus-20240229",
    "anthropic/claude-3-sonnet-20240229": "claude-3-sonnet-20240229",
    "anthropic/claude-3-5-sonnet-20240620": "claude-3-5-sonnet-20240620",
    "anthropic/claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
    "anthropic/claude-3-5-sonnet-latest": "claude-3-5-sonnet-20241022",
    "anthropic/claude-3-5-haiku-20241022": "claude-3-5-haiku-20241022",
    "anthropic/claude-3-haiku-20240307": "claude-3-haiku-20240307",
    # mistral
    "mistral/mistral-large-latest": "mistral/mistral-large-latest",
    "mistral/mistral-medium-latest": "mistral/mistral-medium-latest",
    "mistral/mistral-small-latest": "mistral/mistral-small-latest",
    "mistral/codestral-latest": "mistral/codestral-latest",
    "mistral/open-mistral-7b": "mistral/open-mistral-7b",
    "mistral/open-mixtral-8x7b": "mistral/open-mixtral-8x7b",
    "mistral/open-mixtral-8x22b": "mistral/open-mixtral-8x22b",
    "mistral/mistral-large-2407": "mistral/mistral-large-2407",
    "mistral/mistral-large-2402": "mistral/mistral-large-2402",
    # perplexity
    "perplexity/llama-3.1-sonar-large-128k-online": "perplexity/llama-3.1-sonar-large-128k-online",
    # cohere
    "cohere/command-r": "cohere_chat/command-r",
    "cohere/command-r-plus": "cohere_chat/command-r-plus",
    # google
    "google/gemini-pro": "gemini/gemini-pro",
    "google/gemini-1.5-pro-latest": "gemini/gemini-1.5-pro-latest",
    "google/gemini-1.5-flash-latest": "gemini/gemini-1.5-flash-latest",
    "google/gemini-1.0-pro-latest": "gemini/gemini-pro",
    # replicate
    "replicate/mistral-7b-instruct-v0.2": "replicate/mistralai/mistral-7b-instruct-v0.2",
    "replicate/mixtral-8x7b-instruct-v0.1": "replicate/mistralai/mixtral-8x7b-instruct-v0.1",
    "replicate/meta-llama-3-70b-instruct": "replicate/meta/meta-llama-3-70b-instruct",
    "replicate/meta-llama-3-8b-instruct": "replicate/meta/meta-llama-3-8b-instruct",
    "replicate/meta-llama-3.1-405b-instruct": "replicate/meta/meta-llama-3.1-405b-instruct",
    # togetherai
    "togetherai/Mistral-7B-Instruct-v0.2": "together_ai/mistralai/Mistral-7B-Instruct-v0.2",
    "togetherai/Mixtral-8x7B-Instruct-v0.1": "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
    "togetherai/Mixtral-8x22B-Instruct-v0.1": "together_ai/mistralai/Mixtral-8x22B-Instruct-v0.1",
    "togetherai/Llama-3-70b-chat-hf": "together_ai/meta-llama/Llama-3-70b-chat-hf",
    "togetherai/Llama-3-8b-chat-hf": "together_ai/meta-llama/Llama-3-8b-chat-hf",
    "togetherai/Qwen2-72B-Instruct": "together_ai/Qwen/Qwen2-72B-Instruct",
    "togetherai/Meta-Llama-3.1-8B-Instruct-Turbo": "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "togetherai/Meta-Llama-3.1-70B-Instruct-Turbo": "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "togetherai/Meta-Llama-3.1-405B-Instruct-Turbo": "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
}
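

# Example (illustrative): resolving a Not Diamond "provider/model" identifier
# to its litellm model string via the mapping above — note that some providers,
# such as cohere, remap to a different litellm prefix.
#
#   >>> ND2LITELLM["openai/gpt-4o"]
#   'gpt-4o'
#   >>> ND2LITELLM["cohere/command-r"]
#   'cohere_chat/command-r'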


class NotDiamondError(Exception):
    def __init__(
        self,
        status_code,
        message,
        url="https://api.notdiamond.ai",
    ):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(method="POST", url=url)
        self.response = httpx.Response(
            status_code=status_code, request=self.request
        )
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs

class NotDiamondConfig:
    llm_providers: List[Dict[str, str]]
    tools: Optional[List[Dict[str, str]]] = None
    max_model_depth: int = 1
    # tradeoff params: "cost"/"latency"
    tradeoff: Optional[str] = None
    preference_id: Optional[str] = None
    hash_content: Optional[bool] = False

    def __init__(
        self,
        llm_providers: List[Dict[str, str]],
        tools: Optional[List[Dict[str, str]]] = None,
        max_model_depth: Optional[int] = 1,
        tradeoff: Optional[str] = None,
        preference_id: Optional[str] = None,
        hash_content: Optional[bool] = False,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        # llm_providers is always included when set, even alongside the
        # non-None filter applied to the other attributes
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
            or k == "llm_providers"
        }
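
# Usage sketch (illustrative): constructing a config writes the non-None
# arguments onto the class; get_config() then returns them as a plain dict
# that completion() merges into optional_params.
#
#   >>> cfg = NotDiamondConfig(
#   ...     llm_providers=[{"provider": "openai", "model": "gpt-4o"}],
#   ...     tradeoff="cost",
#   ... )
#   >>> NotDiamondConfig.get_config()["tradeoff"]
#   'cost'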

def validate_environment(api_key):
    if api_key is None:
        raise ValueError(
            "Missing NOTDIAMOND_API_KEY in env - A call is being made to Not Diamond but no key is set either in the environment variables or via params"
        )
    headers = {
        "Authorization": "Bearer " + api_key,
        "accept": "application/json",
        "content-type": "application/json",
        "User-Agent": f"litellm/{version}",
    }
    return headers
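
# Example of the resulting headers (illustrative; "nd-xxxx" is a placeholder
# key, and the User-Agent suffix tracks the installed litellm version):
#
#   >>> validate_environment("nd-xxxx")
#   {'Authorization': 'Bearer nd-xxxx',
#    'accept': 'application/json',
#    'content-type': 'application/json',
#    'User-Agent': 'litellm/<installed version>'}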

def get_litellm_model(response: dict) -> str:
    nd_provider = response["providers"][0]["provider"]
    nd_model = response["providers"][0]["model"]
    nd_provider_model = f"{nd_provider}/{nd_model}"
    litellm_model = ND2LITELLM[nd_provider_model]
    return litellm_model
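
# Example (illustrative): the minimal response shape this function expects from
# the Not Diamond model-select endpoint — only the first ranked provider is
# read.
#
#   >>> get_litellm_model(
#   ...     {"providers": [{"provider": "cohere", "model": "command-r"}]}
#   ... )
#   'cohere_chat/command-r'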

def update_litellm_params(litellm_params: dict):
    """
    Create a new litellm_params dict containing only the non-default
    litellm_params from the original call, dropping custom_llm_provider,
    api_base, and api_key.
    """
    new_litellm_params = dict()
    for k, v in litellm_params.items():
        if k == "force_timeout":
            # force_timeout defaults to 600; only keep it when overridden
            if v != 600:
                new_litellm_params[k] = v
        elif v:
            # all other litellm_params have defaults of None or False
            new_litellm_params[k] = v
    if "custom_llm_provider" in new_litellm_params:
        del new_litellm_params["custom_llm_provider"]
    if "api_base" in new_litellm_params:
        del new_litellm_params["api_base"]
    if "api_key" in new_litellm_params:
        del new_litellm_params["api_key"]
    return new_litellm_params
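
# Example (illustrative): default values are dropped, and routing/credential
# keys never survive the filtering.
#
#   >>> update_litellm_params(
#   ...     {"force_timeout": 600, "api_key": "nd-xxxx",
#   ...      "acompletion": True, "logger_fn": None}
#   ... )
#   {'acompletion': True}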

def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    completion_url = api_base

    ## Load Config
    config = NotDiamondConfig.get_config()
    for k, v in config.items():
        if k not in optional_params:
            optional_params[k] = v

    # separate ND optional params from litellm optional params
    nd_params = [
        "llm_providers",
        "tools",
        "max_model_depth",
        "tradeoff",
        "preference_id",
        "hash_content",
    ]
    selected_model_params = dict()
    for k, v in optional_params.items():
        if k not in nd_params:
            selected_model_params[k] = v
    if "tools" in optional_params:
        selected_model_params["tools"] = optional_params["tools"]

    # remove any optional params that are not in the ND params
    optional_params = {
        k: v for k, v in optional_params.items() if k in nd_params
    }

    data = {
        "messages": messages,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=messages,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )

    ## MODEL SELECTION CALL
    nd_response = requests.post(
        api_base,
        headers=headers,
        json=data,
    )
    print_verbose(f"Raw response from Not Diamond: {nd_response.text}")

    ## RESPONSE OBJECT
    if nd_response.status_code != 200:
        raise NotDiamondError(
            status_code=nd_response.status_code, message=nd_response.text
        )
    nd_response = nd_response.json()
    litellm_model = get_litellm_model(nd_response)

    ## COMPLETION CALL
    litellm_params = update_litellm_params(litellm_params)
    is_async_call = litellm_params.pop("acompletion", False)
    if is_async_call:
        return litellm.acompletion(
            model=litellm_model,
            messages=messages,
            **selected_model_params,
            **litellm_params,
        )
    else:
        return litellm.completion(
            model=litellm_model,
            messages=messages,
            **selected_model_params,
            **litellm_params,
        )
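
# End-to-end sketch (illustrative; the direct invocation and placeholder values
# below are assumptions — in practice litellm's dispatch calls this handler and
# supplies model_response, encoding, logging_obj, and the other plumbing):
#
#   optional_params = {
#       "llm_providers": [
#           {"provider": "openai", "model": "gpt-4o"},
#           {"provider": "anthropic", "model": "claude-3-5-sonnet-20240620"},
#       ],
#       "tradeoff": "cost",
#   }
#   # completion() first POSTs {"messages": ..., **nd_params} to api_base to
#   # select a model, then forwards the same messages through
#   # litellm.completion() (or litellm.acompletion() when
#   # litellm_params["acompletion"] is truthy) under the mapped litellm
#   # model name.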