monoai.models
Models are the core of MonoAI. They are responsible for executing prompts and returning responses.
This package uses lazy loading to avoid importing heavy optional dependencies at module import time. Classes are imported only when accessed.
1""" 2Models are the core of MonoAI. They are responsible for executing prompts and returning responses. 3 4This package uses lazy loading to avoid importing heavy optional dependencies 5at module import time. Classes are imported only when accessed. 6""" 7 8from .model import Model 9from .hosted_model import HostedModel 10from .multi_model import MultiModel 11from .collaborative_model import CollaborativeModel 12from .image_model import ImageModel 13from .voice_model import VoiceModel 14 15__all__ = ['Model', "HostedModel", 'MultiModel', 'CollaborativeModel', 'ImageModel', 'VoiceModel']
````python
class Model(BaseModel, ResponseProcessorMixin, PromptExecutorMixin):
    """
    Model class for interacting with AI language models.

    This module provides the Model class which serves as the primary interface for interacting
    with various AI language models (like GPT-4, Claude-3, etc.).

    Examples
    --------
    Basic usage:
    ```
    model = Model(provider="openai", model="gpt-4")
    response = model.ask("What is the capital of France?")
    ```

    With prompt:
    ```
    model = Model(
        provider="anthropic",
        model="claude-3",
    )
    prompt = Prompt(
        prompt="What is the capital of {country}?",
        prompt_data={"country": "France"},
        response_type=str
    )
    response = model.ask(prompt)
    ```
    """

    def __init__(
        self,
        provider: str | None = None,
        model: str | None = None,
        count_tokens: bool = False,
        count_cost: bool = False,
        max_tokens: int = None
    ):
        """
        Initialize a new Model instance.

        Parameters
        ----------
        provider : str
            Name of the provider (e.g., 'openai', 'anthropic')
        model : str
            Name of the model (e.g., 'gpt-4', 'claude-3')
        count_tokens : bool, optional
            Whether to count tokens for each request
        count_cost : bool, optional
            Whether to calculate costs for each request
        max_tokens : int, optional
            Maximum number of tokens for each request
        """
        super().__init__(count_tokens, count_cost, max_tokens)

        if provider is None:
            provider = Conf()["base_model"]["provider"]
        if model is None:
            model = Conf()["base_model"]["model"]

        load_key(provider)

        self.provider = provider
        self.model = model
        self._web_search = False

    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> Dict:
        """
        Ask the model asynchronously.

        Parameters
        ----------
        prompt : Union[str, Prompt]
            The prompt to process
        metadata : Dict, optional
            Metadata to pass to the completion call

        Returns
        -------
        Dict
            Dictionary containing:
            - response: The model's response
            - prompt: The original prompt
            - model: Dictionary with provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
        """
        response = await self._execute_async(prompt, metadata)
        return self._process_response(
            prompt,
            response,
        )

    async def ask_stream(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> AsyncGenerator[Dict, None]:
        """
        Ask the model with streaming response.

        Parameters
        ----------
        prompt : Union[str, Prompt, PromptChain]
            The prompt to process
        metadata : Dict, optional
            Metadata to pass to the completion call

        Yields
        ------
        Dict
            Streaming response chunks
        """
        yield {"provider": self.provider, "model": self.model}
        async for chunk in self._execute_stream(prompt, metadata):
            processed_chunk = self._process_chunk(chunk)
            if processed_chunk["delta"] is not None:
                yield processed_chunk

    def ask(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> Dict:
        """
        Ask the model.

        Parameters
        ----------
        prompt : Union[str, Prompt]
            The prompt to process
        metadata : Dict, optional
            Metadata to pass to the completion call

        Returns
        -------
        Dict
            Dictionary containing:
            - response: The model's response
            - prompt: The original prompt
            - model: Dictionary with provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
        """
        if isinstance(prompt, str):
            prompt = Prompt(prompt=prompt)
        response = self._execute(prompt, metadata)
        return self._process_response(
            prompt,
            response
        )
````
Model class for interacting with AI language models.
This module provides the Model class which serves as the primary interface for interacting with various AI language models (like GPT-4, Claude-3, etc.).
Examples
Basic usage:

```python
model = Model(provider="openai", model="gpt-4")
response = model.ask("What is the capital of France?")
```

With prompt:

```python
model = Model(
    provider="anthropic",
    model="claude-3",
)
prompt = Prompt(
    prompt="What is the capital of {country}?",
    prompt_data={"country": "France"},
    response_type=str
)
response = model.ask(prompt)
```
```python
def __init__(self, provider: str | None = None, model: str | None = None, count_tokens: bool = False, count_cost: bool = False, max_tokens: int = None)
```
Initialize a new Model instance.
Parameters
- provider (str, optional): Name of the provider (e.g., 'openai', 'anthropic'); if omitted, the provider from the configured base_model is used
- model (str, optional): Name of the model (e.g., 'gpt-4', 'claude-3'); if omitted, the model from the configured base_model is used
- count_tokens (bool, optional): Whether to count tokens for each request
- count_cost (bool, optional): Whether to calculate costs for each request
- max_tokens (int, optional): Maximum number of tokens for each request
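For example (a minimal sketch; it assumes a project configuration with a base_model entry, as read by the constructor above):

```python
# provider/model omitted: both fall back to the configured base_model entry
default_model = Model()

# explicit provider/model, with a cap on the number of tokens for each request
capped_model = Model(provider="anthropic", model="claude-3", max_tokens=256)
```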
```python
async def ask_stream(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> AsyncGenerator[Dict, None]
```
Ask the model with streaming response.
Parameters
- prompt (Union[str, Prompt, PromptChain]): The prompt to process
- metadata (Dict, optional): Metadata to pass to the completion call
Yields
- Dict: Streaming response chunks
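A sketch of consuming the stream; per the source above, the first chunk identifies the provider and model, and subsequent chunks carry the incremental text under a delta key:

```python
import asyncio

async def stream_answer():
    model = Model(provider="openai", model="gpt-4")
    async for chunk in model.ask_stream("Tell me a short story."):
        # Skip the initial {"provider": ..., "model": ...} chunk
        if "delta" in chunk:
            print(chunk["delta"], end="", flush=True)

asyncio.run(stream_answer())
```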
```python
def ask(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> Dict
```
Ask the model.
Parameters
- prompt (Union[str, Prompt, PromptChain]): The prompt to process
- metadata (Dict, optional): Metadata to pass to the completion call
Returns
- Dict: Dictionary containing:
- response: The model's response
- prompt: The original prompt
- model: Dictionary with provider and model name
- tokens: Token counts (if enabled)
- cost: Cost calculation (if enabled)
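For instance, with token and cost counting enabled, the returned dictionary can be inspected directly. A sketch; the exact structure of the tokens and cost entries is not specified in the source, so only their presence is assumed:

```python
model = Model(provider="openai", model="gpt-4", count_tokens=True, count_cost=True)
result = model.ask("What is the capital of France?")

print(result["response"])   # the model's answer
print(result["model"])      # provider and model name
print(result["tokens"])     # token counts, present because count_tokens=True
print(result["cost"])       # cost calculation, present because count_cost=True
```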
````python
class HostedModel(Model, ResponseProcessorMixin, PromptExecutorMixin):
    """
    HostedModel is a class for interacting with self-hosted AI language models.
    Currently support models deployed with VLLM.

    Examples
    --------
    Basic usage:
    ```
    model = HostedModel(url="http://localhost:8000", version=1, provider="openai", model="gpt-4")
    response = model.ask("What is the capital of France?")
    ```

    """

    def __init__(
        self,
        url: str,
        version: int = 1,
        provider: str | None = None,
        model: str | None = None,
        system_prompt: str | Sequence[str] = (),
        count_tokens: bool = False,
        count_cost: bool = False,
        max_tokens: int = None
    ):
        """
        Initialize a new Model instance.

        Parameters
        ----------
        provider : str
            Name of the provider (e.g., 'openai', 'anthropic')
        model : str
            Name of the model (e.g., 'gpt-4', 'claude-3')
        system_prompt : str | Sequence[str], optional
            System prompt or sequence of prompts
        count_tokens : bool, optional
            Whether to count tokens for each request
        count_cost : bool, optional
            Whether to calculate costs for each request
        max_tokens : int, optional
            Maximum number of tokens for each request
        """

        super().__init__(
            provider=provider,
            model=model,
            count_tokens=count_tokens,
            count_cost=count_cost,
            max_tokens=max_tokens
        )

        self.url = url
        self.version = version
````
HostedModel is a class for interacting with self-hosted AI language models. It currently supports models deployed with vLLM.
Examples
Basic usage:

```python
model = HostedModel(url="http://localhost:8000", version=1, provider="openai", model="gpt-4")
response = model.ask("What is the capital of France?")
```
```python
def __init__(self, url: str, version: int = 1, provider: str | None = None, model: str | None = None, system_prompt: str | Sequence[str] = (), count_tokens: bool = False, count_cost: bool = False, max_tokens: int = None)
```
Initialize a new HostedModel instance.
Parameters
- url (str): URL of the self-hosted model server (e.g., "http://localhost:8000")
- version (int, optional): Version of the hosted model API (default: 1)
- provider (str): Name of the provider (e.g., 'openai', 'anthropic')
- model (str): Name of the model (e.g., 'gpt-4', 'claude-3')
- system_prompt (str | Sequence[str], optional): System prompt or sequence of prompts
- count_tokens (bool, optional): Whether to count tokens for each request
- count_cost (bool, optional): Whether to calculate costs for each request
- max_tokens (int, optional): Maximum number of tokens for each request
````python
class MultiModel(BaseModel, PromptExecutorMixin, ResponseProcessorMixin):
    """
    A class to execute prompts across multiple AI models in parallel.

    MultiModel manages a collection of AI models and enables parallel execution of prompts
    across all models. It's particularly useful for comparing model responses or
    implementing ensemble approaches.

    Examples
    --------
    Basic comparison of models:
    ```
    models = [
        {"provider": "openai", "model": "gpt-4"},
        {"provider": "anthropic", "model": "claude-3"}
    ]
    multi_model = MultiModel(models=models)
    prompt = Prompt(
        prompt="What is 2+2?",
        response_type=int
    )
    responses = multi_model.ask(prompt)
    for resp in responses:
        print(f"{resp['model']['name']}: {resp['response']}")
    ```
    """

    def __init__(
        self,
        models: List[Dict[str, str]],
        count_tokens: bool = False,
        count_cost: bool = False
    ):
        """
        Initialize a new MultiModel instance.

        Parameters
        ----------
        models : List[Dict[str, str]]
            List of dictionaries with provider and model information
        count_tokens : bool, optional
            Whether to count tokens for each request
        count_cost : bool, optional
            Whether to calculate costs for each request
        """
        super().__init__(count_tokens, count_cost)
        self._models = [
            Model(
                provider=model['provider'],
                model=model['model'],
                count_tokens=count_tokens,
                count_cost=count_cost
            ) for model in models
        ]

    async def _task(self, model: Model, prompt: Union[str, Prompt, PromptChain]) -> Dict:
        """
        Execute a single model task asynchronously.

        Parameters
        ----------
        model : Model
            The model instance to use
        prompt : Union[str, Prompt, PromptChain]
            The prompt to process

        Returns
        -------
        Dict
            Dictionary containing:
            - response: The model's response
            - prompt: The original prompt
            - model: Dictionary with provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
        """
        response = await self._execute_async(prompt, model._agent)
        return self._process_response(
            prompt,
            response,
            model.provider,
            model.model,
            self._count_tokens,
            self._count_cost
        )

    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain]) -> List[Dict]:
        """
        Ask all models asynchronously.

        Parameters
        ----------
        prompt : Union[str, Prompt, PromptChain]
            The prompt to process across all models

        Returns
        -------
        List[Dict]
            List of response dictionaries, one per model, each containing:
            - response: The model's response
            - prompt: The original prompt
            - model: Dictionary with provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)

        Examples
        --------
        Using async/await:
        >>> responses = await multi_model.ask_async("What is 2+2?")
        >>> for resp in responses:
        ...     print(f"{resp['model']['name']}: {resp['response']}")
        """
        tasks = [self._task(model, prompt) for model in self._models]
        return await asyncio.gather(*tasks)

    def ask(self, prompt: Union[str, Prompt]) -> List[Dict]:
        """
        Ask all models.

        Parameters
        ----------
        prompt : Union[str, Prompt]
            The prompt to process across all models

        Returns
        -------
        List[Dict]
            List of response dictionaries, one per model, each containing:
            - response: The model's response
            - prompt: The original prompt
            - model: Dictionary with provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
        """
        return asyncio.run(self.ask_async(prompt))
````
A class to execute prompts across multiple AI models in parallel.
MultiModel manages a collection of AI models and enables parallel execution of prompts across all models. It's particularly useful for comparing model responses or implementing ensemble approaches.
Examples
Basic comparison of models:

```python
models = [
    {"provider": "openai", "model": "gpt-4"},
    {"provider": "anthropic", "model": "claude-3"}
]
multi_model = MultiModel(models=models)
prompt = Prompt(
    prompt="What is 2+2?",
    response_type=int
)
responses = multi_model.ask(prompt)
for resp in responses:
    print(f"{resp['model']['name']}: {resp['response']}")
```
```python
def __init__(self, models: List[Dict[str, str]], count_tokens: bool = False, count_cost: bool = False)
```
Initialize a new MultiModel instance.
Parameters
- models (List[Dict[str, str]]): List of dictionaries with provider and model information
- count_tokens (bool, optional): Whether to count tokens for each request
- count_cost (bool, optional): Whether to calculate costs for each request
```python
def ask(self, prompt: Union[str, Prompt]) -> List[Dict]
```
Ask all models.
Parameters
- prompt (Union[str, Prompt]): The prompt to process across all models
Returns
- List[Dict]: List of response dictionaries, one per model, each containing:
- response: The model's response
- prompt: The original prompt
- model: Dictionary with provider and model name
- tokens: Token counts (if enabled)
- cost: Cost calculation (if enabled)
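Building on the structure above, the per-model responses can be compared and, when cost counting is enabled, totalled. A sketch; it assumes each cost entry is a number:

```python
models = [
    {"provider": "openai", "model": "gpt-4"},
    {"provider": "anthropic", "model": "claude-3"}
]
multi_model = MultiModel(models=models, count_cost=True)

responses = multi_model.ask("Summarise the plot of Hamlet in one sentence.")
for resp in responses:
    print(f"{resp['model']['name']}: {resp['response']}")

total_cost = sum(resp["cost"] for resp in responses)  # assumes numeric cost entries
print(f"Total cost: {total_cost}")
```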
````python
class CollaborativeModel(BaseModel, PromptExecutorMixin, ResponseProcessorMixin):
    """
    A class to implement collaborative decision making across multiple AI models.

    CollaborativeModel manages a collection of AI models and an aggregator model.
    It executes prompts across all models in parallel and then uses the aggregator
    to synthesize a final response based on all individual responses.

    Examples
    --------
    Basic collaborative analysis:
    ```
    models = [
        {"provider": "openai", "model": "gpt-4"},
        {"provider": "anthropic", "model": "claude-3"}
    ]
    aggregator = {"provider": "openai", "model": "gpt-4"}
    collab = CollaborativeModel(models=models, aggregator=aggregator)
    response = collab.ask("Explain quantum computing")
    print(response["response"])  # Aggregated response
    for ind_resp in response["individual_responses"]:
        print(f"{ind_resp['model']['name']}: {ind_resp['response']}")
    ```
    """

    def __init__(
        self,
        models: List[Dict[str, str]],
        aggregator: Dict[str, str],
        count_tokens: bool = False,
        count_cost: bool = False
    ):
        """
        Initialize a new CollaborativeModel instance.

        Parameters
        ----------
        models : List[Dict[str, str]]
            List of dictionaries with provider and model information
        aggregator : Dict[str, str]
            Dictionary with provider and model information for the aggregator
        count_tokens : bool, optional
            Whether to count tokens for each request
        count_cost : bool, optional
            Whether to calculate costs for each request
        """
        super().__init__(count_tokens, count_cost)

        self._multi_model = MultiModel(
            models=models,
            count_tokens=count_tokens,
            count_cost=count_cost
        )

        self._aggregator = Model(
            provider=aggregator['provider'],
            model=aggregator['model'],
            count_tokens=count_tokens,
            count_cost=count_cost
        )

    def _format_aggregator_prompt(self, prompt: Union[str, Prompt, PromptChain], responses: List[Dict]) -> str:
        """
        Format the prompt for the aggregator model.

        Parameters
        ----------
        prompt : Union[str, Prompt, PromptChain]
            The original prompt
        responses : List[Dict]
            List of responses from individual models

        Returns
        -------
        str
            Formatted prompt for the aggregator including original question
            and all model responses
        """
        prompt_text = str(prompt)
        model_responses = "\n\n".join([
            f"Model {i+1} ({response['model']['provider']} - {response['model']['name']}):\n{response['response']}"
            for i, response in enumerate(responses)
        ])

        return f"""Please analyze the following responses from different models and provide a comprehensive answer:
Original Question: {prompt_text}
Model Responses:
{model_responses}
Please provide a well-reasoned response that takes into account all the information above."""

    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain]) -> Dict:
        """
        Ask all models and aggregate their responses asynchronously.

        Parameters
        ----------
        prompt : Union[str, Prompt, PromptChain]
            The prompt to process across all models

        Returns
        -------
        Dict
            Dictionary containing:
            - response: The aggregated response
            - prompt: The original prompt
            - model: Dictionary with aggregator's provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
            - individual_responses: List of responses from individual models

        Examples
        --------
        Using async/await:
        >>> response = await collab.ask_async("What is consciousness?")
        >>> print(response["response"])  # Aggregated response
        >>> for resp in response["individual_responses"]:
        ...     print(f"{resp['model']['name']}: {resp['response']}")
        """
        # Get responses from all models
        model_responses = await self._multi_model.ask_async(prompt)

        # Get aggregator response
        aggregator_prompt = self._format_aggregator_prompt(prompt, model_responses)
        aggregator_response = await self._execute_async(aggregator_prompt, self._aggregator._agent)

        # Process aggregator response
        processed_aggregator = self._process_response(
            aggregator_prompt,
            aggregator_response,
            self._aggregator.provider,
            self._aggregator.model,
            self._count_tokens,
            self._count_cost
        )

        processed_aggregator["individual_responses"] = model_responses
        return processed_aggregator

    def ask(self, prompt: Union[str, Prompt, PromptChain]) -> Dict:
        """
        Ask all models and aggregate their responses synchronously.

        Parameters
        ----------
        prompt : Union[str, Prompt, PromptChain]
            The prompt to process across all models

        Returns
        -------
        Dict
            Dictionary containing:
            - response: The aggregated response
            - prompt: The original prompt
            - model: Dictionary with aggregator's provider and model name
            - tokens: Token counts (if enabled)
            - cost: Cost calculation (if enabled)
            - individual_responses: List of responses from individual models
        """
        return asyncio.run(self.ask_async(prompt))
````
A class to implement collaborative decision making across multiple AI models.
CollaborativeModel manages a collection of AI models and an aggregator model. It executes prompts across all models in parallel and then uses the aggregator to synthesize a final response based on all individual responses.
Examples
Basic collaborative analysis:

```python
models = [
    {"provider": "openai", "model": "gpt-4"},
    {"provider": "anthropic", "model": "claude-3"}
]
aggregator = {"provider": "openai", "model": "gpt-4"}
collab = CollaborativeModel(models=models, aggregator=aggregator)
response = collab.ask("Explain quantum computing")
print(response["response"])  # Aggregated response
for ind_resp in response["individual_responses"]:
    print(f"{ind_resp['model']['name']}: {ind_resp['response']}")
```
```python
def __init__(self, models: List[Dict[str, str]], aggregator: Dict[str, str], count_tokens: bool = False, count_cost: bool = False)
```
Initialize a new CollaborativeModel instance.
Parameters
- models (List[Dict[str, str]]): List of dictionaries with provider and model information
- aggregator (Dict[str, str]): Dictionary with provider and model information for the aggregator
- count_tokens (bool, optional): Whether to count tokens for each request
- count_cost (bool, optional): Whether to calculate costs for each request
```python
def ask(self, prompt: Union[str, Prompt, PromptChain]) -> Dict
```
Ask all models and aggregate their responses synchronously.
Parameters
- prompt (Union[str, Prompt, PromptChain]): The prompt to process across all models
Returns
- Dict: Dictionary containing:
- response: The aggregated response
- prompt: The original prompt
- model: Dictionary with aggregator's provider and model name
- tokens: Token counts (if enabled)
- cost: Cost calculation (if enabled)
- individual_responses: List of responses from individual models
````python
class ImageModel:
    """
    A class to interact with AI image generation models.

    ImageModel provides an interface for generating images from text prompts using
    AI models. Currently supports OpenAI's DALL-E 3, with potential for expansion
    to other providers and models in the future.

    Examples
    --------
    Basic image generation:
    ```
    model = ImageModel(provider="openai", model="dall-e-3")
    response = model.generate("A beautiful garden with flowers")
    ```
    """

    def __init__(self, provider: str, model: str):
        """
        Initialize a new ImageModel instance.

        Parameters
        ----------
        provider : str
            Name of the provider (currently only "openai" is supported)
        model : str
            Name of the model (currently only "dall-e-3" is supported)

        Raises
        ------
        ValueError
            If an unsupported provider or model is specified
        """
        self.provider = provider
        self.model = model

        if provider.lower() != "openai":
            raise ValueError(f"Provider {provider} not supported")
        if model.lower() != "dall-e-3":
            raise ValueError(f"Model {model} not supported")

        load_key(provider)
        self._client = OpenAI()

    def generate(self,
                 prompt: str,
                 size: str = "1024x1024",
                 quality: str = "standard",
                 n: int = 1) -> dict:
        """
        Generate images from a text prompt.

        Parameters
        ----------
        prompt : str
            The text description of the image to generate
        size : str, optional
            The size of the generated image(s). Options:
            - "1024x1024" (default)
            - "1792x1024"
            - "1024x1792"
        quality : str, optional
            The quality of the generated image(s). Options:
            - "standard" (default)
            - "hd"
        n : int, optional
            Number of images to generate (default: 1)

        Returns
        -------
        dict
            OpenAI image generation response containing:
            - created: timestamp
            - data: list of generated images with URLs and other metadata
        """
        response = self._client.images.generate(
            model=self.model,
            prompt=prompt,
            size=size,
            quality=quality,
            n=n,
        )
        return response
````
A class to interact with AI image generation models.
ImageModel provides an interface for generating images from text prompts using AI models. Currently supports OpenAI's DALL-E 3, with potential for expansion to other providers and models in the future.
Examples
Basic image generation:

```python
model = ImageModel(provider="openai", model="dall-e-3")
response = model.generate("A beautiful garden with flowers")
```
```python
def __init__(self, provider: str, model: str)
```
Initialize a new ImageModel instance.
Parameters
- provider (str): Name of the provider (currently only "openai" is supported)
- model (str): Name of the model (currently only "dall-e-3" is supported)
Raises
- ValueError: If an unsupported provider or model is specified
```python
def generate(self, prompt: str, size: str = "1024x1024", quality: str = "standard", n: int = 1) -> dict
```
Generate images from a text prompt.
Parameters
- prompt (str): The text description of the image to generate
- size (str, optional): The size of the generated image(s). Options: "1024x1024" (default), "1792x1024", "1024x1792"
- quality (str, optional): The quality of the generated image(s). Options: "standard" (default), "hd"
- n (int, optional): Number of images to generate (default: 1)
Returns
- dict: OpenAI image generation response containing:
- created: timestamp
- data: list of generated images with URLs and other metadata
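For example, the URL of the generated image can be pulled out of the response and downloaded. A sketch; it assumes the usual OpenAI images response layout with data[0].url:

```python
from urllib.request import urlretrieve

model = ImageModel(provider="openai", model="dall-e-3")
response = model.generate("A watercolor painting of a lighthouse at dusk")

image_url = response.data[0].url          # assumes the standard OpenAI ImagesResponse shape
urlretrieve(image_url, "lighthouse.png")  # save the generated image locally
```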
```python
class VoiceModel:
    """
    A class for text-to-speech using various AI voice models.

    VoiceModel provides an interface for converting text to speech using different
    AI voice providers. Currently supports ElevenLabs and OpenAI voice models.

    Examples
    --------
    Basic usage with ElevenLabs:
    >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
    >>> audio_file = model.speak("Hello, world!")

    Using different return types:
    >>> # Save as MP3 file
    >>> audio_file = model.speak("Hello, world!", return_type="output.mp3")

    >>> # Get as base64 string
    >>> audio_b64 = model.speak("Hello, world!", return_type="base64")

    >>> # Get as bytes
    >>> audio_bytes = model.speak("Hello, world!", return_type="bytes")

    Streaming audio generation:
    >>> async for audio_chunk in model.stream("Long text to convert to speech"):
    ...     print(f"Generated audio chunk: {audio_chunk}")
    """

    def __init__(self, provider: str, model: str, voice: str):
        """
        Initialize a new VoiceModel instance.

        Parameters
        ----------
        provider : str
            The voice provider to use (e.g., 'elevenlabs', 'openai')
        model : str
            The specific voice model to use
        voice : str
            The voice ID or voice name to use for speech generation

        Raises
        ------
        ImportError
            If the required provider library is not installed
        """
        load_key(provider)
        self._provider = provider
        self._model = model
        self._voice = voice

        if self._provider == "elevenlabs":
            try:
                from elevenlabs.client import ElevenLabs
            except ImportError:
                raise ImportError("elevenlabs is not installed. Please install it with 'pip install elevenlabs'")

            self._client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

    def speak(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3"):
        """
        Convert text to speech and return the audio in the specified format.

        The method automatically splits long text into optimal chunks based on
        sentence boundaries to ensure high-quality audio generation.

        Parameters
        ----------
        text : str
            The text to convert to speech
        min_chars_per_sentence : int, optional
            Minimum number of characters per sentence group for optimal audio
            generation (default: 100)
        return_type : str, optional
            The format to return the audio in. Options:
            - File path ending with '.mp3' or '.wav' to save to file
            - 'base64' to return as base64 encoded string
            - 'bytes' to return as raw bytes (default: "audio.mp3")

        Returns
        -------
        str or bytes
            The generated audio in the specified format:
            - If return_type is a file path: returns the file path
            - If return_type is 'base64': returns base64 encoded string
            - If return_type is 'bytes': returns raw audio bytes

        Raises
        ------
        ValueError
            If return_type is not a valid option

        Examples
        --------
        >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
        >>> # Save to file
        >>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
        >>> # Get as base64
        >>> audio_b64 = model.speak("Hello, world!", return_type="base64")
        >>> # Get as bytes
        >>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
        """
        audio_groups = self._generate_audio_groups(text, min_chars_per_sentence)
        audio_chunks = []

        for group in audio_groups:
            response = self._generate(group)
            audio_chunks.append(response)

        if return_type.endswith(".mp3") or return_type.endswith(".wav"):
            combined_audio = self._combine_bytes_chunks(audio_chunks)
            with open(return_type, 'wb') as f:
                f.write(combined_audio)
            return return_type
        elif return_type == "base64":
            combined_audio = self._combine_bytes_chunks(audio_chunks)
            return base64.b64encode(combined_audio).decode('utf-8')
        elif return_type == "bytes":
            return self._combine_bytes_chunks(audio_chunks)
        else:
            raise ValueError(f"Invalid return type: {return_type}")

    def _combine_bytes_chunks(self, audio_chunks: list) -> bytes:
        """
        Combine audio chunks into a single bytes object.

        Parameters
        ----------
        audio_chunks : list
            List of audio byte chunks to combine

        Returns
        -------
        bytes
            Combined audio data as bytes
        """
        combined = b""
        for chunk in audio_chunks:
            combined += chunk
        return combined

    def _generate_audio_groups(self, text: str, min_chars_per_sentence: int):
        """
        Generate optimized sentence groups for audio generation.

        Splits text into sentence groups that meet the minimum character requirement
        for optimal audio quality while respecting sentence boundaries.

        Parameters
        ----------
        text : str
            The text to split into groups
        min_chars_per_sentence : int
            Minimum number of characters per sentence group

        Returns
        -------
        list
            List of sentence groups optimized for audio generation
        """
        sentences = [s.strip() for s in text.split(".") if s.strip()]
        audio_groups = []
        i = 0

        while i < len(sentences):
            current_group = sentences[i]
            j = i + 1

            # Continue adding subsequent sentences until min_chars_per_sentence is exceeded
            while j < len(sentences):
                next_sentence = sentences[j]
                combined_length = len(current_group + ". " + next_sentence)

                # If the combination exceeds the limit, stop
                if combined_length > min_chars_per_sentence:
                    break

                # Otherwise, join the next sentence
                current_group += ". " + next_sentence
                j += 1

            audio_groups.append(current_group)
            # Move to the next unprocessed sentence
            i = j

        return audio_groups

    def _generate(self, text):
        """
        Generate audio from text using the configured provider.

        Parameters
        ----------
        text : str
            The text to convert to speech

        Returns
        -------
        bytes
            Generated audio data as bytes
        """
        if self._provider == "elevenlabs":
            response = self._client.text_to_speech.convert(
                text=text,
                voice_id=self._voice,
                model_id=self._model,
                output_format="mp3_44100_128",
            )
            response_bytes = b""
            for r in response:
                response_bytes += r
            return response_bytes
        else:
            response = speech(
                model=self._provider + "/" + self._model,
                voice=self._voice,
                input=text,
            )
            return response.content

    async def stream(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3"):
        """
        Stream audio generation for long texts.

        Generates audio in chunks for long texts, yielding each chunk as it's
        generated. This is useful for real-time audio generation or processing
        very long texts.

        Parameters
        ----------
        text : str
            The text to convert to speech
        min_chars_per_sentence : int, optional
            Minimum number of characters per sentence group for optimal audio
            generation (default: 100)
        return_type : str, optional
            The format to return each audio chunk in. Options:
            - File path ending with '.mp3' or '.wav' to save each chunk to file
            - 'base64' to return each chunk as base64 encoded string
            - 'bytes' to return each chunk as raw bytes (default: "audio.mp3")

        Yields
        ------
        str or bytes
            Audio chunks in the specified format:
            - If return_type is a file path: yields the file path for each chunk
            - If return_type is 'base64': yields base64 encoded strings
            - If return_type is 'bytes': yields raw audio bytes

        Raises
        ------
        ValueError
            If return_type is not a valid option

        Examples
        --------
        >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
        >>> async for audio_chunk in model.stream("Long text to convert to speech"):
        ...     print(f"Generated audio chunk: {audio_chunk}")
        """
        audio_groups = self._generate_audio_groups(text, min_chars_per_sentence)

        for _, group in enumerate(audio_groups):
            # Generate audio for the sentence group
            response = self._generate(group)

            if return_type.endswith(".mp3") or return_type.endswith(".wav"):
                with open(return_type, 'wb') as f:
                    f.write(response)
                yield return_type
            elif return_type == "base64":
                yield base64.b64encode(response).decode('utf-8')
            elif return_type == "bytes":
                yield response
            else:
                raise ValueError(f"Invalid return type: {return_type}")
```
A class for text-to-speech using various AI voice models.
VoiceModel provides an interface for converting text to speech using different AI voice providers. Currently supports ElevenLabs and OpenAI voice models.
Examples
Basic usage with ElevenLabs:
>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> audio_file = model.speak("Hello, world!")
Using different return types:
>>> # Save as MP3 file
>>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
>>> # Get as base64 string
>>> audio_b64 = model.speak("Hello, world!", return_type="base64")
>>> # Get as bytes
>>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
Streaming audio generation:
>>> async for audio_chunk in model.stream("Long text to convert to speech"):
... print(f"Generated audio chunk: {audio_chunk}")
```python
def __init__(self, provider: str, model: str, voice: str)
```
Initialize a new VoiceModel instance.
Parameters
- provider (str): The voice provider to use (e.g., 'elevenlabs', 'openai')
- model (str): The specific voice model to use
- voice (str): The voice ID or voice name to use for speech generation
Raises
- ImportError: If the required provider library is not installed
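For providers other than ElevenLabs, generation is routed through a generic speech call (see the source above), so an OpenAI configuration might look like the following; the model and voice names are assumptions, not taken from the source:

```python
# Hypothetical OpenAI text-to-speech configuration; "tts-1" and "alloy" are assumed names
model = VoiceModel(provider="openai", model="tts-1", voice="alloy")
audio_file = model.speak("Welcome to MonoAI.", return_type="welcome.mp3")
```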
```python
def speak(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3")
```
Convert text to speech and return the audio in the specified format.
The method automatically splits long text into optimal chunks based on sentence boundaries to ensure high-quality audio generation.
Parameters
- text (str): The text to convert to speech
- min_chars_per_sentence (int, optional): Minimum number of characters per sentence group for optimal audio generation (default: 100)
- return_type (str, optional): The format to return the audio in (default: "audio.mp3"). Options:
  - File path ending with '.mp3' or '.wav' to save to file
  - 'base64' to return as base64 encoded string
  - 'bytes' to return as raw bytes
Returns
- str or bytes: The generated audio in the specified format:
- If return_type is a file path: returns the file path
- If return_type is 'base64': returns base64 encoded string
- If return_type is 'bytes': returns raw audio bytes
Raises
- ValueError: If return_type is not a valid option
Examples
>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> # Save to file
>>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
>>> # Get as base64
>>> audio_b64 = model.speak("Hello, world!", return_type="base64")
>>> # Get as bytes
>>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
```python
async def stream(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3")
```
Stream audio generation for long texts.
Generates audio in chunks for long texts, yielding each chunk as it's generated. This is useful for real-time audio generation or processing very long texts.
Parameters
- text (str): The text to convert to speech
- min_chars_per_sentence (int, optional): Minimum number of characters per sentence group for optimal audio generation (default: 100)
- return_type (str, optional): The format to return each audio chunk in (default: "audio.mp3"). Options:
  - File path ending with '.mp3' or '.wav' to save each chunk to file
  - 'base64' to return each chunk as base64 encoded string
  - 'bytes' to return each chunk as raw bytes
Yields
- str or bytes: Audio chunks in the specified format:
- If return_type is a file path: yields the file path for each chunk
- If return_type is 'base64': yields base64 encoded strings
- If return_type is 'bytes': yields raw audio bytes
Raises
- ValueError: If return_type is not a valid option
Examples
>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> async for audio_chunk in model.stream("Long text to convert to speech"):
... print(f"Generated audio chunk: {audio_chunk}")