monoai.models

Models are the core of MonoAI. They are responsible for executing prompts and returning responses.

This package uses lazy loading to avoid importing heavy optional dependencies at module import time. Classes are imported only when accessed.
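One common way to implement such lazy loading is a module-level `__getattr__` (PEP 562). The sketch below is illustrative only; the name-to-submodule mapping is assumed from the imports in the source listing, not taken from the package's actual implementation:

```
import importlib

# Hypothetical lazy-import table: public name -> submodule (assumed layout)
_LAZY_IMPORTS = {
    "Model": ".model",
    "HostedModel": ".hosted_model",
    "MultiModel": ".multi_model",
    "CollaborativeModel": ".collaborative_model",
    "ImageModel": ".image_model",
    "VoiceModel": ".voice_model",
}

def __getattr__(name):
    # Called only when `name` is not found normally; imports on first access
    if name in _LAZY_IMPORTS:
        module = importlib.import_module(_LAZY_IMPORTS[name], __name__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```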

 1"""
 2Models are the core of MonoAI. They are responsible for executing prompts and returning responses.
 3
 4This package uses lazy loading to avoid importing heavy optional dependencies
 5at module import time. Classes are imported only when accessed.
 6"""
 7
 8from .model import Model
 9from .hosted_model import HostedModel
10from .multi_model import MultiModel
11from .collaborative_model import CollaborativeModel
12from .image_model import ImageModel
13from .voice_model import VoiceModel
14
15__all__ = ['Model', 'HostedModel', 'MultiModel', 'CollaborativeModel', 'ImageModel', 'VoiceModel']
class Model(monoai.models._base_model.BaseModel, monoai.models._response_processor.ResponseProcessorMixin, monoai.models._prompt_executor.PromptExecutorMixin):
 13class Model(BaseModel, ResponseProcessorMixin, PromptExecutorMixin):
 14    """
 15    Model class for interacting with AI language models.
 16
 17    This class serves as the primary interface for interacting
 18    with various AI language models (like GPT-4, Claude-3, etc.).
 19
 20    Examples
 21    --------
 22    Basic usage:
 23    ```
 24    model = Model(provider="openai", model="gpt-4")
 25    response = model.ask("What is the capital of France?")
 26    ```
 27
 28    With prompt:
 29    ```
 30    model = Model(
 31        provider="anthropic",
 32        model="claude-3",
 33    )
 34    prompt = Prompt(
 35        prompt="What is the capital of {country}?",
 36        prompt_data={"country": "France"},
 37        response_type=str
 38    )
 39    response = model.ask(prompt)
 40    ```
 41    """
 42
 43    def __init__(
 44        self, 
 45        provider: str | None = None, 
 46        model: str | None = None, 
 47        count_tokens: bool = False, 
 48        count_cost: bool = False,
 49        max_tokens: int | None = None
 50    ):
 51        """
 52        Initialize a new Model instance.
 53
 54        Parameters
 55        ----------
 56        provider : str
 57            Name of the provider (e.g., 'openai', 'anthropic')
 58        model : str
 59            Name of the model (e.g., 'gpt-4', 'claude-3')
 60        count_tokens : bool, optional
 61            Whether to count tokens for each request
 62        count_cost : bool, optional
 63            Whether to calculate costs for each request
 64        max_tokens : int, optional
 65            Maximum number of tokens for each request
 66        """
 67        super().__init__(count_tokens, count_cost, max_tokens)
 68        
 69        if provider is None:
 70            provider = Conf()["base_model"]["provider"]
 71        if model is None:
 72            model = Conf()["base_model"]["model"]
 73
 74        load_key(provider)
 75
 76        self.provider = provider
 77        self.model = model
 78        self._web_search = False
 79
 80    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> Dict:
 81        """
 82        Ask the model asynchronously.
 83
 84        Parameters
 85        ----------
 86        prompt : Union[str, Prompt, PromptChain]
 87            The prompt to process
 88        metadata : Dict, optional
 89            Metadata to pass to the completion call
 90
 91        Returns
 92        -------
 93        Dict
 94            Dictionary containing:
 95            - response: The model's response
 96            - prompt: The original prompt
 97            - model: Dictionary with provider and model name
 98            - tokens: Token counts (if enabled)
 99            - cost: Cost calculation (if enabled)
100
101        """
102        response = await self._execute_async(prompt, metadata)
103        return self._process_response(
104            prompt,
105            response,
106        )
107
108    
109    async def ask_stream(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> AsyncGenerator[Dict, None]:
110        """
111        Ask the model with streaming response.
112
113        Parameters
114        ----------
115        prompt : Union[str, Prompt, PromptChain]
116            The prompt to process
117        metadata : Dict, optional
118            Metadata to pass to the completion call
119
120        Yields
121        ------
122        Dict
123            Streaming response chunks
124        """
125        yield {"provider":self.provider, "model":self.model}
126        async for chunk in self._execute_stream(prompt, metadata):
127            processed_chunk = self._process_chunk(chunk)
128            if processed_chunk["delta"] is not None:
129                yield processed_chunk
130
131
132
133    def ask(self, prompt: Union[str, Prompt, PromptChain], metadata: Dict = {}) -> Dict:
134        """
135        Ask the model.
136
137        Parameters
138        ----------
139        prompt : Union[str, Prompt, PromptChain]
140            The prompt to process
141        metadata : Dict, optional
142            Metadata to pass to the completion call
143
144        Returns
145        -------
146        Dict
147            Dictionary containing:
148            - response: The model's response
149            - prompt: The original prompt
150            - model: Dictionary with provider and model name
151            - tokens: Token counts (if enabled)
152            - cost: Cost calculation (if enabled)
153
154        """
155        if isinstance(prompt, str):
156            prompt = Prompt(prompt=prompt)
157        response = self._execute(prompt, metadata)
158        return self._process_response(
159            prompt,
160            response
161        )

Model class for interacting with AI language models.

This class serves as the primary interface for interacting with various AI language models (like GPT-4, Claude-3, etc.).

Examples

Basic usage:

model = Model(provider="openai", model="gpt-4")
response = model.ask("What is the capital of France?")

With prompt:

model = Model(
    provider="anthropic",
    model="claude-3",
)
prompt = Prompt(
    prompt="What is the capital of {country}?",
    prompt_data={"country": "France"},
    response_type=str
)
response = model.ask(prompt)
Model(provider: str | None = None, model: str | None = None, count_tokens: bool = False, count_cost: bool = False, max_tokens: int | None = None)

Initialize a new Model instance.

Parameters
  • provider (str): Name of the provider (e.g., 'openai', 'anthropic')
  • model (str): Name of the model (e.g., 'gpt-4', 'claude-3')
  • count_tokens (bool, optional): Whether to count tokens for each request
  • count_cost (bool, optional): Whether to calculate costs for each request
  • max_tokens (int, optional): Maximum number of tokens for each request
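When `provider` or `model` is omitted, the constructor falls back to the `base_model` entry of the MonoAI configuration (the `Conf()` lookup in the source above). A minimal sketch, assuming the configuration defines that entry:

```
from monoai.models import Model

# Assuming the project configuration contains something like:
#   "base_model": {"provider": "openai", "model": "gpt-4"}
model = Model()  # resolves provider/model from Conf()["base_model"]
response = model.ask("What is the capital of France?")
```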
provider
model
async def ask_stream(self, prompt: Union[str, monoai.prompts.Prompt, monoai.prompts.PromptChain], metadata: Dict = {}) -> AsyncGenerator[Dict, None]:

Ask the model with streaming response.

Parameters
  • prompt (Union[str, Prompt, PromptChain]): The prompt to process
  • metadata (Dict, optional): Metadata to pass to the completion call
Yields
  • Dict: Streaming response chunks
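Per the source, the first chunk identifies the provider and model, and later chunks carry a `delta` field with incremental text. A usage sketch (assumes Python 3.10+ for `anext`):

```
import asyncio
from monoai.models import Model

async def main():
    model = Model(provider="openai", model="gpt-4")
    stream = model.ask_stream("Write a haiku about the sea")
    header = await anext(stream)  # first chunk: {"provider": ..., "model": ...}
    async for chunk in stream:
        print(chunk["delta"], end="", flush=True)  # incremental text

asyncio.run(main())
```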
def ask(self, prompt: Union[str, monoai.prompts.Prompt, monoai.prompts.PromptChain], metadata: Dict = {}) -> Dict:

Ask the model.

Parameters
  • prompt (Union[str, Prompt, PromptChain]): The prompt to process
  • metadata (Dict, optional): Metadata to pass to the completion call
Returns
  • Dict: Dictionary containing:
    • response: The model's response
    • prompt: The original prompt
    • model: Dictionary with provider and model name
    • tokens: Token counts (if enabled)
    • cost: Cost calculation (if enabled)
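A sketch of reading the returned dictionary; the `model['provider']` and `model['name']` keys follow the usage shown in this package's other examples:

```
from monoai.models import Model

model = Model(provider="openai", model="gpt-4", count_tokens=True, count_cost=True)
result = model.ask("What is the capital of France?")

print(result["response"])                        # the model's answer
print(result["model"]["provider"], result["model"]["name"])
print(result.get("tokens"), result.get("cost"))  # present because counting is enabled
```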
class HostedModel(monoai.models.Model, monoai.models._response_processor.ResponseProcessorMixin, monoai.models._prompt_executor.PromptExecutorMixin):
 9class HostedModel(Model, ResponseProcessorMixin, PromptExecutorMixin):
10    """
11    HostedModel is a class for interacting with self-hosted AI language models.
12    Currently supports models deployed with vLLM.
13    
14    Examples
15    --------
16    Basic usage:
17    ```
18    model = HostedModel(url="http://localhost:8000", version=1, provider="openai", model="gpt-4")
19    response = model.ask("What is the capital of France?")
20    ```
21
22    """
23
24    def __init__(
25        self, 
26        url: str,
27        version: int = 1,
28        provider: str | None = None, 
29        model: str | None = None, 
30        system_prompt: str | Sequence[str] = (),
31        count_tokens: bool = False, 
32        count_cost: bool = False,
33        max_tokens: int | None = None
34    ):
35        """
36        Initialize a new HostedModel instance.
37
38        Parameters
39        ----------
40        provider : str
41            Name of the provider (e.g., 'openai', 'anthropic')
42        model : str
43            Name of the model (e.g., 'gpt-4', 'claude-3')
44        system_prompt : str | Sequence[str], optional
45            System prompt or sequence of prompts
46        count_tokens : bool, optional
47            Whether to count tokens for each request
48        count_cost : bool, optional
49            Whether to calculate costs for each request
50        max_tokens : int, optional
51            Maximum number of tokens for each request
52        """
53
54        super().__init__(
55            provider=provider,
56            model=model,
57            count_tokens=count_tokens, 
58            count_cost=count_cost, 
59            max_tokens=max_tokens
60        )
61        
62        self.url = url
63        self.version = version

HostedModel is a class for interacting with self-hosted AI language models. Currently supports models deployed with vLLM.

Examples

Basic usage:

model = HostedModel(url="http://localhost:8000", version=1, provider="openai", model="gpt-4")
response = model.ask("What is the capital of France?")
HostedModel(url: str, version: int = 1, provider: str | None = None, model: str | None = None, system_prompt: Union[str, Sequence[str]] = (), count_tokens: bool = False, count_cost: bool = False, max_tokens: int | None = None)

Initialize a new HostedModel instance.

Parameters
  • url (str): Base URL of the hosted model server (e.g., 'http://localhost:8000')
  • version (int, optional): Version of the API exposed by the server (default: 1)
  • provider (str): Name of the provider (e.g., 'openai', 'anthropic')
  • model (str): Name of the model (e.g., 'gpt-4', 'claude-3')
  • system_prompt (str | Sequence[str], optional): System prompt or sequence of prompts
  • count_tokens (bool, optional): Whether to count tokens for each request
  • count_cost (bool, optional): Whether to calculate costs for each request
  • max_tokens (int, optional): Maximum number of tokens for each request
url
version
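A sketch of pointing HostedModel at a local vLLM server. The served model name is illustrative, and the assumption that `version=1` selects the `/v1` API path is not confirmed by the source:

```
from monoai.models import HostedModel

# e.g. a server started with: vllm serve mistralai/Mistral-7B-Instruct-v0.2
model = HostedModel(
    url="http://localhost:8000",
    version=1,                                   # assumed to map to the /v1 path
    provider="openai",                           # vLLM exposes an OpenAI-compatible API
    model="mistralai/Mistral-7B-Instruct-v0.2",  # must match the served model name
)
response = model.ask("What is the capital of France?")
```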
class MultiModel(monoai.models._base_model.BaseModel, monoai.models._prompt_executor.PromptExecutorMixin, monoai.models._response_processor.ResponseProcessorMixin):
 11class MultiModel(BaseModel, PromptExecutorMixin, ResponseProcessorMixin):
 12    """
 13    A class to execute prompts across multiple AI models in parallel.
 14    
 15    MultiModel manages a collection of AI models and enables parallel execution of prompts
 16    across all models. It's particularly useful for comparing model responses or
 17    implementing ensemble approaches.
 18
 19    Examples
 20    --------
 21    Basic comparison of models:
 22    ```
 23    models = [
 24        {"provider": "openai", "model": "gpt-4"},
 25        {"provider": "anthropic", "model": "claude-3"}
 26    ]
 27    multi_model = MultiModel(models=models)
 28    prompt = Prompt(
 29        prompt="What is 2+2?",
 30        response_type=int
 31    )
 32    responses = multi_model.ask(prompt)
 33    for resp in responses:
 34        print(f"{resp['model']['name']}: {resp['response']}")
 35    ```
 36    """
 37
 38    def __init__(
 39        self, 
 40        models: List[Dict[str, str]], 
 41        count_tokens: bool = False, 
 42        count_cost: bool = False
 43    ):
 44        """
 45        Initialize a new MultiModel instance.
 46
 47        Parameters
 48        ----------
 49        models : List[Dict[str, str]]
 50            List of dictionaries with provider and model information
 51        count_tokens : bool, optional
 52            Whether to count tokens for each request
 53        count_cost : bool, optional
 54            Whether to calculate costs for each request
 55        """
 56        super().__init__(count_tokens, count_cost)
 57        self._models = [
 58            Model(
 59                provider=model['provider'],
 60                model=model['model'],
 61                count_tokens=count_tokens,
 62                count_cost=count_cost
 63            ) for model in models
 64        ]
 65
 66    async def _task(self, model: Model, prompt: Union[str, Prompt, PromptChain]) -> Dict:
 67        """
 68        Execute a single model task asynchronously.
 69
 70        Parameters
 71        ----------
 72        model : Model
 73            The model instance to use
 74        prompt : Union[str, Prompt, PromptChain]
 75            The prompt to process
 76
 77        Returns
 78        -------
 79        Dict
 80            Dictionary containing:
 81            - response: The model's response
 82            - prompt: The original prompt
 83            - model: Dictionary with provider and model name
 84            - tokens: Token counts (if enabled)
 85            - cost: Cost calculation (if enabled)
 86        """
 87        response = await self._execute_async(prompt, model._agent)
 88        return self._process_response(
 89            prompt,
 90            response,
 91            model.provider,
 92            model.model,
 93            self._count_tokens,
 94            self._count_cost
 95        )
 96
 97    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain]) -> List[Dict]:
 98        """
 99        Ask all models asynchronously.
100
101        Parameters
102        ----------
103        prompt : Union[str, Prompt, PromptChain]
104            The prompt to process across all models
105
106        Returns
107        -------
108        List[Dict]
109            List of response dictionaries, one per model, each containing:
110            - response: The model's response
111            - prompt: The original prompt
112            - model: Dictionary with provider and model name
113            - tokens: Token counts (if enabled)
114            - cost: Cost calculation (if enabled)
115
116        Examples
117        --------
118        Using async/await:
119            >>> responses = await multi_model.ask_async("What is 2+2?")
120            >>> for resp in responses:
121            ...     print(f"{resp['model']['name']}: {resp['response']}")
122        """
123        tasks = [self._task(model, prompt) for model in self._models]
124        return await asyncio.gather(*tasks)
125
126    def ask(self, prompt: Union[str, Prompt]) -> List[Dict]:
127        """
128        Ask all models.
129
130        Parameters
131        ----------
132        prompt : Union[str, Prompt]
133            The prompt to process across all models
134
135        Returns
136        -------
137        List[Dict]
138            List of response dictionaries, one per model, each containing:
139            - response: The model's response
140            - prompt: The original prompt
141            - model: Dictionary with provider and model name
142            - tokens: Token counts (if enabled)
143            - cost: Cost calculation (if enabled)
144
145        """
146        return asyncio.run(self.ask_async(prompt))

A class to execute prompts across multiple AI models in parallel.

MultiModel manages a collection of AI models and enables parallel execution of prompts across all models. It's particularly useful for comparing model responses or implementing ensemble approaches.

Examples

Basic comparison of models:

models = [
    {"provider": "openai", "model": "gpt-4"},
    {"provider": "anthropic", "model": "claude-3"}
]
multi_model = MultiModel(models=models)
prompt = Prompt(
    prompt="What is 2+2?",
    response_type=int
)
responses = multi_model.ask(prompt)
for resp in responses:
    print(f"{resp['model']['name']}: {resp['response']}")
MultiModel(models: List[Dict[str, str]], count_tokens: bool = False, count_cost: bool = False)

Initialize a new MultiModel instance.

Parameters
  • models (List[Dict[str, str]]): List of dictionaries with provider and model information
  • count_tokens (bool, optional): Whether to count tokens for each request
  • count_cost (bool, optional): Whether to calculate costs for each request
def ask(self, prompt: Union[str, monoai.prompts.Prompt]) -> List[Dict]:

Ask all models.

Parameters
  • prompt (Union[str, Prompt]): The prompt to process across all models
Returns
  • List[Dict]: List of response dictionaries, one per model, each containing:
    • response: The model's response
    • prompt: The original prompt
    • model: Dictionary with provider and model name
    • tokens: Token counts (if enabled)
    • cost: Cost calculation (if enabled)
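Because one task per model is dispatched through `asyncio.gather`, the async entry point used in the source's examples fits inside an existing event loop:

```
import asyncio
from monoai.models import MultiModel

async def main():
    multi = MultiModel(models=[
        {"provider": "openai", "model": "gpt-4"},
        {"provider": "anthropic", "model": "claude-3"},
    ])
    responses = await multi.ask_async("What is 2+2?")  # one request per model, in parallel
    for resp in responses:
        print(f"{resp['model']['name']}: {resp['response']}")

asyncio.run(main())
```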
class CollaborativeModel(monoai.models._base_model.BaseModel, monoai.models._prompt_executor.PromptExecutorMixin, monoai.models._response_processor.ResponseProcessorMixin):
 12class CollaborativeModel(BaseModel, PromptExecutorMixin, ResponseProcessorMixin):
 13    """
 14    A class to implement collaborative decision making across multiple AI models.
 15    
 16    CollaborativeModel manages a collection of AI models and an aggregator model.
 17    It executes prompts across all models in parallel and then uses the aggregator
 18    to synthesize a final response based on all individual responses.
 19
 20    Examples
 21    --------
 22    Basic collaborative analysis:
 23    ```
 24    models = [
 25        {"provider": "openai", "model": "gpt-4"},
 26        {"provider": "anthropic", "model": "claude-3"}
 27    ]
 28    aggregator = {"provider": "openai", "model": "gpt-4"}
 29    collab = CollaborativeModel(models=models, aggregator=aggregator)
 30    response = collab.ask("Explain quantum computing")
 31    print(response["response"])  # Aggregated response
 32    for ind_resp in response["individual_responses"]:
 33        print(f"{ind_resp['model']['name']}: {ind_resp['response']}")
 34    ```
 35    """
 36
 37    def __init__(
 38        self,
 39        models: List[Dict[str, str]],
 40        aggregator: Dict[str, str],
 41        count_tokens: bool = False,
 42        count_cost: bool = False
 43    ):
 44        """
 45        Initialize a new CollaborativeModel instance.
 46
 47        Parameters
 48        ----------
 49        models : List[Dict[str, str]]
 50            List of dictionaries with provider and model information
 51        aggregator : Dict[str, str]
 52            Dictionary with provider and model information for the aggregator
 53        count_tokens : bool, optional
 54            Whether to count tokens for each request
 55        count_cost : bool, optional
 56            Whether to calculate costs for each request
 57        """
 58        super().__init__(count_tokens, count_cost)
 59
 60        self._multi_model = MultiModel(
 61            models=models,
 62            count_tokens=count_tokens,
 63            count_cost=count_cost
 64        )
 65
 66        self._aggregator = Model(
 67            provider=aggregator['provider'],
 68            model=aggregator['model'],
 69            count_tokens=count_tokens,
 70            count_cost=count_cost
 71        )
 72
 73    def _format_aggregator_prompt(self, prompt: Union[str, Prompt, PromptChain], responses: List[Dict]) -> str:
 74        """
 75        Format the prompt for the aggregator model.
 76
 77        Parameters
 78        ----------
 79        prompt : Union[str, Prompt, PromptChain]
 80            The original prompt
 81        responses : List[Dict]
 82            List of responses from individual models
 83
 84        Returns
 85        -------
 86        str
 87            Formatted prompt for the aggregator including original question
 88            and all model responses
 89        """
 90        prompt_text = str(prompt)
 91        model_responses = "\n\n".join([
 92            f"Model {i+1} ({response['model']['provider']} - {response['model']['name']}):\n{response['response']}"
 93            for i, response in enumerate(responses)
 94        ])
 95        
 96        return f"""Please analyze the following responses from different models and provide a comprehensive answer:
 97                    Original Question: {prompt_text}
 98                    Model Responses:
 99                    {model_responses}
100                    Please provide a well-reasoned response that takes into account all the information above."""
101
102    async def _ask_async(self, prompt: Union[str, Prompt, PromptChain]) -> Dict:
103        """
104        Ask all models and aggregate their responses asynchronously.
105
106        Parameters
107        ----------
108        prompt : Union[str, Prompt, PromptChain]
109            The prompt to process across all models
110
111        Returns
112        -------
113        Dict
114            Dictionary containing:
115            - response: The aggregated response
116            - prompt: The original prompt
117            - model: Dictionary with aggregator's provider and model name
118            - tokens: Token counts (if enabled)
119            - cost: Cost calculation (if enabled)
120            - individual_responses: List of responses from individual models
121
122        Examples
123        --------
124        Using async/await:
125            >>> response = await collab.ask_async("What is consciousness?")
126            >>> print(response["response"])  # Aggregated response
127            >>> for resp in response["individual_responses"]:
128            ...     print(f"{resp['model']['name']}: {resp['response']}")
129        """
130        # Get responses from all models
131        model_responses = await self._multi_model.ask_async(prompt)
132        
133        # Get aggregator response
134        aggregator_prompt = self._format_aggregator_prompt(prompt, model_responses)
135        aggregator_response = await self._execute_async(aggregator_prompt, self._aggregator._agent)
136        
137        # Process aggregator response
138        processed_aggregator = self._process_response(
139            aggregator_prompt,
140            aggregator_response,
141            self._aggregator.provider,
142            self._aggregator.model,
143            self._count_tokens,
144            self._count_cost
145        )
146
147        processed_aggregator["individual_responses"] = model_responses
148        return processed_aggregator
149
150    def ask(self, prompt: Union[str, Prompt, PromptChain]) -> Dict:
151        """
152        Ask all models and aggregate their responses synchronously.
153
154        Parameters
155        ----------
156        prompt : Union[str, Prompt, PromptChain]
157            The prompt to process across all models
158
159        Returns
160        -------
161        Dict
162            Dictionary containing:
163            - response: The aggregated response
164            - prompt: The original prompt
165            - model: Dictionary with aggregator's provider and model name
166            - tokens: Token counts (if enabled)
167            - cost: Cost calculation (if enabled)
168            - individual_responses: List of responses from individual models
169
170        """
171        return asyncio.run(self.ask_async(prompt))

A class to implement collaborative decision making across multiple AI models.

CollaborativeModel manages a collection of AI models and an aggregator model. It executes prompts across all models in parallel and then uses the aggregator to synthesize a final response based on all individual responses.

Examples

Basic collaborative analysis:

models = [
    {"provider": "openai", "model": "gpt-4"},
    {"provider": "anthropic", "model": "claude-3"}
]
aggregator = {"provider": "openai", "model": "gpt-4"}
collab = CollaborativeModel(models=models, aggregator=aggregator)
response = collab.ask("Explain quantum computing")
print(response["response"])  # Aggregated response
for ind_resp in response["individual_responses"]:
    print(f"{ind_resp['model']['name']}: {ind_resp['response']}")
CollaborativeModel(models: List[Dict[str, str]], aggregator: Dict[str, str], count_tokens: bool = False, count_cost: bool = False)

Initialize a new CollaborativeModel instance.

Parameters
  • models (List[Dict[str, str]]): List of dictionaries with provider and model information
  • aggregator (Dict[str, str]): Dictionary with provider and model information for the aggregator
  • count_tokens (bool, optional): Whether to count tokens for each request
  • count_cost (bool, optional): Whether to calculate costs for each request
def ask(self, prompt: Union[str, monoai.prompts.Prompt, monoai.prompts.PromptChain]) -> Dict:

Ask all models and aggregate their responses synchronously.

Parameters
  • prompt (Union[str, Prompt, PromptChain]): The prompt to process across all models
Returns
  • Dict: Dictionary containing:
    • response: The aggregated response
    • prompt: The original prompt
    • model: Dictionary with aggregator's provider and model name
    • tokens: Token counts (if enabled)
    • cost: Cost calculation (if enabled)
    • individual_responses: List of responses from individual models
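A variant of the class example with token and cost accounting enabled; the exact shape of the `tokens` and `cost` values is left to the response processor and is an assumption here:

```
from monoai.models import CollaborativeModel

collab = CollaborativeModel(
    models=[
        {"provider": "openai", "model": "gpt-4"},
        {"provider": "anthropic", "model": "claude-3"},
    ],
    aggregator={"provider": "openai", "model": "gpt-4"},
    count_tokens=True,
    count_cost=True,
)
response = collab.ask("Explain quantum computing")
print(response["response"])                    # aggregated answer
print(response.get("tokens"), response.get("cost"))
for resp in response["individual_responses"]:  # per-model answers fed to the aggregator
    print(f"{resp['model']['name']}: {resp['response']}")
```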
class ImageModel:
 5class ImageModel:
 6    """
 7    A class to interact with AI image generation models.
 8    
 9    ImageModel provides an interface for generating images from text prompts using
10    AI models. Currently supports OpenAI's DALL-E 3, with potential for expansion
11    to other providers and models in the future.
12
13    Examples
14    --------
15    Basic image generation:
16    ```
17    model = ImageModel(provider="openai", model="dall-e-3")
18    response = model.generate("A beautiful garden with flowers")
19    ```
20    """
21
22    def __init__(self, provider: str, model: str):
23        """
24        Initialize a new ImageModel instance.
25
26        Parameters
27        ----------
28        provider : str
29            Name of the provider (currently only "openai" is supported)
30        model : str
31            Name of the model (currently only "dall-e-3" is supported)
32
33        Raises
34        ------
35        ValueError
36            If an unsupported provider or model is specified
37        """
38        self.provider = provider
39        self.model = model
40
41        if provider.lower() != "openai":
42            raise ValueError(f"Provider {provider} not supported")
43        if model.lower() != "dall-e-3":
44            raise ValueError(f"Model {model} not supported")
45        
46        load_key(provider)
47        self._client = OpenAI()
48
49    def generate(self, 
50                prompt: str, 
51                size: str = "1024x1024", 
52                quality: str = "standard", 
53                n: int = 1) -> dict:
54        """
55        Generate images from a text prompt.
56
57        Parameters
58        ----------
59        prompt : str
60            The text description of the image to generate
61        size : str, optional
62            The size of the generated image(s). Options:
63            - "1024x1024" (default)
64            - "1792x1024"
65            - "1024x1792"
66        quality : str, optional
67            The quality of the generated image(s). Options:
68            - "standard" (default)
69            - "hd"
70        n : int, optional
71            Number of images to generate (default: 1)
72
73        Returns
74        -------
75        dict
76            OpenAI image generation response containing:
77            - created: timestamp
78            - data: list of generated images with URLs and other metadata
79        """
80        response = self._client.images.generate(
81            model=self.model,
82            prompt=prompt,
83            size=size,
84            quality=quality,
85            n=n,
86        )
87        return response

A class to interact with AI image generation models.

ImageModel provides an interface for generating images from text prompts using AI models. Currently supports OpenAI's DALL-E 3, with potential for expansion to other providers and models in the future.

Examples

Basic image generation:

model = ImageModel(provider="openai", model="dall-e-3")
response = model.generate("A beautiful garden with flowers")
ImageModel(provider: str, model: str)

Initialize a new ImageModel instance.

Parameters
  • provider (str): Name of the provider (currently only "openai" is supported)
  • model (str): Name of the model (currently only "dall-e-3" is supported)
Raises
  • ValueError: If an unsupported provider or model is specified
provider
model
def generate(self, prompt: str, size: str = '1024x1024', quality: str = 'standard', n: int = 1) -> dict:

Generate images from a text prompt.

Parameters
  • prompt (str): The text description of the image to generate
  • size (str, optional): The size of the generated image(s). Options:
    • "1024x1024" (default)
    • "1792x1024"
    • "1024x1792"
  • quality (str, optional): The quality of the generated image(s). Options:
    • "standard" (default)
    • "hd"
  • n (int, optional): Number of images to generate (default: 1)
Returns
  • dict: OpenAI image generation response containing:
    • created: timestamp
    • data: list of generated images with URLs and other metadata
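Retrieving a generated image, assuming the OpenAI SDK's response shape in which each entry of `data` exposes a hosted image `url`:

```
import urllib.request
from monoai.models import ImageModel

model = ImageModel(provider="openai", model="dall-e-3")
response = model.generate("A beautiful garden with flowers", size="1792x1024", quality="hd")

image_url = response.data[0].url  # OpenAI images API: one entry per generated image
urllib.request.urlretrieve(image_url, "garden.png")
```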
class VoiceModel:
  7class VoiceModel:
  8    """
  9    A class for text-to-speech using various AI voice models.
 10    
 11    VoiceModel provides an interface for converting text to speech using different
 12    AI voice providers. Currently supports ElevenLabs and OpenAI voice models.
 13            
 14    Examples
 15    --------
 16    Basic usage with ElevenLabs:
 17    >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
 18    >>> audio_file = model.speak("Hello, world!")
 19    
 20    Using different return types:
 21    >>> # Save as MP3 file
 22    >>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
 23    
 24    >>> # Get as base64 string
 25    >>> audio_b64 = model.speak("Hello, world!", return_type="base64")
 26    
 27    >>> # Get as bytes
 28    >>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
 29    
 30    Streaming audio generation:
 31    >>> async for audio_chunk in model.stream("Long text to convert to speech"):
 32    ...     print(f"Generated audio chunk: {audio_chunk}")
 33    """
 34    
 35    def __init__(self, provider: str, model: str, voice: str):
 36        """
 37        Initialize a new VoiceModel instance.
 38        
 39        Parameters
 40        ----------
 41        provider : str
 42            The voice provider to use (e.g., 'elevenlabs', 'openai')
 43        model : str
 44            The specific voice model to use
 45        voice : str
 46            The voice ID or voice name to use for speech generation
 47            
 48        Raises
 49        ------
 50        ImportError
 51            If the required provider library is not installed
 52        """
 53        load_key(provider)
 54        self._provider = provider
 55        self._model = model
 56        self._voice = voice
 57        
 58        if self._provider == "elevenlabs":
 59            try:
 60                from elevenlabs.client import ElevenLabs
 61            except ImportError:
 62                raise ImportError("elevenlabs is not installed. Please install it with 'pip install elevenlabs'")
 63
 64            self._client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 65        
 66    def speak(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3"):
 67        """
 68        Convert text to speech and return the audio in the specified format.
 69        
 70        The method automatically splits long text into optimal chunks based on
 71        sentence boundaries to ensure high-quality audio generation.
 72        
 73        Parameters
 74        ----------
 75        text : str
 76            The text to convert to speech
 77        min_chars_per_sentence : int, optional
 78            Minimum number of characters per sentence group for optimal audio
 79            generation (default: 100)
 80        return_type : str, optional
 81            The format to return the audio in. Options:
 82            - File path ending with '.mp3' or '.wav' to save to file
 83            - 'base64' to return as base64 encoded string
 84            - 'bytes' to return as raw bytes (default: "audio.mp3")
 85            
 86        Returns
 87        -------
 88        str or bytes
 89            The generated audio in the specified format:
 90            - If return_type is a file path: returns the file path
 91            - If return_type is 'base64': returns base64 encoded string
 92            - If return_type is 'bytes': returns raw audio bytes
 93            
 94        Raises
 95        ------
 96        ValueError
 97            If return_type is not a valid option
 98            
 99        Examples
100        --------
101        >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
102        >>> # Save to file
103        >>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
104        >>> # Get as base64
105        >>> audio_b64 = model.speak("Hello, world!", return_type="base64")
106        >>> # Get as bytes
107        >>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
108        """
109        audio_groups = self._generate_audio_groups(text, min_chars_per_sentence)
110        audio_chunks = []
111        
112        for group in audio_groups:
113            
114            response = self._generate(group)
115            
116            audio_chunks.append(response)
117        
118        if return_type.endswith(".mp3") or return_type.endswith(".wav"):
119            combined_audio = self._combine_bytes_chunks(audio_chunks)
120            with open(return_type, 'wb') as f:
121                f.write(combined_audio)
122            return return_type
123        elif return_type == "base64":
124            combined_audio = self._combine_bytes_chunks(audio_chunks)
125            return base64.b64encode(combined_audio).decode('utf-8')
126        elif return_type == "bytes":
127            return self._combine_bytes_chunks(audio_chunks)
128        else:
129            raise ValueError(f"Invalid return type: {return_type}")
130    
131    def _combine_bytes_chunks(self, audio_chunks: list) -> bytes:
132        """
133        Combine audio chunks into a single bytes object.
134        
135        Parameters
136        ----------
137        audio_chunks : list
138            List of audio byte chunks to combine
139            
140        Returns
141        -------
142        bytes
143            Combined audio data as bytes
144        """
145        combined = b""
146        for chunk in audio_chunks:
147            combined += chunk
148        return combined
149    
150    def _generate_audio_groups(self, text: str, min_chars_per_sentence: int):
151        """
152        Generate optimized sentence groups for audio generation.
153        
154        Splits text into sentence groups that meet the minimum character requirement
155        for optimal audio quality while respecting sentence boundaries.
156        
157        Parameters
158        ----------
159        text : str
160            The text to split into groups
161        min_chars_per_sentence : int
162            Minimum number of characters per sentence group
163            
164        Returns
165        -------
166        list
167            List of sentence groups optimized for audio generation
168        """
169        sentences = [s.strip() for s in text.split(".") if s.strip()]
170        audio_groups = []
171        i = 0
172        
173        while i < len(sentences):
174            current_group = sentences[i]
175            j = i + 1
176            
177            # Continue adding subsequent sentences until min_chars_per_sentence is exceeded
178            while j < len(sentences):
179                next_sentence = sentences[j]
180                combined_length = len(current_group + ". " + next_sentence)
181                
182                # If the combination exceeds the limit, stop
183                if combined_length > min_chars_per_sentence:
184                    break
185                
186                # Otherwise, join the next sentence
187                current_group += ". " + next_sentence
188                j += 1
189            
190            audio_groups.append(current_group)
191            # Move to the next unprocessed sentence
192            i = j
193        
194        return audio_groups
195        
196    def _generate(self, text):
197        """
198        Generate audio from text using the configured provider.
199        
200        Parameters
201        ----------
202        text : str
203            The text to convert to speech
204            
205        Returns
206        -------
207        bytes
208            Generated audio data as bytes
209        """
210        if self._provider == "elevenlabs":
211            response = self._client.text_to_speech.convert(
212                text=text,
213                voice_id=self._voice,
214                model_id=self._model,
215                output_format="mp3_44100_128",
216            )
217            response_bytes = b""
218            for r in response:
219                response_bytes += r
220            return response_bytes
221        else:
222            response = speech(
223                model=self._provider+"/"+self._model,
224                voice=self._voice,
225                input=text,
226            )
227            return response.content
228    
229    async def stream(self, text: str, min_chars_per_sentence=100, return_type="audio.mp3"):
230        """
231        Stream audio generation for long texts.
232        
233        Generates audio in chunks for long texts, yielding each chunk as it's
234        generated. This is useful for real-time audio generation or processing
235        very long texts.
236        
237        Parameters
238        ----------
239        text : str
240            The text to convert to speech
241        min_chars_per_sentence : int, optional
242            Minimum number of characters per sentence group for optimal audio
243            generation (default: 100)
244        return_type : str, optional
245            The format to return each audio chunk in. Options:
246            - File path ending with '.mp3' or '.wav' to save each chunk to file
247            - 'base64' to return each chunk as base64 encoded string
248            - 'bytes' to return each chunk as raw bytes (default: "audio.mp3")
249            
250        Yields
251        ------
252        str or bytes
253            Audio chunks in the specified format:
254            - If return_type is a file path: yields the file path for each chunk
255            - If return_type is 'base64': yields base64 encoded strings
256            - If return_type is 'bytes': yields raw audio bytes
257            
258        Raises
259        ------
260        ValueError
261            If return_type is not a valid option
262            
263        Examples
264        --------
265        >>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
266        >>> async for audio_chunk in model.stream("Long text to convert to speech"):
267        ...     print(f"Generated audio chunk: {audio_chunk}")
268        """
269        audio_groups = self._generate_audio_groups(text, min_chars_per_sentence)
270        
271        for group in audio_groups:
272            # Generate audio for the sentence group
273            response = self._generate(group)
274
275            if return_type.endswith(".mp3") or return_type.endswith(".wav"):
276                with open(return_type, 'wb') as f:
277                    f.write(response)
278                yield return_type
279            elif return_type == "base64":
280                yield base64.b64encode(response).decode('utf-8')
281            elif return_type == "bytes":
282                yield response
283            else:
284                raise ValueError(f"Invalid return type: {return_type}")

A class for text-to-speech using various AI voice models.

VoiceModel provides an interface for converting text to speech using different AI voice providers. Currently supports ElevenLabs and OpenAI voice models.

Examples

Basic usage with ElevenLabs:

>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> audio_file = model.speak("Hello, world!")

Using different return types:

>>> # Save as MP3 file
>>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
>>> # Get as base64 string
>>> audio_b64 = model.speak("Hello, world!", return_type="base64")
>>> # Get as bytes
>>> audio_bytes = model.speak("Hello, world!", return_type="bytes")

Streaming audio generation:

>>> async for audio_chunk in model.stream("Long text to convert to speech"):
...     print(f"Generated audio chunk: {audio_chunk}")
VoiceModel(provider: str, model: str, voice: str)

Initialize a new VoiceModel instance.

Parameters
  • provider (str): The voice provider to use (e.g., 'elevenlabs', 'openai')
  • model (str): The specific voice model to use
  • voice (str): The voice ID or voice name to use for speech generation
Raises
  • ImportError: If the required provider library is not installed
def speak(self, text: str, min_chars_per_sentence=100, return_type='audio.mp3'):

Convert text to speech and return the audio in the specified format.

The method automatically splits long text into optimal chunks based on sentence boundaries to ensure high-quality audio generation.

Parameters
  • text (str): The text to convert to speech
  • min_chars_per_sentence (int, optional): Minimum number of characters per sentence group for optimal audio generation (default: 100)
  • return_type (str, optional): The format to return the audio in. Options:
    • File path ending with '.mp3' or '.wav' to save to file
    • 'base64' to return as base64 encoded string
    • 'bytes' to return as raw bytes (default: "audio.mp3")
Returns
  • str or bytes: The generated audio in the specified format:
    • If return_type is a file path: returns the file path
    • If return_type is 'base64': returns base64 encoded string
    • If return_type is 'bytes': returns raw audio bytes
Raises
  • ValueError: If return_type is not a valid option
Examples
>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> # Save to file
>>> audio_file = model.speak("Hello, world!", return_type="output.mp3")
>>> # Get as base64
>>> audio_b64 = model.speak("Hello, world!", return_type="base64")
>>> # Get as bytes
>>> audio_bytes = model.speak("Hello, world!", return_type="bytes")
async def stream(self, text: str, min_chars_per_sentence=100, return_type='audio.mp3'):

Stream audio generation for long texts.

Generates audio in chunks for long texts, yielding each chunk as it's generated. This is useful for real-time audio generation or processing very long texts.

Parameters
  • text (str): The text to convert to speech
  • min_chars_per_sentence (int, optional): Minimum number of characters per sentence group for optimal audio generation (default: 100)
  • return_type (str, optional): The format to return each audio chunk in. Options:
    • File path ending with '.mp3' or '.wav' to save each chunk to file
    • 'base64' to return each chunk as base64 encoded string
    • 'bytes' to return each chunk as raw bytes (default: "audio.mp3")
Yields
  • str or bytes: Audio chunks in the specified format:
    • If return_type is a file path: yields the file path for each chunk
    • If return_type is 'base64': yields base64 encoded strings
    • If return_type is 'bytes': yields raw audio bytes
Raises
  • ValueError: If return_type is not a valid option
Examples
>>> model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2", voice="21m00Tcm4TlvDq8ikWAM")
>>> async for audio_chunk in model.stream("Long text to convert to speech"):
...     print(f"Generated audio chunk: {audio_chunk}")
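Collecting streamed chunks as raw bytes and joining them afterwards sidesteps the default file mode, which rewrites the same path for every chunk; a sketch:

```
import asyncio
from monoai.models import VoiceModel

async def main():
    model = VoiceModel(provider="elevenlabs", model="eleven_multilingual_v2",
                       voice="21m00Tcm4TlvDq8ikWAM")
    chunks = []
    async for chunk in model.stream("A long article to narrate...", return_type="bytes"):
        chunks.append(chunk)  # raw MP3 bytes per sentence group
    with open("article.mp3", "wb") as f:
        f.write(b"".join(chunks))

asyncio.run(main())
```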