monoai.application

The Application module provides a high-level interface for building and serving AI applications incorporating models and agents.

View Source

1"""
2The Application module provides a high-level interface for building and serving AI applications incorporating models and agents.
3"""
4
5from .application import Application
6from .rate_limiter import RateLimiter, Limit
7
8__all__ = ["Application", "RateLimiter", "Limit"]

class Application: View Source

 11class Application:
 12    """
 13    FastAPI-based application for serving AI models and agents.
 14    
 15    The Application class provides a complete web service wrapper around AI models
 16    and agents, offering REST API endpoints, WebSocket support, rate limiting,
 17    and user validation capabilities.
 18    
 19    This class automatically creates FastAPI endpoints based on the configured
 20    models and agents, handling request validation, rate limiting, and response
 21    formatting.
 22        
 23    Examples
 24    --------
 25    Basic usage with a model:
 26    ```
 27    from monoai.models import Model
 28    from monoai.application import Application
 29    
 30    model = Model(provider="openai", model="gpt-4o-mini")
 31    app = Application(name="MyAIApp", model=model)
 32    app.serve(port=8000)
 33    ```
 34    
 35    With agents and rate limiting:
 36    ```
 37    from monoai.models import Model
 38    from monoai.agents import Agent
 39    from monoai.application import Application, RateLimiter
 40    
 41    model = Model(provider="openai", model="gpt-4o-mini")
 42    agent = Agent(model=model, paradigm="react")
 43    rate_limiter = RateLimiter(requests_per_minute=60)
 44    
 45    app = Application(
 46        name="AgentApp",
 47        agents=[agent],
 48        rate_limiter=rate_limiter
 49    )
 50    app.serve(port=8000)
 51    ```
 52    
 53    With user validation:
 54    ```
 55    def validate_user(user_id: str):
 56        # Custom validation logic
 57        if user_id.startswith("user_"):
 58            return True
 59        elif user_id.isdigit():
 60            return f"user_{user_id}"  # Normalize
 61        return False
 62    
 63    app = Application(
 64        name="SecureApp",
 65        model=model,
 66        user_validator=validate_user
 67    )
 68    ```
 69    """
 70
 71    def __init__(self, name: str, model: Optional[Model] = None, agents: Optional[List[Agent]] = None, 
 72                 rate_limiter: Optional[RateLimiter] = None, user_validator: Optional[Callable[[str], Union[bool, str]]] = None):
 73        """
 74        Initialize the application.
 75        
 76        Parameters
 77        ----------
 78        name : str
 79            Application name. Used in API responses and logging.
 80        model : Optional[Model], default None
 81            AI model to use. If provided, creates /model endpoints.
 82            The model will be available at POST /model and POST /model/stream.
 83        agents : Optional[List[Agent]], default None
 84            List of available agents. Each agent must have a unique name.
 85            Creates /agent/{agent_name} endpoints for each agent.
 86        rate_limiter : Optional[RateLimiter], default None
 87            Rate limiter to control API usage. Applies to all endpoints.
 88            If not provided, no rate limiting is enforced.
 89        user_validator : Optional[Callable[[str], Union[bool, str]]], default None
 90            Function to validate user_id from requests. Must return:
 91            - True: user_id is valid and accepted as-is
 92            - False: user_id is invalid, will fallback to IP-based identification
 93            - str: user_id is valid but normalized (e.g. "user123" -> "user_123")
 94            
 95        Notes
 96        -----
 97        At least one of model or agents must be provided to create useful endpoints.
 98        If neither is provided, only meta endpoints (/, /health) will be available.
 99        
100        The user_validator function is called for every request that includes
101        a user_id in the request body. If validation fails, the application
102        falls back to using the client IP address for rate limiting.
103        """
104        self.name = name
105        self._model = model
106        self._agents: Optional[Dict[str, Agent]] = (
107            {a.name: a for a in agents} if agents else None
108        )
109        self._rate_limiter = rate_limiter
110        self._user_validator = user_validator
111        self._started_at = datetime.now(timezone.utc)
112
113    @staticmethod
114    async def _maybe_await(fn: Callable[..., Any], *args, **kwargs) -> Any:
115        """
116        Execute a function and await it if it's a coroutine.
117        
118        This helper method allows the application to work with both
119        synchronous and asynchronous model/agent methods.
120        
121        Parameters
122        ----------
123        fn : Callable[..., Any]
124            Function to execute
125        *args
126            Positional arguments to pass to the function
127        **kwargs
128            Keyword arguments to pass to the function
129            
130        Returns
131        -------
132        Any
133            Result of the function execution
134        """
135        result = fn(*args, **kwargs)
136        if hasattr(result, "__await__"):
137            return await result 
138        return result
139
140    def _get_user_identifier(self, request, data: Dict[str, Any]) -> str:
141        """
142        Extract user identifier from the request.
143        
144        This method implements a multi-step user identification process:
145        1. Look for user_id in the request body
146        2. Validate user_id using the configured validator (if any)
147        3. Fall back to client IP address if user_id is invalid or missing
148        
149        The method handles various proxy headers (X-Forwarded-For, X-Real-IP)
150        to get the real client IP when behind load balancers or proxies.
151        
152        Parameters
153        ----------
154        request : Request
155            FastAPI request object (can be HTTP Request or WebSocket)
156        data : Dict[str, Any]
157            Request body data containing potential user_id
158            
159        Returns
160        -------
161        str
162            User identifier in one of these formats:
163            - "user_id" if user_id is provided and valid
164            - "ip:192.168.1.1" if using IP-based identification
165            - "ip:unknown" if IP cannot be determined
166            
167        Notes
168        -----
169        The user identifier is used for rate limiting and request tracking.
170        IP-based identifiers are prefixed with "ip:" to distinguish them
171        from actual user IDs.
172        """
173        # Look for user_id in the request body
174        user_id = data.get("user_id")
175        if user_id:
176            user_id_str = str(user_id)
177            
178            # Validate user_id if validator is configured
179            if self._user_validator:
180                try:
181                    validation_result = self._user_validator(user_id_str)
182                    
183                    if validation_result is True:
184                        # user_id is valid, use as is
185                        return user_id_str
186                    elif validation_result is False:
187                        # user_id is invalid, fallback to IP
188                        pass
189                    elif isinstance(validation_result, str):
190                        # user_id is valid but normalized
191                        return validation_result
192                    else:
193                        # Unknown validation result, fallback to IP
194                        pass
195                except Exception:
196                    # Error during validation, fallback to IP
197                    pass
198            else:
199                # No validator configured, use user_id as is
200                return user_id_str
201        
202        # Fallback to client IP
203        # Try different headers to get the real IP
204        forwarded_for = request.headers.get("X-Forwarded-For")
205        if forwarded_for:
206            # X-Forwarded-For can contain multiple IPs, take the first one
207            client_ip = forwarded_for.split(",")[0].strip()
208        else:
209            real_ip = request.headers.get("X-Real-IP")
210            if real_ip:
211                client_ip = real_ip
212            else:
213                # Handle both Request and WebSocket
214                if hasattr(request, 'client') and request.client:
215                    client_ip = request.client.host
216                elif hasattr(request, 'url') and hasattr(request.url, 'hostname'):
217                    client_ip = request.url.hostname
218                else:
219                    client_ip = "unknown"
220        
221        return f"ip:{client_ip}"
222
223    def validate_user_id(self, user_id: str) -> Union[bool, str]:
224        """
225        Validate a user_id using the configured validator.
226        
227        This method provides a safe way to validate user IDs, handling
228        any exceptions that might occur during validation.
229        
230        Parameters
231        ----------
232        user_id : str
233            user_id to validate
234        
235        Returns
236        -------
237        Union[bool, str]
238            - True: user_id is valid and accepted as-is
239            - False: user_id is invalid or validation failed
240            - str: user_id is valid but normalized (use this value instead)
241            
242        Notes
243        -----
244        If no validator is configured, this method always returns True.
245        Any exceptions during validation are caught and result in False.
246        """
247        if not self._user_validator:
248            return True  # No validator, always consider valid
249        
250        try:
251            return self._user_validator(user_id)
252        except Exception:
253            return False  # Error during validation, consider invalid
254
255    def _build_app(self):
256        """
257        Build and configure the FastAPI application.
258        
259        This method creates a FastAPI app with all the necessary endpoints,
260        middleware, and error handling based on the configured models and agents.
261        
262        Returns
263        -------
264        FastAPI
265            Configured FastAPI application instance
266            
267        Raises
268        ------
269        ImportError
270            If FastAPI is not installed
271            
272        Notes
273        -----
274        The method dynamically creates endpoints based on what's configured:
275        - Model endpoints are created if a model is provided
276        - Agent endpoints are created if agents are provided
277        - Meta endpoints (/, /health) are always created
278        - Rate limiting and user validation are applied to all endpoints
279        """
280
281        try:
282            from fastapi import FastAPI, Request, HTTPException, status
283        except ImportError as e:
284            raise ImportError(
285                "fastapi is required to build the application. "
286                "Install it with: pip install fastapi"
287            ) from e
288
289        from fastapi.middleware.cors import CORSMiddleware
290
291        app = FastAPI(title=self.name)
292
293        app.add_middleware(
294            CORSMiddleware,
295            allow_origins=["*"],
296            allow_credentials=True,
297            allow_methods=["*"],
298            allow_headers=["*"],
299        )
300
301        @app.get("/", tags=["meta"], summary="Ping app")
302        async def root():
303            return {
304                "msg": f"App {self.name} successfully started",
305                "started_at": self._started_at.isoformat() + "Z",
306            }
307
308        @app.get("/health", tags=["meta"], summary="Health check")
309        async def health():
310            return {"status": "ok", "app": self.name}
311
312        @app.get("/rate-limit/stats", tags=["rate-limit"], summary="Rate limiter statistics")
313        async def rate_limit_stats():
314            if not self._rate_limiter:
315                return {"message": "Rate limiter not configured"}
316            
317            stats = self._rate_limiter.get_stats()
318            return {
319                "rate_limiter": str(self._rate_limiter),
320                "global_stats": stats
321            }
322
323        @app.get("/rate-limit/stats/{user_id}", tags=["rate-limit"], summary="Statistics for a specific user")
324        async def rate_limit_user_stats(user_id: str):
325            if not self._rate_limiter:
326                return {"message": "Rate limiter not configured"}
327            
328            user_stats = self._rate_limiter.get_stats(user_id)
329            usage = self._rate_limiter.get_usage(user_id)
330            remaining = self._rate_limiter.get_remaining(user_id)
331            
332            return {
333                "user_id": user_id,
334                "usage": usage,
335                "remaining": remaining,
336                "stats": user_stats
337            }
338
339        @app.post("/validate-user", tags=["auth"], summary="Validate a user_id")
340        async def validate_user_endpoint(request: Request):
341            try:
342                data = await request.json()
343            except Exception:
344                raise HTTPException(status_code=400, detail="Invalid JSON body")
345            
346            user_id = data.get("user_id")
347            if not user_id:
348                raise HTTPException(status_code=400, detail="'user_id' is required")
349            
350            validation_result = self.validate_user_id(str(user_id))
351            
352            if validation_result is True:
353                return {
354                    "valid": True,
355                    "user_id": user_id,
356                    "normalized": None
357                }
358            elif validation_result is False:
359                return {
360                    "valid": False,
361                    "user_id": user_id,
362                    "error": "Invalid user_id"
363                }
364            elif isinstance(validation_result, str):
365                return {
366                    "valid": True,
367                    "user_id": user_id,
368                    "normalized": validation_result
369                }
370            else:
371                return {
372                    "valid": False,
373                    "user_id": user_id,
374                    "error": "Unknown validation result"
375                }
376
377        if self._model is not None:
378            @app.post(
379                "/model",
380                tags=["model"],
381                summary="Ask the model",
382                status_code=status.HTTP_200_OK,
383            )
384            async def model_route(request: Request):
385                try:
386                    data = await request.json()
387                except Exception:
388                    raise HTTPException(status_code=400, detail="Invalid JSON body")
389
390                prompt = (data or {}).get("prompt")
391                if not prompt:
392                    raise HTTPException(status_code=400, detail="'prompt' is required")
393
394                # Extract user identifier
395                user_identifier = self._get_user_identifier(request, data or {})
396                
397                # Execute the request to the model
398                result = await self._maybe_await(self._model.ask, prompt)
399                
400                # Check and update rate limit if configured
401                if self._rate_limiter:
402                    # Check rate limit based on the response
403                    if not self._rate_limiter.check_with_response(user_identifier, result):
404                        raise HTTPException(
405                            status_code=429, 
406                            detail="Rate limit exceeded. Please try again later."
407                        )
408
409                    # Update the rate limiter with the response
410                    self._rate_limiter.update_with_response(user_identifier, result)
411                
412                return result
413
414        if self._agents is not None:
415            @app.post(
416                "/agent/{agent_name}",
417                tags=["agents"],
418                summary="Execute an agent",
419                status_code=status.HTTP_200_OK,
420            )
421            async def agent_route(agent_name: str, request: Request):
422                if agent_name not in self._agents:
423                    raise HTTPException(status_code=404, detail=f"Agent '{agent_name}' not found")
424
425                try:
426                    data = await request.json()
427                except Exception:
428                    raise HTTPException(status_code=400, detail="Invalid JSON body")
429
430                prompt = (data or {}).get("prompt")
431                if not prompt:
432                    raise HTTPException(status_code=400, detail="'prompt' is required")
433
434                # Extract user identifier
435                user_identifier = self._get_user_identifier(request, data or {})
436                
437                # Execute the agent
438                agent = self._agents[agent_name]
439                result = await self._maybe_await(agent.run, prompt)
440                
441                # Check and update rate limit if configured
442                if self._rate_limiter:
443                    # Check rate limit based on the response
444                    if not self._rate_limiter.check_with_response(user_identifier, result):
445                        raise HTTPException(
446                            status_code=429, 
447                            detail="Rate limit exceeded. Please try again later."
448                        )
449
450                    # Update the rate limiter with the response
451                    self._rate_limiter.update_with_response(user_identifier, result)
452                
453                return result
454
455        # Model streaming endpoint
456        if self._model is not None:
457            @app.post(
458                "/model/stream",
459                tags=["model"],
460                summary="Ask the model with streaming",
461                status_code=status.HTTP_200_OK,
462            )
463            async def model_stream_route(request: Request):
464                try:
465                    data = await request.json()
466                except Exception:
467                    raise HTTPException(status_code=400, detail="Invalid JSON body")
468
469                prompt = (data or {}).get("prompt")
470                if not prompt:
471                    raise HTTPException(status_code=400, detail="'prompt' is required")
472
473                # Extract user identifier
474                user_identifier = self._get_user_identifier(request, data or {})
475                
476                # Create a function to handle streaming
477                def stream_handler(content: str):
478                    return content
479                
480                # Enable streaming on the model
481                if hasattr(self._model, 'enable_streaming'):
482                    self._model.enable_streaming(stream_handler)
483                
484                # Execute the request to the model with streaming
485                result = await self._maybe_await(self._model.ask, prompt)
486                
487                # Check and update rate limit if configured
488                if self._rate_limiter:
489                    # Check rate limit based on the response
490                    if not self._rate_limiter.check_with_response(user_identifier, result):
491                        raise HTTPException(
492                            status_code=429, 
493                            detail="Rate limit exceeded. Please try again later."
494                        )
495
496                    # Update the rate limiter with the response
497                    self._rate_limiter.update_with_response(user_identifier, result)
498                
499                return result
500
501        # Agent streaming endpoint
502        if self._agents is not None:
503            @app.post(
504                "/agent/{agent_name}/stream",
505                tags=["agents"],
506                summary="Execute an agent with streaming",
507                status_code=status.HTTP_200_OK,
508            )
509            async def agent_stream_route(agent_name: str, request: Request):
510                if agent_name not in self._agents:
511                    raise HTTPException(status_code=404, detail=f"Agent '{agent_name}' not found")
512
513                try:
514                    data = await request.json()
515                except Exception:
516                    raise HTTPException(status_code=400, detail="Invalid JSON body")
517
518                prompt = (data or {}).get("prompt")
519                if not prompt:
520                    raise HTTPException(status_code=400, detail="'prompt' is required")
521
522                # Extract user identifier
523                user_identifier = self._get_user_identifier(request, data or {})
524                
525                # Create a function to handle streaming
526                def stream_handler(content: str):
527                    return content
528                
529                # Enable streaming on the agent
530                agent = self._agents[agent_name]
531                if hasattr(agent, 'enable_streaming'):
532                    agent.enable_streaming(stream_handler)
533                
534                # Execute the agent with streaming
535                result = await self._maybe_await(agent.run, prompt)
536                
537                # Check and update rate limit if configured
538                if self._rate_limiter:
539                    # Check rate limit based on the response
540                    if not self._rate_limiter.check_with_response(user_identifier, result):
541                        raise HTTPException(
542                            status_code=429, 
543                            detail="Rate limit exceeded. Please try again later."
544                        )
545
546                    # Update the rate limiter with the response
547                    self._rate_limiter.update_with_response(user_identifier, result)
548                
549                return result
550
551        # WebSocket endpoint for real-time streaming
552        if self._model is not None:
553            @app.websocket("/model/ws")
554            async def model_websocket(websocket):
555                try:
556                    from fastapi import WebSocket
557                    await websocket.accept()
558                    
559                    while True:
560                        # Receive prompt from client
561                        data = await websocket.receive_text()
562                        try:
563                            request_data = json.loads(data)
564                        except json.JSONDecodeError:
565                            await websocket.send_text(json.dumps({"error": "Invalid JSON"}))
566                            continue
567                        
568                        prompt = request_data.get("prompt")
569                        if not prompt:
570                            await websocket.send_text(json.dumps({"error": "Prompt is required"}))
571                            continue
572                        
573                        # Extract user identifier
574                        user_identifier = self._get_user_identifier(websocket, request_data)
575                        
576                        # Create a handler to send chunks via WebSocket
577                        def ws_stream_handler(content: str):
578                            asyncio.create_task(websocket.send_text(json.dumps({
579                                "type": "chunk",
580                                "content": content
581                            })))
582                        
583                        # Enable streaming on the model
584                        if hasattr(self._model, 'enable_streaming'):
585                            self._model.enable_streaming(ws_stream_handler)
586                        
587                        # Execute the request to the model
588                        result = await self._maybe_await(self._model.ask, prompt)
589                        
590                        # Send the final result
591                        await websocket.send_text(json.dumps({
592                            "type": "complete",
593                            "result": result
594                        }))
595                        
596                except Exception as e:
597                    await websocket.send_text(json.dumps({"error": str(e)}))
598                finally:
599                    await websocket.close()
600
601        if self._agents is not None:
602            @app.websocket("/agent/{agent_name}/ws")
603            async def agent_websocket(websocket, agent_name: str):
604                try:
605                    from fastapi import WebSocket
606                    await websocket.accept()
607                    
608                    if agent_name not in self._agents:
609                        await websocket.send_text(json.dumps({"error": f"Agent '{agent_name}' not found"}))
610                        await websocket.close()
611                        return
612                    
613                    agent = self._agents[agent_name]
614                    
615                    while True:
616                        # Receive prompt from client
617                        data = await websocket.receive_text()
618                        try:
619                            request_data = json.loads(data)
620                        except json.JSONDecodeError:
621                            await websocket.send_text(json.dumps({"error": "Invalid JSON"}))
622                            continue
623                        
624                        prompt = request_data.get("prompt")
625                        if not prompt:
626                            await websocket.send_text(json.dumps({"error": "Prompt is required"}))
627                            continue
628                        
629                        # Extract user identifier
630                        user_identifier = self._get_user_identifier(websocket, request_data)
631                        
632                        # Create a handler to send chunks via WebSocket
633                        def ws_stream_handler(content: str):
634                            asyncio.create_task(websocket.send_text(json.dumps({
635                                "type": "chunk",
636                                "content": content
637                            })))
638                        
639                        # Enable streaming on the agent
640                        if hasattr(agent, 'enable_streaming'):
641                            agent.enable_streaming(ws_stream_handler)
642                        
643                        # Execute the agent
644                        result = await self._maybe_await(agent.run, prompt)
645                        
646                        # Send the final result
647                        await websocket.send_text(json.dumps({
648                            "type": "complete",
649                            "result": result
650                        }))
651                        
652                except Exception as e:
653                    await websocket.send_text(json.dumps({"error": str(e)}))
654                finally:
655                    await websocket.close()
656
657        return app
658
659    def serve(
660        self,
661        host: str = "0.0.0.0",
662        port: int = 8000,
663        reload: bool = False,
664        workers: Optional[int] = None,
665        log_level: str = "info",
666    ):
667
668        """
669        Serve the application creating endpoints for the model and agents.
670        
671        This method starts a uvicorn server with the configured FastAPI application.
672        The server provides REST API and WebSocket endpoints for interacting with
673        AI models and agents.
674        
675        API Endpoints
676        -------------
677        When the server is running, the following endpoints are available:
678        
679        **Model Endpoints** (if model is configured):
680            - POST /model                    Ask the model
681            - POST /model/stream             Ask the model with streaming
682            - WS  /model/ws                  Ask the model with WebSocket streaming
683        
684        **Agent Endpoints** (if agents are configured):
685            - POST /agent/{agent_name}       Execute an agent
686            - POST /agent/{agent_name}/stream Execute an agent with streaming
687            - WS  /agent/{agent_name}/ws     Execute an agent with WebSocket streaming
688        
689        **Authentication & Validation:**
690            - POST /validate-user            Validate a user_id
691        
692        **Rate Limiting & Monitoring:**
693            - GET  /rate-limit/stats         Rate limiter statistics
694            - GET  /rate-limit/stats/{user_id} Statistics for a specific user
695        
696        **Meta & Health:**
697            - GET  /                         Ping the application
698            - GET  /health                   Health check
699        
700        **Request Format:**
701        All POST endpoints expect JSON with a "prompt" field:
702        ```json
703        {
704            "prompt": "Your question here",
705            "user_id": "optional_user_id"
706        }
707        ```
708        
709        **Response Format:**
710        - Model responses: Standard model response format
711        - Agent responses: Agent execution result with iterations
712        - WebSocket: JSON messages with "type" and "content" fields
713        
714        Parameters
715        ----------
716        host : str, default "0.0.0.0"
717            Host to serve the application. Use "0.0.0.0" for external access.
718        port : int, default 8000
719            Port to serve the application on.
720        reload : bool, default False
721            Whether to reload the application when code changes are detected.
722            Useful for development, not recommended for production.
723        workers : Optional[int], default None
724            Number of worker processes to use. If None, uses uvicorn default.
725        log_level : str, default "info"
726            Log level for uvicorn. Options: "critical", "error", "warning", "info", "debug".
727            
728        Raises
729        ------
730        ImportError
731            If uvicorn is not installed
732            
733        Examples
734        --------
735        Basic serving:
736        ```
737        app = Application(name="MyApp", model=model)
738        app.serve()  # Serves on http://localhost:8000
739        ```
740        
741        Production serving:
742        ```
743        app.serve(host="0.0.0.0", port=8080, workers=4, log_level="warning")
744        ```
745        
746        Development serving:
747        ```
748        app.serve(reload=True, log_level="debug")
749        ```
750        """
751
752        try:
753            import uvicorn
754        except ImportError as e:
755            raise ImportError(
756                "uvicorn is required to serve the application. "
757                "Install it with: pip install uvicorn"
758            ) from e
759
760        uvicorn.run(
761            self._build_app(),
762            host=host,
763            port=port,
764            reload=reload,
765            workers=workers,
766            log_level=log_level,
767        )

FastAPI-based application for serving AI models and agents.

The Application class provides a complete web service wrapper around AI models and agents, offering REST API endpoints, WebSocket support, rate limiting, and user validation capabilities.

This class automatically creates FastAPI endpoints based on the configured models and agents, handling request validation, rate limiting, and response formatting.

Examples

Basic usage with a model:

from monoai.models import Model
from monoai.application import Application

model = Model(provider="openai", model="gpt-4o-mini")
app = Application(name="MyAIApp", model=model)
app.serve(port=8000)

With agents and rate limiting:

from monoai.models import Model
from monoai.agents import Agent
from monoai.application import Application, RateLimiter

model = Model(provider="openai", model="gpt-4o-mini")
agent = Agent(model=model, paradigm="react")
rate_limiter = RateLimiter(requests_per_minute=60)

app = Application(
    name="AgentApp",
    agents=[agent],
    rate_limiter=rate_limiter
)
app.serve(port=8000)

With user validation:

def validate_user(user_id: str):
    # Custom validation logic
    if user_id.startswith("user_"):
        return True
    elif user_id.isdigit():
        return f"user_{user_id}"  # Normalize
    return False

app = Application(
    name="SecureApp",
    model=model,
    user_validator=validate_user
)

Application( name: str, model: Optional[monoai.models.Model] = None, agents: Optional[List[monoai.agents.Agent]] = None, rate_limiter: Optional[RateLimiter] = None, user_validator: Optional[Callable[[str], Union[bool, str]]] = None) View Source

 71    def __init__(self, name: str, model: Optional[Model] = None, agents: Optional[List[Agent]] = None, 
 72                 rate_limiter: Optional[RateLimiter] = None, user_validator: Optional[Callable[[str], Union[bool, str]]] = None):
 73        """
 74        Initialize the application.
 75        
 76        Parameters
 77        ----------
 78        name : str
 79            Application name. Used in API responses and logging.
 80        model : Optional[Model], default None
 81            AI model to use. If provided, creates /model endpoints.
 82            The model will be available at POST /model and POST /model/stream.
 83        agents : Optional[List[Agent]], default None
 84            List of available agents. Each agent must have a unique name.
 85            Creates /agent/{agent_name} endpoints for each agent.
 86        rate_limiter : Optional[RateLimiter], default None
 87            Rate limiter to control API usage. Applies to all endpoints.
 88            If not provided, no rate limiting is enforced.
 89        user_validator : Optional[Callable[[str], Union[bool, str]]], default None
 90            Function to validate user_id from requests. Must return:
 91            - True: user_id is valid and accepted as-is
 92            - False: user_id is invalid, will fallback to IP-based identification
 93            - str: user_id is valid but normalized (e.g. "user123" -> "user_123")
 94            
 95        Notes
 96        -----
 97        At least one of model or agents must be provided to create useful endpoints.
 98        If neither is provided, only meta endpoints (/, /health) will be available.
 99        
100        The user_validator function is called for every request that includes
101        a user_id in the request body. If validation fails, the application
102        falls back to using the client IP address for rate limiting.
103        """
104        self.name = name
105        self._model = model
106        self._agents: Optional[Dict[str, Agent]] = (
107            {a.name: a for a in agents} if agents else None
108        )
109        self._rate_limiter = rate_limiter
110        self._user_validator = user_validator
111        self._started_at = datetime.now(timezone.utc)

Initialize the application.

Parameters

name (str): Application name. Used in API responses and logging.
model (Optional[Model], default None): AI model to use. If provided, creates /model endpoints. The model will be available at POST /model and POST /model/stream.
agents (Optional[List[Agent]], default None): List of available agents. Each agent must have a unique name. Creates /agent/{agent_name} endpoints for each agent.
rate_limiter (Optional[RateLimiter], default None): Rate limiter to control API usage. Applies to all endpoints. If not provided, no rate limiting is enforced.
user_validator (Optional[Callable[[str], Union[bool, str]]], default None): Function to validate user_id from requests. Must return:
- True: user_id is valid and accepted as-is
- False: user_id is invalid; the application will fall back to IP-based identification
- str: user_id is valid but normalized (e.g. "user123" -> "user_123")

Notes

At least one of model or agents must be provided to create useful endpoints. If neither is provided, only meta endpoints (/, /health) will be available.

The user_validator function is called for every request that includes a user_id in the request body. If validation fails, the application falls back to using the client IP address for rate limiting.

name

def validate_user_id(self, user_id: str) -> Union[bool, str]: View Source

def validate_user_id(self, user_id: str) -> Union[bool, str]:
    """
    Validate a user_id using the configured validator.

    This method provides a safe way to validate user IDs: any exception
    raised by the validator is logged and reported as a failed validation
    instead of propagating to the caller.

    Parameters
    ----------
    user_id : str
        user_id to validate

    Returns
    -------
    Union[bool, str]
        - True: user_id is valid and accepted as-is
        - False: user_id is invalid or validation failed
        - str: user_id is valid but normalized (use this value instead)

    Notes
    -----
    If no validator is configured, this method always returns True.
    Any exceptions during validation are caught, logged, and result in False.
    """
    if not self._user_validator:
        return True  # No validator, always consider valid

    try:
        return self._user_validator(user_id)
    except Exception:
        # Keep the documented best-effort contract (return False), but leave
        # a trace so a broken validator is not silently indistinguishable
        # from a legitimately rejected user_id. The user_id itself is not
        # included in the message to avoid leaking identifiers into logs.
        import logging

        logging.getLogger(__name__).exception(
            "user_validator raised an exception; treating user_id as invalid"
        )
        return False

Validate a user_id using the configured validator.

This method provides a safe way to validate user IDs, handling any exceptions that might occur during validation.

Parameters

user_id (str): user_id to validate

Returns

Union[bool, str]:
- True: user_id is valid and accepted as-is
- False: user_id is invalid or validation failed
- str: user_id is valid but normalized (use this value instead)

Notes

If no validator is configured, this method always returns True. Any exceptions during validation are caught and result in False.

def serve( self, host: str = '0.0.0.0', port: int = 8000, reload: bool = False, workers: Optional[int] = None, log_level: str = 'info'): View Source

def serve(
    self,
    host: str = "0.0.0.0",
    port: int = 8000,
    reload: bool = False,
    workers: Optional[int] = None,
    log_level: str = "info",
):
    """
    Build the FastAPI application and run it with uvicorn.

    The application returned by ``self._build_app()`` is handed to
    ``uvicorn.run`` together with the server options given here. The
    server exposes REST and WebSocket endpoints for the configured model
    and agents.

    API Endpoints
    -------------
    While the server is running, the following endpoints are available:

    **Model Endpoints** (if model is configured):
        - POST /model                      Ask the model
        - POST /model/stream               Ask the model with streaming
        - WS   /model/ws                   Ask the model with WebSocket streaming

    **Agent Endpoints** (if agents are configured):
        - POST /agent/{agent_name}         Execute an agent
        - POST /agent/{agent_name}/stream  Execute an agent with streaming
        - WS   /agent/{agent_name}/ws      Execute an agent with WebSocket streaming

    **Authentication & Validation:**
        - POST /validate-user              Validate a user_id

    **Rate Limiting & Monitoring:**
        - GET  /rate-limit/stats           Rate limiter statistics
        - GET  /rate-limit/stats/{user_id} Statistics for a specific user

    **Meta & Health:**
        - GET  /                           Ping the application
        - GET  /health                     Health check

    **Request Format:**
    All POST endpoints expect JSON with a "prompt" field:
    ```json
    {
        "prompt": "Your question here",
        "user_id": "optional_user_id"
    }
    ```

    **Response Format:**
    - Model responses: Standard model response format
    - Agent responses: Agent execution result with iterations
    - WebSocket: JSON messages with "type" and "content" fields

    Parameters
    ----------
    host : str, default "0.0.0.0"
        Host to serve the application. Use "0.0.0.0" for external access.
    port : int, default 8000
        Port to serve the application on.
    reload : bool, default False
        Whether to reload the application when code changes are detected.
        Useful for development, not recommended for production.
    workers : Optional[int], default None
        Number of worker processes to use. If None, uses uvicorn default.
    log_level : str, default "info"
        Log level for uvicorn. Options: "critical", "error", "warning", "info", "debug".

    Raises
    ------
    ImportError
        If uvicorn is not installed

    Notes
    -----
    NOTE(review): the application is passed to uvicorn as an object, not
    as an import string. uvicorn's documentation states that ``reload``
    and multiple ``workers`` require an import string -- confirm that
    ``reload=True`` / ``workers > 1`` actually work through this method.

    Examples
    --------
    Basic serving:
    ```
    app = Application(name="MyApp", model=model)
    app.serve()  # Serves on http://localhost:8000
    ```

    Production serving:
    ```
    app.serve(host="0.0.0.0", port=8080, workers=4, log_level="warning")
    ```

    Development serving:
    ```
    app.serve(reload=True, log_level="debug")
    ```
    """
    # uvicorn is imported lazily so it is only required when serving.
    try:
        import uvicorn
    except ImportError as missing:
        message = (
            "uvicorn is required to serve the application. "
            "Install it with: pip install uvicorn"
        )
        raise ImportError(message) from missing

    asgi_app = self._build_app()
    server_options = {
        "host": host,
        "port": port,
        "reload": reload,
        "workers": workers,
        "log_level": log_level,
    }
    uvicorn.run(asgi_app, **server_options)

Serve the application creating endpoints for the model and agents.

This method starts a uvicorn server with the configured FastAPI application. The server provides REST API and WebSocket endpoints for interacting with AI models and agents.

API Endpoints

When the server is running, the following endpoints are available:

Model Endpoints (if model is configured):
- POST /model: Ask the model
- POST /model/stream: Ask the model with streaming
- WS /model/ws: Ask the model with WebSocket streaming

Agent Endpoints (if agents are configured):
- POST /agent/{agent_name}: Execute an agent
- POST /agent/{agent_name}/stream: Execute an agent with streaming
- WS /agent/{agent_name}/ws: Execute an agent with WebSocket streaming

Authentication & Validation:
- POST /validate-user: Validate a user_id

Rate Limiting & Monitoring:
- GET /rate-limit/stats: Rate limiter statistics
- GET /rate-limit/stats/{user_id}: Statistics for a specific user

Meta & Health:
- GET /: Ping the application
- GET /health: Health check

Request Format: All POST endpoints expect JSON with a "prompt" field:

{
    "prompt": "Your question here",
    "user_id": "optional_user_id"
}

Response Format:

Model responses: Standard model response format
Agent responses: Agent execution result with iterations
WebSocket: JSON messages with "type" and "content" fields

Parameters

host (str, default "0.0.0.0"): Host to serve the application. Use "0.0.0.0" for external access.
port (int, default 8000): Port to serve the application on.
reload (bool, default False): Whether to reload the application when code changes are detected. Useful for development, not recommended for production.
workers (Optional[int], default None): Number of worker processes to use. If None, uses uvicorn default.
log_level (str, default "info"): Log level for uvicorn. Options: "critical", "error", "warning", "info", "debug".

Raises

ImportError: If uvicorn is not installed

Examples

Basic serving:

app = Application(name="MyApp", model=model)
app.serve()  # Serves on http://localhost:8000

Production serving:

app.serve(host="0.0.0.0", port=8080, workers=4, log_level="warning")

Development serving:

app.serve(reload=True, log_level="debug")

@dataclass

class Limit: View Source

@dataclass
class Limit:
    """
    A single rate-limiting rule.

    Attributes
    ----------
    unit : str
        What is being counted ('token', 'request')
    value : int
        Maximum number of units allowed within one reset period
    reset_unit : str
        Time unit of the reset period ('second', 'minute', 'hour', 'day')
    reset_value : int
        How many ``reset_unit`` make up the reset period (e.g. 1 for "1 day")
    name : str, optional
        Identifier for the limit (default: auto-generated)
    """
    unit: str
    value: int
    reset_unit: str
    reset_value: int
    name: Optional[str] = None

    def __post_init__(self):
        """Validate the fields and fill in a default name when none is given."""
        allowed_units = ('token', 'request')
        allowed_reset_units = ('second', 'minute', 'hour', 'day')

        # Checks run in a fixed order: unit, reset_unit, reset_value, value.
        if self.unit not in allowed_units:
            raise ValueError("Unit must be one of: 'token', 'request'")
        if self.reset_unit not in allowed_reset_units:
            raise ValueError("Reset_unit must be one of: 'second', 'minute', 'hour', 'day'")
        if self.reset_value <= 0:
            raise ValueError("Reset_value must be a positive integer")
        if self.value <= 0:
            raise ValueError("Value must be a positive integer")

        # An explicitly supplied name is kept; otherwise derive one from the fields.
        if self.name is not None:
            return
        name_parts = (self.value, self.unit, self.reset_value, self.reset_unit)
        self.name = "_".join(str(part) for part in name_parts)

    def get_time_window_seconds(self) -> int:
        """Return the reset period in seconds (unit length times reset_value)."""
        seconds_per_unit = dict(second=1, minute=60, hour=3600, day=86400)
        unit_seconds = seconds_per_unit[self.reset_unit]
        return unit_seconds * self.reset_value

    def __repr__(self) -> str:
        template = "Limit(name='{}', unit='{}', value={}, reset_unit='{}', reset_value={})"
        return template.format(
            self.name, self.unit, self.value, self.reset_unit, self.reset_value
        )

Represents a single rate-limiting rule.

Attributes

unit (str): Unit being counted ('token', 'request')
value (int): Maximum number of units allowed within the reset period
reset_unit (str): Time unit of the reset period ('second', 'minute', 'hour', 'day')
reset_value (int): Number of time units in the reset period (e.g. 1 for "1 day")
name (str, optional): Identifier for the limit (default: auto-generated)

Limit( unit: str, value: int, reset_unit: str, reset_value: int, name: Optional[str] = None)

unit: str

value: int

reset_unit: str

reset_value: int

name: Optional[str] = None

def get_time_window_seconds(self) -> int: View Source

def get_time_window_seconds(self) -> int:
    """Return the reset period in seconds (unit length times reset_value)."""
    seconds_per_unit = dict(second=1, minute=60, hour=3600, day=86400)
    unit_seconds = seconds_per_unit[self.reset_unit]
    return unit_seconds * self.reset_value

Converts the reset time unit into seconds.