# Changelog (from cherry-pick):
# - Add /v1/chat/completions and /chat/completions endpoints (OpenAI SDK compatible)
# - Add streaming support with SSE for chat completions
# - Add get_current_user_openai auth supporting Bearer token and X-API-Key
# - Add OpenAI-compatible request/response models (OpenAIChatCompletionRequest, etc.)
# - Cherry-pick improved login UI from cloud branch (styled login screen, logout button)
"""Ultra-minimal Pydantic models for pure Mem0 API."""
from typing import List, Optional, Dict, Any

from pydantic import BaseModel, Field

import re
# Constants for input validation.
# Token estimates below assume ~4 characters per token — rough heuristic, not exact.
MAX_MESSAGE_LENGTH = 50000  # ~12k tokens max per message
MAX_QUERY_LENGTH = 10000  # ~2.5k tokens max per query
MAX_USER_ID_LENGTH = 100  # Reasonable user ID length
MAX_MEMORY_ID_LENGTH = 100  # Memory IDs are typically UUIDs
MAX_CONTEXT_MESSAGES = 100  # Max conversation context messages
USER_ID_PATTERN = r"^[a-zA-Z0-9_\-\.@]+$"  # Alphanumeric with common separators
# Request Models
class ChatMessage(BaseModel):
    """Chat message structure."""

    # Author of the message. Only length-capped here — the description lists the
    # expected values, but nothing enforces them; confirm callers if tightening.
    role: str = Field(default=..., max_length=20, description="Message role (user, assistant, system)")
    # Free-form message body, bounded by MAX_MESSAGE_LENGTH characters.
    content: str = Field(default=..., max_length=MAX_MESSAGE_LENGTH, description="Message content")
class ChatRequest(BaseModel):
    """Ultra-minimal chat request."""

    # The new user message to process.
    message: str = Field(default=..., max_length=MAX_MESSAGE_LENGTH, description="User message")
    # Scoping identifiers; only user_id is pattern-validated.
    user_id: Optional[str] = Field(
        default="default",
        max_length=MAX_USER_ID_LENGTH,
        pattern=USER_ID_PATTERN,
        description="User identifier (alphanumeric, _, -, ., @)",
    )
    agent_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Agent identifier")
    run_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Run identifier")
    # Optional rolling conversation history, bounded to MAX_CONTEXT_MESSAGES items.
    context: Optional[List[ChatMessage]] = Field(
        default=None,
        max_length=MAX_CONTEXT_MESSAGES,
        description="Previous conversation context (max 100 messages)",
    )
    # Arbitrary caller-supplied key/value data, passed through unvalidated.
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
class MemoryAddRequest(BaseModel):
    """Request to add memories with hierarchy support - open-source compatible."""

    # Conversation turns to extract memories from; bounded like chat context.
    messages: List[ChatMessage] = Field(
        default=...,
        max_length=MAX_CONTEXT_MESSAGES,
        description="Messages to process (max 100 messages)",
    )
    # Hierarchy scoping: user -> agent -> run; only user_id is pattern-validated.
    user_id: Optional[str] = Field(
        default="default",
        max_length=MAX_USER_ID_LENGTH,
        pattern=USER_ID_PATTERN,
        description="User identifier",
    )
    agent_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Agent identifier")
    run_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Run identifier")
    # Arbitrary caller-supplied key/value data, passed through unvalidated.
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
class MemorySearchRequest(BaseModel):
    """Request to search memories with hierarchy filtering."""

    # Natural-language search query, capped at MAX_QUERY_LENGTH characters.
    query: str = Field(default=..., max_length=MAX_QUERY_LENGTH, description="Search query")
    # Hierarchy scoping: user -> agent -> run; only user_id is pattern-validated.
    user_id: Optional[str] = Field(
        default="default",
        max_length=MAX_USER_ID_LENGTH,
        pattern=USER_ID_PATTERN,
        description="User identifier",
    )
    agent_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Agent identifier")
    run_id: Optional[str] = Field(default=None, max_length=MAX_USER_ID_LENGTH, description="Run identifier")
    # Result-shaping knobs: page size and an optional minimum relevance cutoff.
    limit: int = Field(default=5, ge=1, le=100, description="Maximum number of results (1-100)")
    threshold: Optional[float] = Field(default=None, ge=0.0, le=1.0, description="Minimum relevance score (0-1)")
    # Extra backend-specific filters, passed through unvalidated.
    filters: Optional[Dict[str, Any]] = Field(default=None, description="Additional filters")
class MemoryUpdateRequest(BaseModel):
    """Request to update a memory."""

    memory_id: str = Field(default=..., max_length=MAX_MEMORY_ID_LENGTH, description="Memory ID to update")
    # Required here (unlike the other request models) so the server can verify
    # that the caller owns the memory being changed.
    user_id: str = Field(
        default=...,
        max_length=MAX_USER_ID_LENGTH,
        pattern=USER_ID_PATTERN,
        description="User identifier for ownership verification",
    )
    # Replacement text for the memory.
    content: str = Field(default=..., max_length=MAX_MESSAGE_LENGTH, description="New memory content")
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Updated metadata")


# Response Models - Ultra-minimal
class MemoryItem(BaseModel):
    """Individual memory item."""

    id: str = Field(default=..., description="Memory unique identifier")
    memory: str = Field(default=..., description="Memory content")
    # Hierarchy scoping the memory belongs to (any may be absent).
    user_id: Optional[str] = Field(default=None, description="Associated user ID")
    agent_id: Optional[str] = Field(default=None, description="Associated agent ID")
    run_id: Optional[str] = Field(default=None, description="Associated run ID")
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Memory metadata")
    # Populated only on search results; absent on plain reads.
    score: Optional[float] = Field(default=None, description="Relevance score (for search results)")
    # Timestamps are plain strings — presumably ISO-8601; confirm against producer.
    created_at: Optional[str] = Field(default=None, description="Creation timestamp")
    updated_at: Optional[str] = Field(default=None, description="Last update timestamp")
class MemorySearchResponse(BaseModel):
    """Memory search results - pure Mem0 structure."""

    memories: List[MemoryItem] = Field(default=..., description="Found memories")
    total_count: int = Field(default=..., description="Total number of memories found")
    # Echo of the query so clients can correlate responses.
    query: str = Field(default=..., description="Original search query")
class MemoryAddResponse(BaseModel):
    """Response from adding memories - pure Mem0 structure."""

    # Raw dicts rather than MemoryItem — shape follows whatever the backend returns.
    added_memories: List[Dict[str, Any]] = Field(default=..., description="Memories that were added")
    message: str = Field(default=..., description="Success message")
class GraphRelationship(BaseModel):
    """Graph relationship structure."""

    # A single (source)-[relationship]->(target) edge.
    source: str = Field(default=..., description="Source entity")
    relationship: str = Field(default=..., description="Relationship type")
    target: str = Field(default=..., description="Target entity")
    properties: Optional[Dict[str, Any]] = Field(default=None, description="Relationship properties")
class GraphResponse(BaseModel):
    """Graph relationships - pure Mem0 structure."""

    relationships: List[GraphRelationship] = Field(default=..., description="Found relationships")
    # Deduplicated entity names appearing across the relationships.
    entities: List[str] = Field(default=..., description="Unique entities")
    user_id: str = Field(default=..., description="User identifier")
class HealthResponse(BaseModel):
    """Health check response."""

    # Overall status plus a per-service breakdown (name -> status string).
    status: str = Field(default=..., description="Service status")
    services: Dict[str, str] = Field(default=..., description="Individual service statuses")
    timestamp: str = Field(default=..., description="Health check timestamp")
class ErrorResponse(BaseModel):
    """Error response structure."""

    # Short error summary, optional detail, and the matching HTTP status code.
    error: str = Field(default=..., description="Error message")
    detail: Optional[str] = Field(default=None, description="Detailed error information")
    status_code: int = Field(default=..., description="HTTP status code")


# Statistics and Monitoring Models
class MemoryOperationStats(BaseModel):
    """Memory operation statistics."""

    # Per-operation counters (CRUD-style breakdown).
    add: int = Field(default=..., description="Number of add operations")
    search: int = Field(default=..., description="Number of search operations")
    update: int = Field(default=..., description="Number of update operations")
    delete: int = Field(default=..., description="Number of delete operations")
class GlobalStatsResponse(BaseModel):
    """Global application statistics."""

    total_memories: int = Field(default=..., description="Total memories across all users")
    total_users: int = Field(default=..., description="Total number of users")
    api_calls_today: int = Field(default=..., description="Total API calls today")
    avg_response_time_ms: float = Field(default=..., description="Average response time in milliseconds")
    # Nested per-operation counters.
    memory_operations: MemoryOperationStats = Field(default=..., description="Memory operation breakdown")
    uptime_seconds: float = Field(default=..., description="Application uptime in seconds")
class UserStatsResponse(BaseModel):
    """User-specific statistics."""

    user_id: str = Field(default=..., description="User identifier")
    memory_count: int = Field(default=..., description="Number of memories for this user")
    relationship_count: int = Field(default=..., description="Number of graph relationships for this user")
    # Plain string timestamp — presumably ISO-8601; confirm against producer.
    last_activity: Optional[str] = Field(default=None, description="Last activity timestamp")
    api_calls_today: int = Field(default=..., description="API calls made by this user today")
    avg_response_time_ms: float = Field(default=..., description="Average response time for this user's requests")


# OpenAI-Compatible API Models
class OpenAIMessage(BaseModel):
    """OpenAI message format."""

    # Mirrors the OpenAI chat message shape: role + content only.
    role: str = Field(default=..., description="Message role (system, user, assistant)")
    content: str = Field(default=..., description="Message content")
class OpenAIChatCompletionRequest(BaseModel):
    """OpenAI chat completion request format."""

    model: str = Field(default=..., description="Model to use (will use configured default)")
    # NOTE(review): typed as raw dicts rather than List[OpenAIMessage], which stays
    # permissive about extra keys (e.g. "name") but skips role/content validation —
    # confirm this looseness is intentional before tightening.
    messages: List[Dict[str, str]] = Field(default=..., description="List of messages")
    # Standard OpenAI sampling/generation knobs, with OpenAI's usual defaults.
    temperature: Optional[float] = Field(default=0.7, description="Sampling temperature")
    max_tokens: Optional[int] = Field(default=None, description="Maximum tokens to generate")
    stream: Optional[bool] = Field(default=False, description="Whether to stream responses")
    top_p: Optional[float] = Field(default=1.0, description="Nucleus sampling parameter")
    n: Optional[int] = Field(default=1, description="Number of completions to generate")
    stop: Optional[List[str]] = Field(default=None, description="Stop sequences")
    presence_penalty: Optional[float] = Field(default=0, description="Presence penalty")
    frequency_penalty: Optional[float] = Field(default=0, description="Frequency penalty")
    # Accepted for SDK compatibility but ignored server-side per the description.
    user: Optional[str] = Field(default=None, description="User identifier (ignored, uses API key)")
class OpenAIUsage(BaseModel):
    """Token usage information."""

    # total_tokens is reported, not derived — producer is responsible for consistency.
    prompt_tokens: int = Field(default=..., description="Tokens in the prompt")
    completion_tokens: int = Field(default=..., description="Tokens in the completion")
    total_tokens: int = Field(default=..., description="Total tokens used")
class OpenAIChoiceMessage(BaseModel):
    """Message in a choice."""

    role: str = Field(default=..., description="Role of the message")
    content: str = Field(default=..., description="Content of the message")
class OpenAIChoice(BaseModel):
    """Individual completion choice."""

    # Position of this choice within the response's choices list.
    index: int = Field(default=..., description="Choice index")
    message: OpenAIChoiceMessage = Field(default=..., description="Message content")
    finish_reason: str = Field(default=..., description="Reason for completion finish")
class OpenAIChatCompletionResponse(BaseModel):
    """OpenAI chat completion response format."""

    id: str = Field(default=..., description="Unique completion ID")
    # Fixed discriminator matching the OpenAI wire format.
    object: str = Field(default="chat.completion", description="Object type")
    created: int = Field(default=..., description="Unix timestamp of creation")
    model: str = Field(default=..., description="Model used for completion")
    choices: List[OpenAIChoice] = Field(default=..., description="List of completion choices")
    usage: Optional[OpenAIUsage] = Field(default=None, description="Token usage information")


# Streaming-specific models
class OpenAIStreamDelta(BaseModel):
    """Delta content in a streaming chunk."""

    # Per the descriptions: role appears only in the first chunk, then content
    # arrives incrementally; either field may be absent in any given chunk.
    role: Optional[str] = Field(default=None, description="Role (only in first chunk)")
    content: Optional[str] = Field(default=None, description="Incremental content")
class OpenAIStreamChoice(BaseModel):
    """Individual streaming choice."""

    index: int = Field(default=..., description="Choice index")
    delta: OpenAIStreamDelta = Field(default=..., description="Delta content")
    # None until the stream's final chunk for this choice.
    finish_reason: Optional[str] = Field(default=None, description="Reason for completion finish")