knowledge-base/backend/models.py
Pratik Narola 50edce2d3c security hardening: add auth, rate limiting, fix info disclosure
- Add auth to /models and /users endpoints
- Add rate limiting to all endpoints (10-120/min based on operation type)
- Fix 11 info disclosure issues (detail=str(e) -> generic message)
- Fix 2 silent except blocks with proper logging
- Fix 7 raise e -> raise for proper exception chaining
- Fix health check to not expose exception details
- Update tests with X-API-Key headers and security tests
2026-01-15 22:41:24 +05:30

229 lines
8.1 KiB
Python

"""Ultra-minimal Pydantic models for pure Mem0 API."""
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
import re
# Input-validation limits referenced by the request models in this module.
# Token figures are rough estimates, not enforced values.
MAX_MESSAGE_LENGTH = 50_000  # max length of one message (~12k tokens)
MAX_QUERY_LENGTH = 10_000  # max length of one search query (~2.5k tokens)
MAX_USER_ID_LENGTH = 100  # also applied to agent_id and run_id fields
MAX_MEMORY_ID_LENGTH = 100  # memory IDs are typically UUIDs
MAX_CONTEXT_MESSAGES = 100  # max messages in a context / add-memory list

# One or more ASCII letters, digits, "_", "-", "." or "@"; nothing else.
USER_ID_PATTERN = r"^[a-zA-Z0-9_\-\.@]+$"
# Request Models
class ChatMessage(BaseModel):
    """Chat message structure."""

    # Required role label, capped at 20 characters. The description lists
    # user/assistant/system, but no fixed set of values is enforced here.
    role: str = Field(
        default=...,
        description="Message role (user, assistant, system)",
        max_length=20,
    )

    # Required message body, bounded by MAX_MESSAGE_LENGTH.
    content: str = Field(
        default=...,
        description="Message content",
        max_length=MAX_MESSAGE_LENGTH,
    )
class ChatRequest(BaseModel):
    """Ultra-minimal chat request."""

    # Required user message, bounded by MAX_MESSAGE_LENGTH.
    message: str = Field(
        default=...,
        description="User message",
        max_length=MAX_MESSAGE_LENGTH,
    )

    # Falls back to "default" when omitted; string values must satisfy
    # USER_ID_PATTERN and the user-ID length cap.
    user_id: Optional[str] = Field(
        default="default",
        description="User identifier (alphanumeric, _, -, ., @)",
        pattern=USER_ID_PATTERN,
        max_length=MAX_USER_ID_LENGTH,
    )

    # Optional scoping identifiers: length-limited only, no pattern check.
    agent_id: Optional[str] = Field(
        default=None,
        description="Agent identifier",
        max_length=MAX_USER_ID_LENGTH,
    )
    run_id: Optional[str] = Field(
        default=None,
        description="Run identifier",
        max_length=MAX_USER_ID_LENGTH,
    )

    # Optional prior messages; the list is capped at MAX_CONTEXT_MESSAGES.
    context: Optional[List[ChatMessage]] = Field(
        default=None,
        description="Previous conversation context (max 100 messages)",
        max_length=MAX_CONTEXT_MESSAGES,
    )

    # Optional free-form mapping; its contents are not validated here.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional metadata"
    )
class MemoryAddRequest(BaseModel):
    """Request to add memories with hierarchy support - open-source compatible."""

    # Required list of messages; capped at MAX_CONTEXT_MESSAGES entries.
    messages: List[ChatMessage] = Field(
        default=...,
        description="Messages to process (max 100 messages)",
        max_length=MAX_CONTEXT_MESSAGES,
    )

    # Falls back to "default" when omitted; string values must satisfy
    # USER_ID_PATTERN and the user-ID length cap.
    user_id: Optional[str] = Field(
        default="default",
        description="User identifier",
        pattern=USER_ID_PATTERN,
        max_length=MAX_USER_ID_LENGTH,
    )

    # Optional scoping identifiers: length-limited only, no pattern check.
    agent_id: Optional[str] = Field(
        default=None,
        description="Agent identifier",
        max_length=MAX_USER_ID_LENGTH,
    )
    run_id: Optional[str] = Field(
        default=None,
        description="Run identifier",
        max_length=MAX_USER_ID_LENGTH,
    )

    # Optional free-form mapping; its contents are not validated here.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional metadata"
    )
class MemorySearchRequest(BaseModel):
    """Request to search memories with hierarchy filtering."""

    # Required search text, bounded by MAX_QUERY_LENGTH.
    query: str = Field(
        default=...,
        description="Search query",
        max_length=MAX_QUERY_LENGTH,
    )

    # Falls back to "default" when omitted; string values must satisfy
    # USER_ID_PATTERN and the user-ID length cap.
    user_id: Optional[str] = Field(
        default="default",
        description="User identifier",
        pattern=USER_ID_PATTERN,
        max_length=MAX_USER_ID_LENGTH,
    )

    # Optional scoping identifiers: length-limited only, no pattern check.
    agent_id: Optional[str] = Field(
        default=None,
        description="Agent identifier",
        max_length=MAX_USER_ID_LENGTH,
    )
    run_id: Optional[str] = Field(
        default=None,
        description="Run identifier",
        max_length=MAX_USER_ID_LENGTH,
    )

    # Result cap: defaults to 5 and must lie in the inclusive range 1..100.
    limit: int = Field(
        default=5,
        description="Maximum number of results (1-100)",
        le=100,
        ge=1,
    )

    # Optional score cutoff; when given it must lie within 0.0..1.0.
    threshold: Optional[float] = Field(
        default=None,
        description="Minimum relevance score (0-1)",
        le=1.0,
        ge=0.0,
    )

    # Optional free-form filter mapping; its contents are not validated here.
    filters: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional filters"
    )
class MemoryUpdateRequest(BaseModel):
    """Request to update a memory."""

    # Required target memory ID, bounded by MAX_MEMORY_ID_LENGTH.
    memory_id: str = Field(
        default=...,
        description="Memory ID to update",
        max_length=MAX_MEMORY_ID_LENGTH,
    )

    # Required here (no "default" fallback); must satisfy USER_ID_PATTERN
    # and the user-ID length cap.
    user_id: str = Field(
        default=...,
        description="User identifier for ownership verification",
        pattern=USER_ID_PATTERN,
        max_length=MAX_USER_ID_LENGTH,
    )

    # Required replacement text, bounded by MAX_MESSAGE_LENGTH.
    content: str = Field(
        default=...,
        description="New memory content",
        max_length=MAX_MESSAGE_LENGTH,
    )

    # Optional free-form mapping; its contents are not validated here.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Updated metadata"
    )
# Response Models - Ultra-minimal
class MemoryItem(BaseModel):
    """Individual memory item."""

    # Required fields: identifier and content.
    id: str = Field(default=..., description="Memory unique identifier")
    memory: str = Field(default=..., description="Memory content")

    # Optional association identifiers; each defaults to None.
    user_id: Optional[str] = Field(default=None, description="Associated user ID")
    agent_id: Optional[str] = Field(default=None, description="Associated agent ID")
    run_id: Optional[str] = Field(default=None, description="Associated run ID")

    # Optional free-form mapping; its contents are not validated here.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Memory metadata"
    )

    # Optional score; no range constraint is applied on the response side.
    score: Optional[float] = Field(
        default=None,
        description="Relevance score (for search results)",
    )

    # Timestamps are carried as plain strings; no format is enforced here.
    created_at: Optional[str] = Field(default=None, description="Creation timestamp")
    updated_at: Optional[str] = Field(
        default=None, description="Last update timestamp"
    )
class MemorySearchResponse(BaseModel):
    """Memory search results - pure Mem0 structure."""

    # All three fields are required.
    memories: List[MemoryItem] = Field(default=..., description="Found memories")
    total_count: int = Field(
        default=..., description="Total number of memories found"
    )
    query: str = Field(default=..., description="Original search query")
class MemoryAddResponse(BaseModel):
    """Response from adding memories - pure Mem0 structure."""

    # Required list of untyped dicts; the per-entry shape is not modelled here.
    added_memories: List[Dict[str, Any]] = Field(
        default=...,
        description="Memories that were added",
    )

    # Required status text.
    message: str = Field(default=..., description="Success message")
class GraphRelationship(BaseModel):
    """Graph relationship structure."""

    # Required triple: source entity, relationship type, target entity.
    source: str = Field(default=..., description="Source entity")
    relationship: str = Field(default=..., description="Relationship type")
    target: str = Field(default=..., description="Target entity")

    # Optional free-form mapping; defaults to None.
    properties: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Relationship properties",
    )
class GraphResponse(BaseModel):
    """Graph relationships - pure Mem0 structure."""

    # All three fields are required.
    relationships: List[GraphRelationship] = Field(
        default=...,
        description="Found relationships",
    )
    # Uniqueness is stated by the description only; the model does not enforce it.
    entities: List[str] = Field(default=..., description="Unique entities")
    user_id: str = Field(default=..., description="User identifier")
class HealthResponse(BaseModel):
    """Health check response."""

    # All three fields are required.
    status: str = Field(default=..., description="Service status")
    # String-to-string mapping, one entry per service.
    services: Dict[str, str] = Field(
        default=..., description="Individual service statuses"
    )
    # Carried as a plain string; no timestamp format is enforced here.
    timestamp: str = Field(default=..., description="Health check timestamp")
class ErrorResponse(BaseModel):
    """Error response structure."""

    # Required error message.
    error: str = Field(default=..., description="Error message")
    # Optional extra detail; defaults to None.
    detail: Optional[str] = Field(
        default=None, description="Detailed error information"
    )
    # Required integer status; no range check is applied.
    status_code: int = Field(default=..., description="HTTP status code")
# Statistics and Monitoring Models
class MemoryOperationStats(BaseModel):
    """Memory operation statistics."""

    # One required integer counter per operation kind.
    add: int = Field(default=..., description="Number of add operations")
    search: int = Field(default=..., description="Number of search operations")
    update: int = Field(default=..., description="Number of update operations")
    delete: int = Field(default=..., description="Number of delete operations")
class GlobalStatsResponse(BaseModel):
    """Global application statistics."""

    # Every field is required.
    total_memories: int = Field(
        default=..., description="Total memories across all users"
    )
    total_users: int = Field(default=..., description="Total number of users")
    api_calls_today: int = Field(default=..., description="Total API calls today")
    avg_response_time_ms: float = Field(
        default=...,
        description="Average response time in milliseconds",
    )

    # Nested per-operation counters.
    memory_operations: MemoryOperationStats = Field(
        default=...,
        description="Memory operation breakdown",
    )
    uptime_seconds: float = Field(
        default=..., description="Application uptime in seconds"
    )
class UserStatsResponse(BaseModel):
    """User-specific statistics."""

    # Required identifier and counters.
    user_id: str = Field(default=..., description="User identifier")
    memory_count: int = Field(
        default=..., description="Number of memories for this user"
    )
    relationship_count: int = Field(
        default=...,
        description="Number of graph relationships for this user",
    )

    # The only optional field; carried as a plain string, default None.
    last_activity: Optional[str] = Field(
        default=None, description="Last activity timestamp"
    )

    # Required per-user request metrics.
    api_calls_today: int = Field(
        default=..., description="API calls made by this user today"
    )
    avg_response_time_ms: float = Field(
        default=...,
        description="Average response time for this user's requests",
    )