knowledge-base/backend/monitoring.py
Pratik Narola 7689409950 Initial commit: Production-ready Mem0 interface with monitoring
- Complete Mem0 OSS integration with hybrid datastore
- PostgreSQL + pgvector for vector storage
- Neo4j 5.18 for graph relationships
- Google Gemini embeddings integration
- Comprehensive monitoring with correlation IDs
- Real-time statistics and performance tracking
- Production-grade observability features
- Clean repository with no exposed secrets
2025-08-10 17:34:41 +05:30

205 lines
No EOL
7.6 KiB
Python

"""Simple monitoring and statistics module for production debugging."""
import inspect
import threading
import time
import uuid
from collections import defaultdict, deque
from datetime import datetime, timezone
from functools import wraps
from typing import Any, Dict, Optional

import structlog
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
class SimpleStats:
    """Thread-safe in-memory statistics storage.

    Every public method takes the same internal lock, so one instance can be
    shared between threads. All data lives in process memory only.

    Note: nothing in this class resets the ``*_today`` counters, so they
    count calls since the instance was created.
    """

    # Memory operations that are always present in the global report.
    _REPORTED_OPERATIONS = ('add', 'search', 'update', 'delete')

    def __init__(self, max_response_times: int = 1000,
                 max_user_response_times: int = 100):
        """Create an empty statistics store.

        Args:
            max_response_times: Size of the global sliding window of
                response times (most recent measurements are kept).
            max_user_response_times: Size of the per-user sliding window.
        """
        self._lock = threading.Lock()
        self._start_time = time.time()

        # Global counters
        self._api_calls_today = 0
        self._total_users = set()
        self._memory_operations = defaultdict(int)

        # Response time tracking: a bounded deque drops the oldest sample in
        # O(1) instead of shifting a list with pop(0).
        self._max_response_times = max_response_times
        self._max_user_response_times = max_user_response_times
        self._response_times = deque(maxlen=max_response_times)

        # User-specific stats, created on first recorded call for a user.
        self._user_stats = defaultdict(self._new_user_entry)

    def _new_user_entry(self) -> Dict[str, Any]:
        """Build the initial statistics record for a newly seen user."""
        return {
            'api_calls_today': 0,
            'response_times': deque(maxlen=self._max_user_response_times),
            'last_activity': None,
        }

    @staticmethod
    def _average(values) -> float:
        """Return the arithmetic mean of ``values``, or 0 when empty."""
        return sum(values) / len(values) if values else 0

    def record_api_call(self, user_id: Optional[str] = None, response_time_ms: float = 0):
        """Record an API call with timing.

        Args:
            user_id: Caller identity; falsy values (``None``, ``""``) are
                counted globally only and not attributed to any user.
            response_time_ms: Duration of the call in milliseconds.
        """
        with self._lock:
            self._api_calls_today += 1
            self._response_times.append(response_time_ms)

            if user_id:
                self._total_users.add(user_id)
                user_data = self._user_stats[user_id]
                user_data['api_calls_today'] += 1
                user_data['response_times'].append(response_time_ms)
                user_data['last_activity'] = datetime.now(timezone.utc).isoformat()

    def record_memory_operation(self, operation: str):
        """Record a memory operation (add, search, update, delete)."""
        with self._lock:
            self._memory_operations[operation] += 1

    def get_global_stats(self) -> Dict[str, Any]:
        """Get global application statistics.

        Returns:
            A dict with user/call counts, the average response time over the
            current sliding window, per-operation counters and uptime.
        """
        with self._lock:
            avg_response_time = self._average(self._response_times)
            uptime = time.time() - self._start_time
            return {
                'total_memories': 0,  # Will be populated by actual Mem0 query
                'total_users': len(self._total_users),
                'api_calls_today': self._api_calls_today,
                'avg_response_time_ms': round(avg_response_time, 2),
                # .get() keeps this read from inserting keys into the
                # defaultdict.
                'memory_operations': {
                    name: self._memory_operations.get(name, 0)
                    for name in self._REPORTED_OPERATIONS
                },
                'uptime_seconds': round(uptime, 2),
            }

    def get_user_stats(self, user_id: str) -> Dict[str, Any]:
        """Get user-specific statistics.

        Looking up a user that has never been recorded returns zeroed
        statistics and does not create an entry for that user.
        """
        with self._lock:
            # .get() instead of indexing: indexing the defaultdict would
            # permanently store a record for every queried id.
            user_data = self._user_stats.get(user_id)
            if user_data is None:
                user_data = self._new_user_entry()
            avg_response_time = self._average(user_data['response_times'])
            return {
                'user_id': user_id,
                'memory_count': 0,  # Will be populated by actual Mem0 query
                'relationship_count': 0,  # Will be populated by actual Mem0 query
                'last_activity': user_data['last_activity'],
                'api_calls_today': user_data['api_calls_today'],
                'avg_response_time_ms': round(avg_response_time, 2),
            }
# Global statistics instance: a single module-level SimpleStats created at
# import time; the `timed` decorator in this module records memory operations
# on it.
stats = SimpleStats()
def generate_correlation_id() -> str:
    """Return a short identifier used to tie together the logs of one call.

    The value is the first eight hexadecimal characters of a random UUID4.
    """
    random_uuid = uuid.uuid4()
    # The first eight characters of the canonical string form are the same
    # eight hex digits that start ``.hex`` (the first hyphen comes after).
    return random_uuid.hex[:8]
# Operation names that count as memory operations, mapped to the counter key
# passed to ``stats.record_memory_operation``.
_MEMORY_OPERATION_TYPES = {
    'add_memories': 'add',
    'search_memories': 'search',
    'update_memory': 'update',
    'delete_memory': 'delete',
}


def _begin_timed_operation(operation_name: str, function_name: str):
    """Log the start of an operation; return ``(correlation_id, started_at)``."""
    correlation_id = generate_correlation_id()
    logger.info(
        f"Starting {operation_name}",
        correlation_id=correlation_id,
        operation=operation_name,
        function=function_name
    )
    # perf_counter is monotonic, so the measured duration is not affected by
    # wall-clock adjustments the way time.time() differences are.
    return correlation_id, time.perf_counter()


def _complete_timed_operation(operation_name: str, correlation_id: str, started_at: float):
    """Log successful completion and count the call if it is a memory operation."""
    duration_ms = (time.perf_counter() - started_at) * 1000
    logger.info(
        f"Completed {operation_name}",
        correlation_id=correlation_id,
        operation=operation_name,
        duration_ms=round(duration_ms, 2),
        status="success"
    )
    operation_type = _MEMORY_OPERATION_TYPES.get(operation_name)
    if operation_type is not None:
        stats.record_memory_operation(operation_type)


def _fail_timed_operation(operation_name: str, correlation_id: str,
                          started_at: float, error: Exception):
    """Log a failed operation; must be called from inside the ``except`` block."""
    duration_ms = (time.perf_counter() - started_at) * 1000
    logger.error(
        f"Failed {operation_name}",
        correlation_id=correlation_id,
        operation=operation_name,
        duration_ms=round(duration_ms, 2),
        status="error",
        error=str(error),
        exc_info=True
    )


def timed(operation_name: str):
    """Decorator to time function execution and log performance.

    Each call of the decorated function gets a fresh correlation id and
    produces a "Starting" log entry followed by either a "Completed" entry
    (with ``duration_ms``) or a "Failed" entry (with the error and traceback).
    Exceptions are always re-raised to the caller.

    When ``operation_name`` is one of ``add_memories``, ``search_memories``,
    ``update_memory`` or ``delete_memory``, a successful call is also counted
    on the module-level ``stats`` object. This applies to both coroutine
    functions and regular functions.

    Args:
        operation_name: Label used in log messages and for the memory
            operation lookup.

    Returns:
        A decorator that wraps coroutine functions in an async wrapper and
        everything else in a regular wrapper.
    """
    def decorator(func):
        # Some callables (e.g. functools.partial objects) have no __name__.
        function_name = getattr(func, '__name__', repr(func))

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                correlation_id, started_at = _begin_timed_operation(
                    operation_name, function_name)
                try:
                    result = await func(*args, **kwargs)
                except Exception as e:
                    _fail_timed_operation(operation_name, correlation_id, started_at, e)
                    raise
                _complete_timed_operation(operation_name, correlation_id, started_at)
                return result

            return async_wrapper

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            correlation_id, started_at = _begin_timed_operation(
                operation_name, function_name)
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                _fail_timed_operation(operation_name, correlation_id, started_at, e)
                raise
            _complete_timed_operation(operation_name, correlation_id, started_at)
            return result

        return sync_wrapper

    return decorator