From 2045a042eb2fd9c646a3f26b0ef75abb1a622d72 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 27 Oct 2025 15:29:55 +0000 Subject: [PATCH] Working frontend and openai compatible endpoint --- backend/Dockerfile | 9 +- backend/auth.py | 40 ++++- backend/main.py | 194 +++++++++++++++++++++++- backend/mem0_manager.py | 8 +- backend/models.py | 70 ++++++++- docker-compose.yml | 25 +++- frontend/index.html | 317 +++++++++++++++++++++++++++++++++++++--- 7 files changed, 623 insertions(+), 40 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 7eb3825..9f06dcb 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -11,11 +11,14 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* # Copy requirements and install Python dependencies -COPY requirements.txt . +COPY backend/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -# Copy application code -COPY . . +# Copy backend code +COPY backend/ . + +# Copy frontend directory +COPY frontend/ /app/frontend/ # Set Python path ENV PYTHONPATH=/app diff --git a/backend/auth.py b/backend/auth.py index 5b404d6..75a4431 100644 --- a/backend/auth.py +++ b/backend/auth.py @@ -1,7 +1,7 @@ """Simple API key authentication for Mem0 Interface.""" from typing import Optional -from fastapi import HTTPException, Security, status +from fastapi import HTTPException, Security, status, Header from fastapi.security import APIKeyHeader import structlog @@ -91,6 +91,44 @@ async def get_current_user(api_key: str = Security(api_key_header)) -> str: return auth_service.verify_api_key(api_key) +async def get_current_user_openai( + authorization: Optional[str] = Header(None), + x_api_key: Optional[str] = Header(None, alias="X-API-Key") +) -> str: + """ + FastAPI dependency for OpenAI-compatible authentication. + Supports both Authorization: Bearer and X-API-Key headers. + + Args: + authorization: Authorization header (Bearer token) + x_api_key: X-API-Key header + + Returns: + str: Authenticated user_id + + Raises: + HTTPException: If no valid API key is provided + """ + api_key = None + + # Try Bearer token first (OpenAI standard) + if authorization and authorization.startswith("Bearer "): + api_key = authorization[7:] # Remove "Bearer " prefix + logger.debug(f"Extracted API key from Authorization Bearer token") + # Fall back to X-API-Key header + elif x_api_key: + api_key = x_api_key + logger.debug(f"Extracted API key from X-API-Key header") + else: + logger.warning("No API key provided in Authorization or X-API-Key headers") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Missing API key. 
Provide either 'Authorization: Bearer ' or 'X-API-Key: ' header" + ) + + return auth_service.verify_api_key(api_key) + + async def verify_user_access( api_key: str = Security(api_key_header), user_id: Optional[str] = None diff --git a/backend/main.py b/backend/main.py index ca8d6ea..a4faaa9 100644 --- a/backend/main.py +++ b/backend/main.py @@ -8,18 +8,24 @@ from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Security from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, StreamingResponse, FileResponse import structlog +import json +import asyncio +from pathlib import Path from config import settings from models import ( ChatRequest, MemoryAddRequest, MemoryAddResponse, MemorySearchRequest, MemorySearchResponse, MemoryUpdateRequest, MemoryItem, GraphResponse, HealthResponse, ErrorResponse, - GlobalStatsResponse, UserStatsResponse + GlobalStatsResponse, UserStatsResponse, + OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, + OpenAIChoice, OpenAIChoiceMessage, OpenAIUsage, + OpenAIStreamChoice, OpenAIStreamDelta ) from mem0_manager import mem0_manager -from auth import get_current_user, auth_service +from auth import get_current_user, get_current_user_openai, auth_service # Configure structured logging structlog.configure( @@ -175,6 +181,16 @@ async def global_exception_handler(request, exc): ) +# Root endpoint - serve frontend +@app.get("/") +async def root(): + """Serve the frontend HTML interface.""" + frontend_path = Path("/app/frontend/index.html") + if frontend_path.exists(): + return FileResponse(frontend_path) + return {"message": "Mem0 API", "version": "1.0.0", "docs": "/docs"} + + # Health check endpoint @app.get("/health", response_model=HealthResponse) async def health_check(): @@ -236,6 +252,178 @@ async def chat_with_memory( raise HTTPException(status_code=500, detail=str(e)) +async def stream_openai_response(completion_id: str, model: str, content: str, created: int): + """ + Generate Server-Sent Events (SSE) stream for OpenAI-compatible streaming. + + Simulates streaming by chunking the response content. 
+ """ + import uuid + + # First chunk with role + chunk = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [{ + "index": 0, + "delta": {"role": "assistant", "content": ""}, + "finish_reason": None + }] + } + yield f"data: {json.dumps(chunk)}\n\n" + + # Stream content in chunks (simulate streaming by splitting into words) + # For true streaming from LLM, we'd stream as tokens arrive + words = content.split() + chunk_size = 3 # Send 3 words at a time for smooth streaming effect + + for i in range(0, len(words), chunk_size): + word_chunk = " ".join(words[i:i + chunk_size]) + if i + chunk_size < len(words): + word_chunk += " " # Add space between chunks except last + + chunk = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [{ + "index": 0, + "delta": {"content": word_chunk}, + "finish_reason": None + }] + } + yield f"data: {json.dumps(chunk)}\n\n" + await asyncio.sleep(0.05) # Small delay for streaming effect + + # Final chunk with finish_reason + chunk = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "stop" + }] + } + yield f"data: {json.dumps(chunk)}\n\n" + + # OpenAI standard: end with [DONE] + yield "data: [DONE]\n\n" + + +# OpenAI-compatible chat completions endpoint +@app.post("/v1/chat/completions") +@app.post("/chat/completions") +async def openai_chat_completions( + request: OpenAIChatCompletionRequest, + authenticated_user: str = Depends(get_current_user_openai) +): + """ + OpenAI-compatible chat completions endpoint with automatic mem0 memory integration. + + Available at both: + - /v1/chat/completions (OpenAI standard) + - /chat/completions (direct access) + + - API key maps to user_id automatically + - Memories are searched and added transparently + - Compatible with OpenAI Python SDK and other OpenAI-compatible clients + """ + try: + import uuid + + # Extract user_id from authenticated API key + user_id = authenticated_user + logger.info(f"Processing OpenAI chat completion for user: {user_id} (streaming={request.stream})") + + # Extract last user message and conversation context + user_messages = [m for m in request.messages if m.get("role") == "user"] + if not user_messages: + raise HTTPException( + status_code=400, + detail="No user messages provided. Include at least one message with role='user'." 
+ ) + + last_message = user_messages[-1].get("content", "") + context = request.messages[:-1] # All messages except the last one + + logger.info(f"Last user message: {last_message[:100]}...") + logger.info(f"Context messages: {len(context)}") + + # Call existing chat_with_memory (handles mem0 search + LLM + mem0 add) + result = await mem0_manager.chat_with_memory( + message=last_message, + user_id=user_id, + context=context if context else None + ) + + # Generate IDs and timestamps + completion_id = f"chatcmpl-{uuid.uuid4().hex[:24]}" + created_time = int(time.time()) + assistant_content = result.get("response", "") + + # Return streaming or non-streaming response + if request.stream: + logger.info(f"Returning streaming response for {completion_id}") + return StreamingResponse( + stream_openai_response( + completion_id=completion_id, + model=settings.default_model, + content=assistant_content, + created=created_time + ), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + } + ) + else: + # Non-streaming response (original format) + response = OpenAIChatCompletionResponse( + id=completion_id, + object="chat.completion", + created=created_time, + model=settings.default_model, + choices=[ + OpenAIChoice( + index=0, + message=OpenAIChoiceMessage( + role="assistant", + content=assistant_content + ), + finish_reason="stop" + ) + ], + usage=OpenAIUsage( + prompt_tokens=0, # We don't track tokens yet + completion_tokens=0, + total_tokens=0 + ) + ) + + logger.info( + f"OpenAI completion successful", + completion_id=completion_id, + user_id=user_id, + memories_used=result.get("memories_used", 0) + ) + + return response + + except HTTPException as e: + logger.error(f"HTTP Exception in OpenAI endpoint: {e.status_code} - {e.detail}") + raise + except Exception as e: + logger.error(f"Error in OpenAI chat completions endpoint: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + # Memory management endpoints - pure Mem0 passthroughs @app.post("/memories") async def add_memories( diff --git a/backend/mem0_manager.py b/backend/mem0_manager.py index c3c00c2..cc2345f 100644 --- a/backend/mem0_manager.py +++ b/backend/mem0_manager.py @@ -52,10 +52,10 @@ class Mem0Manager: "embedder": { "provider": "ollama", "config": { - "model": "qwen3-embedding:4b-q8_0", + "model": "hf.co/Qwen/Qwen3-Embedding-0.6B-GGUF:Q8_0", # "api_key": settings.embedder_api_key, - "ollama_base_url": "http://172.17.0.1:11434", - "embedding_dims": 2560 + "ollama_base_url": "https://models.breezehq.dev", + "embedding_dims": 1024 } }, "vector_store": { @@ -64,7 +64,7 @@ class Mem0Manager: "collection_name": settings.qdrant_collection_name, "host": settings.qdrant_host, "port": settings.qdrant_port, - "embedding_model_dims": 2560, + "embedding_model_dims": 1024, "on_disk": True } }, diff --git a/backend/models.py b/backend/models.py index 82fe1fa..3d358c6 100644 --- a/backend/models.py +++ b/backend/models.py @@ -137,4 +137,72 @@ class UserStatsResponse(BaseModel): relationship_count: int = Field(..., description="Number of graph relationships for this user") last_activity: Optional[str] = Field(None, description="Last activity timestamp") api_calls_today: int = Field(..., description="API calls made by this user today") - avg_response_time_ms: float = Field(..., description="Average response time for this user's requests") \ No newline at end of file + avg_response_time_ms: float = Field(..., description="Average response time for this user's requests") + + +# 
OpenAI-Compatible API Models + +class OpenAIMessage(BaseModel): + """OpenAI message format.""" + role: str = Field(..., description="Message role (system, user, assistant)") + content: str = Field(..., description="Message content") + + +class OpenAIChatCompletionRequest(BaseModel): + """OpenAI chat completion request format.""" + model: str = Field(..., description="Model to use (will use configured default)") + messages: List[Dict[str, str]] = Field(..., description="List of messages") + temperature: Optional[float] = Field(0.7, description="Sampling temperature") + max_tokens: Optional[int] = Field(None, description="Maximum tokens to generate") + stream: Optional[bool] = Field(False, description="Whether to stream responses") + top_p: Optional[float] = Field(1.0, description="Nucleus sampling parameter") + n: Optional[int] = Field(1, description="Number of completions to generate") + stop: Optional[List[str]] = Field(None, description="Stop sequences") + presence_penalty: Optional[float] = Field(0, description="Presence penalty") + frequency_penalty: Optional[float] = Field(0, description="Frequency penalty") + user: Optional[str] = Field(None, description="User identifier (ignored, uses API key)") + + +class OpenAIUsage(BaseModel): + """Token usage information.""" + prompt_tokens: int = Field(..., description="Tokens in the prompt") + completion_tokens: int = Field(..., description="Tokens in the completion") + total_tokens: int = Field(..., description="Total tokens used") + + +class OpenAIChoiceMessage(BaseModel): + """Message in a choice.""" + role: str = Field(..., description="Role of the message") + content: str = Field(..., description="Content of the message") + + +class OpenAIChoice(BaseModel): + """Individual completion choice.""" + index: int = Field(..., description="Choice index") + message: OpenAIChoiceMessage = Field(..., description="Message content") + finish_reason: str = Field(..., description="Reason for completion finish") + + +class OpenAIChatCompletionResponse(BaseModel): + """OpenAI chat completion response format.""" + id: str = Field(..., description="Unique completion ID") + object: str = Field(default="chat.completion", description="Object type") + created: int = Field(..., description="Unix timestamp of creation") + model: str = Field(..., description="Model used for completion") + choices: List[OpenAIChoice] = Field(..., description="List of completion choices") + usage: Optional[OpenAIUsage] = Field(None, description="Token usage information") + + +# Streaming-specific models + +class OpenAIStreamDelta(BaseModel): + """Delta content in a streaming chunk.""" + role: Optional[str] = Field(None, description="Role (only in first chunk)") + content: Optional[str] = Field(None, description="Incremental content") + + +class OpenAIStreamChoice(BaseModel): + """Individual streaming choice.""" + index: int = Field(..., description="Choice index") + delta: OpenAIStreamDelta = Field(..., description="Delta content") + finish_reason: Optional[str] = Field(None, description="Reason for completion finish") \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 3d1b629..444aa58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,8 @@ services: timeout: 5s retries: 5 restart: unless-stopped + networks: + - mem0_network # Neo4j with APOC for graph relationships neo4j: @@ -32,6 +34,8 @@ services: expose: - "7474" # HTTP - Internal only - "7687" # Bolt - Internal only + networks: + - mem0_network volumes: - neo4j_data:/data 
- neo4j_logs:/logs @@ -46,9 +50,9 @@ services: # Backend API service backend: - build: - context: ./backend - dockerfile: Dockerfile + build: + context: . + dockerfile: ./backend/Dockerfile container_name: mem0-backend environment: OPENAI_API_KEY: ${OPENAI_COMPAT_API_KEY} @@ -65,8 +69,11 @@ services: CORS_ORIGINS: ${CORS_ORIGINS:-http://localhost:3000} DEFAULT_MODEL: ${DEFAULT_MODEL:-claude-sonnet-4} API_KEYS: ${API_KEYS:-{}} - ports: - - "${BACKEND_PORT:-8000}:8000" + expose: + - 8000 + networks: + - npm_network + - mem0_network depends_on: qdrant: condition: service_healthy @@ -75,7 +82,8 @@ services: restart: unless-stopped volumes: - ./backend:/app - command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] + - ./frontend:/app/frontend + command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"] volumes: qdrant_data: @@ -85,5 +93,6 @@ volumes: neo4j_plugins: networks: - default: - name: mem0-network + mem0_network: + npm_network: + external: true diff --git a/frontend/index.html b/frontend/index.html index ed44296..77f2888 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -18,12 +18,106 @@ display: flex; } + /* Login Screen */ + .login-screen { + display: flex; + align-items: center; + justify-content: center; + width: 100%; + height: 100vh; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + } + + .login-screen.hidden { + display: none; + } + + .login-box { + background: white; + padding: 40px; + border-radius: 12px; + box-shadow: 0 10px 40px rgba(0, 0, 0, 0.2); + width: 100%; + max-width: 400px; + } + + .login-box h1 { + margin-bottom: 10px; + color: #333; + font-size: 28px; + text-align: center; + } + + .login-box p { + color: #666; + font-size: 14px; + text-align: center; + margin-bottom: 30px; + } + + .login-box input { + width: 100%; + padding: 14px; + border: 2px solid #e0e0e0; + border-radius: 8px; + font-size: 14px; + margin-bottom: 20px; + outline: none; + transition: border-color 0.3s; + } + + .login-box input:focus { + border-color: #667eea; + } + + .login-box button { + width: 100%; + padding: 14px; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border: none; + border-radius: 8px; + font-size: 16px; + font-weight: 600; + cursor: pointer; + transition: transform 0.2s, opacity 0.3s; + } + + .login-box button:hover { + transform: translateY(-2px); + } + + .login-box button:disabled { + opacity: 0.6; + cursor: not-allowed; + transform: none; + } + + .login-error { + background: #ffe6e6; + border: 1px solid #ffcccc; + color: #cc0000; + padding: 12px; + border-radius: 8px; + margin-bottom: 20px; + font-size: 14px; + display: none; + } + + .login-error.show { + display: block; + } + .container { display: flex; width: 100%; height: 100vh; } + .container.hidden { + display: none; + } + /* Chat Section */ .chat-section { flex: 1; @@ -58,7 +152,12 @@ font-size: 14px; } - .clear-chat-btn { + .header-buttons { + display: flex; + gap: 10px; + } + + .clear-chat-btn, .logout-btn { background: #f8f9fa; color: #666; border: 1px solid #e0e0e0; @@ -73,16 +172,27 @@ transition: all 0.2s ease; } - .clear-chat-btn:hover { + .clear-chat-btn:hover, .logout-btn:hover { background: #e9ecef; border-color: #ced4da; color: #495057; } - .clear-chat-btn:active { + .clear-chat-btn:active, .logout-btn:active { background: #dee2e6; } + .logout-btn { + background: #fff3cd; + border-color: #ffc107; + color: #856404; + } + + .logout-btn:hover { + background: #ffe69c; + border-color: #ffb300; 
+ } + .chat-messages { flex: 1; overflow-y: auto; @@ -281,17 +391,42 @@ -
[remaining frontend/index.html additions (login-screen markup and supporting script) truncated]
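Usage sketch (not part of the patch): the new completions route is intended as a drop-in target for the OpenAI Python SDK, so a quick way to exercise it is the snippet below. The base URL, API key value, and model name are assumptions — in this compose file the backend is only reachable through the npm proxy network, the key must already exist in the API_KEYS mapping (it resolves server-side to the user_id whose memories are searched), and the server substitutes its configured DEFAULT_MODEL regardless of the model field.

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",   # assumed host; adjust to wherever the proxy exposes the backend
    api_key="your-configured-api-key",     # any key present in API_KEYS; it maps to a user_id server-side
)

# Non-streaming: the handler returns an OpenAIChatCompletionResponse object.
resp = client.chat.completions.create(
    model="claude-sonnet-4",
    messages=[{"role": "user", "content": "What do you remember about my project?"}],
)
print(resp.choices[0].message.content)

# Streaming: the handler replays the full answer as SSE chunks (three words at a
# time with a small delay), terminated by data: [DONE], which the SDK parses.
stream = client.chat.completions.create(
    model="claude-sonnet-4",
    messages=[{"role": "user", "content": "Summarize our last conversation."}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()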
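A second sketch (again assumption-laden, not part of the patch) covers the X-API-Key fallback accepted by get_current_user_openai and the unversioned /chat/completions alias. Host and key are placeholders; the usage block in the response is all zeros because token counting is not implemented yet.

import requests

resp = requests.post(
    "http://localhost:8000/chat/completions",          # alias registered alongside /v1/chat/completions
    headers={"X-API-Key": "your-configured-api-key"},  # fallback when no Authorization: Bearer header is sent
    json={
        "model": "claude-sonnet-4",
        "messages": [{"role": "user", "content": "Remember that I prefer dark mode."}],
    },
    timeout=60,
)
resp.raise_for_status()
body = resp.json()
print(body["id"], body["choices"][0]["finish_reason"])
print(body["choices"][0]["message"]["content"])
print(body["usage"])   # {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0} for now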