#!/usr/bin/env python3
"""Import Neo4j graph relationships into mem0 v2 as natural-language memories.

For each `(source)-[rel_type]->(destination)` edge in the Neo4j graph, POST a
short sentence ("source verb destination") to mem0's `/memories` endpoint.
mem0's v3 fact-extraction + entity-linking pipeline handles the rest.

Tagged with metadata.source="neo4j_legacy_import" so the imports can be
identified (or bulk-deleted) later via the standard /memories endpoints.

Usage (inside the mem0-backend container, which is on the same docker network
as neo4j-temp and the mem0 backend itself):

    # Dry-run — show sample sentences without posting:
    docker compose exec backend python /tmp/import.py --dry-run --limit 30

    # Single-user POC:
    docker compose exec backend python /tmp/import.py --user akshat

    # Full sweep:
    docker compose exec backend python /tmp/import.py

Environment variables (all have sensible defaults for the beast deployment):
    NEO4J_URI   bolt://neo4j-temp:7687
    NEO4J_USER  neo4j
    NEO4J_PASS  mem0_neo4j_password
    MEM0_URL    http://localhost:8000
    API_KEYS    JSON mapping {api_key: user_id} (already in container env)
    LLM_URL     base URL of the rewrite LLM proxy (falls back to OPENAI_BASE_URL)
    LLM_KEY     bearer token for the rewrite LLM (falls back to OPENAI_API_KEY)
    LLM_MODEL   model name used by --llm-rewrite (default: minimax-m2)
"""

import argparse
import json
import os
import sys
import time
from collections import Counter
from typing import Callable, Optional

import httpx
from neo4j import GraphDatabase

NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://neo4j-temp:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASS = os.environ.get("NEO4J_PASS", "mem0_neo4j_password")
MEM0_URL = os.environ.get("MEM0_URL", "http://localhost:8000")

# LiteLLM proxy for sentence rewriting (--llm-rewrite mode)
LLM_URL = os.environ.get("LLM_URL", os.environ.get("OPENAI_BASE_URL", "https://veronica.pratikn.com"))
LLM_KEY = os.environ.get("LLM_KEY", os.environ.get("OPENAI_API_KEY", ""))
LLM_MODEL = os.environ.get("LLM_MODEL", "minimax-m2")

REWRITE_PROMPT = """You are converting a knowledge-graph relationship into a natural English sentence for a personal memory system.

The user we're building memories for is identified by user_id="{user_id}". The graph may reference them by variant names (full name, "user_id:_{user_id}", nicknames). Treat all of those as the same person — in your output sentence, use the name "{user_id}" (capitalized appropriately) so the memory layer can link it consistently.

Convert this tuple into ONE natural English sentence:
  source: {source}
  relationship: {rel}
  destination: {dest}

Rules:
- Output ONLY the sentence. No quotes, no preamble, no explanation, no markdown.
- Use proper capitalization, grammar, and convert snake_case names to natural words ("custom_headers" → "custom headers").
- If the source IS the user, write a third-person sentence starting with "{user_id_cap}" (e.g., "{user_id_cap} works at TechCorp.").
- If the source is a non-user entity, write a third-person sentence about that entity (e.g., "Dr Seta works at Chitra.").
- If the tuple doesn't translate to a meaningful, retrievable fact (pure identifier metadata like a postal code, generic world knowledge like a timezone offset, or self-references), respond with exactly: SKIP"""


def load_api_keys() -> dict:
    """Return a ``{user_id: api_key}`` mapping built from the ``API_KEYS`` env var.

    ``API_KEYS`` holds JSON of the form ``{api_key: user_id}``; this inverts it.
    When several keys map to the same user, the first one in the JSON wins.
    An unset, empty, or whitespace-only variable yields an empty mapping.

    Raises:
        ValueError: if ``API_KEYS`` is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or does not decode to a JSON object.
    """
    raw = (os.environ.get("API_KEYS") or "").strip() or "{}"
    keys = json.loads(raw)
    if not isinstance(keys, dict):
        raise ValueError("API_KEYS must be a JSON object mapping api_key -> user_id")
    inv: dict = {}
    for api_key, user_id in keys.items():
        # setdefault keeps the first key seen for each user.
        inv.setdefault(user_id, api_key)
    return inv


def humanize(name: str, user_id: Optional[str] = None) -> str:
    """snake_case node name → human-readable. __User__ nodes collapse to the user_id.

    Returns an empty string for a missing/empty name.
    """
    if not name:
        return ""
    # The __User__ label uses name = "user_id:_<user_id>"
    if name.startswith("user_id:_") and user_id:
        return user_id
    return name.replace("_", " ")


def humanize_rel(rel_type: str) -> str:
    """Turn a snake_case relationship type into space-separated words."""
    return rel_type.replace("_", " ")


def build_sentence(source: str, rel_type: str, dest: str, user_id: str) -> str:
    """Build the plain "source verb destination." sentence without using an LLM."""
    return f"{humanize(source, user_id)} {humanize_rel(rel_type)} {humanize(dest, user_id)}."


# Memoizes llm_rewrite results (including SKIP → None) for the life of the process.
_llm_cache: dict = {}


def llm_rewrite(
    source: str,
    rel_type: str,
    dest: str,
    user_id: str,
    client: httpx.Client,
    llm_url: Optional[str] = None,
    llm_model: Optional[str] = None,
) -> Optional[str]:
    """Ask the rewrite LLM to convert the tuple to a natural sentence.

    Args:
        source: Name of the edge's source node.
        rel_type: Relationship type of the edge.
        dest: Name of the edge's destination node.
        user_id: User the memory belongs to; substituted into the prompt.
        client: HTTP client used for the chat-completions request.
        llm_url: Base URL of the LLM proxy; defaults to the module-level ``LLM_URL``.
        llm_model: Model name; defaults to the module-level ``LLM_MODEL``.

    Returns:
        The rewritten sentence, or ``None`` when the model answers SKIP (or
        returns no content).

    Raises:
        httpx.HTTPError: on transport failures or a non-2xx response.
        KeyError / IndexError: if the response lacks ``choices[0].message``.
    """
    url = LLM_URL if llm_url is None else llm_url
    model = LLM_MODEL if llm_model is None else llm_model

    # Endpoint and model are part of the key so a different backend never
    # returns another backend's cached answer.
    key = (source, rel_type, dest, user_id, url, model)
    if key in _llm_cache:
        return _llm_cache[key]

    prompt = REWRITE_PROMPT.format(
        user_id=user_id,
        user_id_cap=user_id[:1].upper() + user_id[1:],
        source=source,
        rel=rel_type,
        dest=dest,
    )
    resp = client.post(
        f"{url.rstrip('/')}/v1/chat/completions",
        headers={"Authorization": f"Bearer {LLM_KEY}"},
        json={
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 400,
            "temperature": 0.2,
        },
        timeout=60.0,
    )
    resp.raise_for_status()
    data = resp.json()
    content = (data["choices"][0]["message"].get("content") or "").strip()
    # Strip wrapping quotes/whitespace
    content = content.strip(" \"'\n\r\t")

    # Accept "SKIP", "skip.", "Skip!" etc. as the skip marker.
    if not content or content.upper().strip(" .!") == "SKIP":
        _llm_cache[key] = None
        return None
    _llm_cache[key] = content
    return content


def _fetch_edges(neo4j_uri: str, user: Optional[str], limit: Optional[int]) -> list:
    """Read (source, rel, dest, user_id) records for every user-owned edge in Neo4j."""
    where = "WHERE n.user_id IS NOT NULL AND m.user_id IS NOT NULL"
    params: dict = {}
    if user:
        where += " AND n.user_id = $uid"
        params["uid"] = user
    limit_clause = ""
    if limit is not None:
        # `is not None` so that an explicit --limit 0 is honoured rather than
        # silently turning into a full sweep.
        limit_clause = "LIMIT $limit"
        params["limit"] = limit
    cypher = (
        f"MATCH (n)-[r]->(m) {where} "
        "RETURN n.name AS source, type(r) AS rel, m.name AS dest, n.user_id AS user_id "
        f"{limit_clause}"
    )

    driver = GraphDatabase.driver(neo4j_uri, auth=(NEO4J_USER, NEO4J_PASS))
    try:
        with driver.session() as session:
            # Materialize before the session closes.
            return list(session.run(cypher, params))
    finally:
        # Release the connection pool even if the query fails.
        driver.close()


def _print_dry_run(edges: list, make_sentence: Callable, llm_rewritten: bool) -> None:
    """Print up to 30 sample sentences and a per-user edge count; posts nothing."""
    n_show = min(30, len(edges))
    print(f"\nSample sentences (first {n_show})" + (" [LLM-rewritten]" if llm_rewritten else "") + ":")
    for e in edges[:n_show]:
        try:
            s = make_sentence(e)
        except Exception as exc:  # per-edge boundary: one bad LLM call must not kill the preview
            marker = f"ERROR: {exc}"
        else:
            marker = "SKIP" if s is None else s
        tup = f"({e['source']}, {e['rel']}, {e['dest']})"
        print(f"  [{e['user_id']}] {tup} -> {marker}")
    if len(edges) > n_show:
        print(f"  ... and {len(edges) - n_show} more")

    # Show per-user breakdown
    by_user = dict(Counter(e["user_id"] for e in edges))
    print(f"\nBy user: {by_user}")


def _post_edges(
    edges: list,
    api_keys: dict,
    make_sentence: Callable,
    client: httpx.Client,
    mem0_url: str,
) -> dict:
    """POST one memory per edge to mem0 and return the run's counters."""
    timestamp = int(time.time())
    total = len(edges)
    stats = {
        "posted": 0,
        "skipped_no_key": 0,
        "skipped_llm": 0,
        "errors": 0,
        "extracted_total": 0,
        "no_facts": 0,
    }

    for i, edge in enumerate(edges, 1):
        user_id = edge["user_id"]
        api_key = api_keys.get(user_id)
        if not api_key:
            # No API key configured for this user: nothing we can post as.
            stats["skipped_no_key"] += 1
            continue

        try:
            sentence = make_sentence(edge)
        except Exception as exc:  # best-effort: count and move on to the next edge
            stats["errors"] += 1
            print(f"  [{i}/{total}] LLM REWRITE EXCEPTION: {exc}")
            continue
        if sentence is None:
            stats["skipped_llm"] += 1
            continue

        body = {
            "user_id": user_id,
            "messages": [{"role": "user", "content": sentence}],
            "metadata": {
                "source": "neo4j_legacy_import",
                "neo4j_rel_type": edge["rel"],
                "import_timestamp": timestamp,
            },
        }
        try:
            r = client.post(
                f"{mem0_url}/memories",
                headers={"X-API-Key": api_key},
                json=body,
            )
            if r.status_code != 200:
                stats["errors"] += 1
                print(
                    f"  [{i}/{total}] {user_id} {sentence!r} HTTP {r.status_code} "
                    f"{r.text[:120]}"
                )
                continue
            data = r.json()
            # Only the first entry of "added_memories" is inspected for extracted facts.
            results = (data.get("added_memories") or [{}])[0].get("results", [])
            stats["extracted_total"] += len(results)
            if not results:
                stats["no_facts"] += 1
            stats["posted"] += 1
            if i % 5 == 0 or i == total:
                print(
                    f"  [{i}/{total}] posted={stats['posted']} "
                    f"extracted={stats['extracted_total']} "
                    f"no_facts={stats['no_facts']} "
                    f"llm_skipped={stats['skipped_llm']} "
                    f"no_key={stats['skipped_no_key']} errors={stats['errors']}"
                )
        except Exception as exc:  # best-effort: count and move on to the next edge
            stats["errors"] += 1
            print(f"  [{i}/{total}] EXCEPTION: {exc}")

    return stats


def main() -> int:
    """CLI entry point: read edges from Neo4j, then preview or POST them to mem0.

    Returns:
        Process exit code (always 0; per-edge failures are reported in the
        printed stats rather than through the exit status).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--user", help="Single user_id to migrate (default: all)")
    ap.add_argument("--limit", type=int, help="Max relationships to process")
    ap.add_argument("--dry-run", action="store_true", help="Print sentences only, no POSTs")
    ap.add_argument("--neo4j-uri", default=NEO4J_URI)
    ap.add_argument("--mem0-url", default=MEM0_URL)
    ap.add_argument(
        "--llm-rewrite",
        action="store_true",
        help="Use minimax-m2 (via veronica.pratikn.com) to rewrite tuples into natural English sentences",
    )
    ap.add_argument("--llm-url", default=LLM_URL)
    ap.add_argument("--llm-model", default=LLM_MODEL)
    args = ap.parse_args()
    if args.limit is not None and args.limit < 0:
        ap.error("--limit must be a non-negative integer")

    api_keys = load_api_keys()

    edges = _fetch_edges(args.neo4j_uri, args.user, args.limit)
    print(f"Found {len(edges)} relationships")

    # The context manager closes the HTTP connection pool on every exit path.
    with httpx.Client(timeout=180.0) as client:

        def make_sentence(e) -> Optional[str]:
            if args.llm_rewrite:
                # Pass the CLI overrides through so --llm-url / --llm-model take effect.
                return llm_rewrite(
                    e["source"],
                    e["rel"],
                    e["dest"],
                    e["user_id"],
                    client,
                    llm_url=args.llm_url,
                    llm_model=args.llm_model,
                )
            return build_sentence(e["source"], e["rel"], e["dest"], e["user_id"])

        if args.dry_run:
            _print_dry_run(edges, make_sentence, args.llm_rewrite)
            return 0

        stats = _post_edges(edges, api_keys, make_sentence, client, args.mem0_url)

    print(f"\nDONE: {stats}")
    return 0


if __name__ == "__main__":
    sys.exit(main())