Two helpers built during the beast deployment to migrate the legacy
Neo4j knowledge graph (decommissioned in the v3 cutover) into mem0 v2
as natural-language memories.
scripts/import_neo4j_to_mem0.py
- Connects to Neo4j via Bolt, iterates per-user relationships,
POSTs each as a /memories request.
- Two modes:
    raw:           "humanize(src) verb humanize(dest)." (snake_case → spaces)
    --llm-rewrite: minimax-m2, via an OpenAI-compatible proxy, rewrites each
                   tuple into a grammatical English sentence; the LLM may
                   also output SKIP for non-meaningful tuples (postal codes,
                   timezone offsets, self-references).
- Tags every imported memory with metadata.source="neo4j_legacy_import"
plus neo4j_rel_type + import_timestamp for traceability/cleanup.
- Caches LLM rewrites by (source, rel, dest, user_id).
scripts/cleanup_neo4j_imports.py
- Finds and DELETEs all memories with source="neo4j_legacy_import"
for given users, via the /memories DELETE endpoint (per-user API
key, so the deletes go through mem0's normal auth + cleanup path).
Run on beast (2026-05-23): 2007 Neo4j edges → 615 net new memories in
mem0_v3 (30.6% yield after LLM SKIPs + mem0 fact-extraction dedup).
mem0 v3's fact extractor correctly deduplicated edges that restated
facts already in vector memory (e.g., manju's 9 existing memories
absorbed all 17 of her Neo4j edges).
100 lines
3.1 KiB
Python
100 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Delete memories that were imported from Neo4j (tagged metadata.source='neo4j_legacy_import').
|
|
|
|
Usage:
|
|
docker compose exec backend python /tmp/cleanup.py --user alice
|
|
docker compose exec backend python /tmp/cleanup.py --users alice,hetashree,manju
|
|
docker compose exec backend python /tmp/cleanup.py --dry-run --users alice
|
|
"""
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
import httpx
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import models
|
|
|
|
# Connection settings. Each value is read from the environment once, at import
# time, and falls back to the default shown when the variable is not set.
QDRANT_HOST = os.getenv("QDRANT_HOST", "qdrant")
# int() raises ValueError at import time if QDRANT_PORT is not an integer string.
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
# Note the environment variable is QDRANT_COLLECTION_NAME, not QDRANT_COLLECTION.
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION_NAME", "mem0_v3")
# Base URL used to build the DELETE /memories/<id> requests.
MEM0_URL = os.getenv("MEM0_URL", "http://localhost:8000")
|
|
|
|
|
|
def load_api_keys() -> dict:
    """Build a user -> API key lookup from the ``API_KEYS`` environment variable.

    ``API_KEYS`` is parsed as a JSON object whose items are iterated as
    (api_key, user) pairs. The mapping is inverted so a key can be looked up
    by user. When several keys map to the same user, the first one
    encountered in the JSON object is kept.

    Returns:
        Dict mapping each user to one API key. Empty when ``API_KEYS`` is
        unset, empty, or contains only whitespace.

    Raises:
        json.JSONDecodeError: if ``API_KEYS`` holds non-blank text that is
            not valid JSON.
    """
    raw = os.environ.get("API_KEYS", "")
    # A variable that is set but blank (e.g. ``API_KEYS=`` in an env file) is
    # treated like an unset one instead of crashing json.loads("").
    if not raw.strip():
        return {}
    keys = json.loads(raw)
    inv: dict = {}
    for k, u in keys.items():
        # setdefault keeps the first key seen for a user and ignores later ones.
        inv.setdefault(u, k)
    return inv
|
|
|
|
|
|
def scroll_legacy_for(client: QdrantClient, user_id: str) -> list:
    """Collect the IDs of one user's points tagged source='neo4j_legacy_import'.

    Pages through the QDRANT_COLLECTION collection with ``client.scroll``,
    128 points per request, without fetching payloads or vectors. Only points
    whose ``user_id`` field equals *user_id* and whose ``source`` field equals
    ``"neo4j_legacy_import"`` are matched.

    Args:
        client: Qdrant client used to issue the scroll requests.
        user_id: Value the ``user_id`` field must match.

    Returns:
        List of the matching point IDs, in the order the pages returned them.
    """
    # The filter does not change between pages, so build it once.
    legacy_filter = models.Filter(
        must=[
            models.FieldCondition(key="user_id", match=models.MatchValue(value=user_id)),
            models.FieldCondition(key="source", match=models.MatchValue(value="neo4j_legacy_import")),
        ]
    )
    found = []
    cursor = None
    while True:
        page, cursor = client.scroll(
            collection_name=QDRANT_COLLECTION,
            scroll_filter=legacy_filter,
            limit=128,
            with_payload=False,
            with_vectors=False,
            offset=cursor,
        )
        found += [point.id for point in page]
        # scroll() hands back the offset for the next page; None ends the loop.
        if cursor is None:
            return found
|
|
|
|
|
|
def main() -> int:
    """Delete legacy Neo4j-import memories for the users named on the command line.

    Command-line options:
        --user X        a single user to clean
        --users a,b,c   comma-separated users; surrounding whitespace is
                        stripped and blank entries are ignored
        --dry-run       only report how many memories were found

    For each user the matching memory IDs are collected with
    ``scroll_legacy_for`` and each one is removed with a
    ``DELETE {MEM0_URL}/memories/<id>`` request carrying that user's API key
    in the ``X-API-Key`` header. Users without an API key are skipped.

    Returns:
        2 when no user was given (usage is printed to stderr), otherwise 0.
        NOTE(review): the status stays 0 even when individual deletes fail;
        failures are only reported on stdout.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--user", help="Single user to clean")
    ap.add_argument("--users", help="Comma-separated user list")
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    targets = []
    if args.user:
        targets.append(args.user)
    if args.users:
        # Drop blanks so "a,,b" or a trailing comma does not yield an empty user id.
        stripped = (part.strip() for part in args.users.split(","))
        targets.extend(name for name in stripped if name)
    if not targets:
        print("Usage: --user X or --users a,b,c", file=sys.stderr)
        return 2

    api_keys = load_api_keys()
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

    # The context manager closes the HTTP connection pool on every exit path.
    with httpx.Client(timeout=60.0) as http:
        for user_id in targets:
            ids = scroll_legacy_for(client, user_id)
            print(f"{user_id}: found {len(ids)} legacy-import memories")
            if args.dry_run:
                continue
            key = api_keys.get(user_id)
            if not key:
                print(f" no API key for {user_id} — skipping")
                continue
            deleted = errors = 0
            for mid in ids:
                try:
                    r = http.delete(f"{MEM0_URL}/memories/{mid}", headers={"X-API-Key": key})
                except httpx.HTTPError as exc:
                    # A timeout or connection failure on one memory must not
                    # abort the remaining deletes; count it and move on.
                    errors += 1
                    print(f" failed {mid}: {exc!r}")
                    continue
                if r.status_code == 200:
                    deleted += 1
                else:
                    errors += 1
                    print(f" failed {mid}: HTTP {r.status_code} {r.text[:120]}")
            print(f" deleted={deleted} errors={errors}")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|