From 06875473b24e3b7ccb5be36f5dbed8b7043938a1 Mon Sep 17 00:00:00 2001
From: Pratik Narola
Date: Sat, 23 May 2026 19:53:59 +0530
Subject: [PATCH] feat: enable reranker, automate backups, tune extraction prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three S-effort wins from the post-migration audit:

#1 Enable Cohere reranker on both Memory.search call sites (rerank=True),
   over-fetch top_k=max(limit*3, 30) to give the reranker a 30-50 candidate
   pool, then truncate to the caller's limit. Bump reranker config to
   rerank-v3.5 (4096 ctx, multilingual — matters for Hindi/Hinglish traffic)
   and top_n 10 → 50 so the output cap doesn't truncate below typical
   over-fetch sizes. Cohere was configured but never invoked; this is the
   single biggest quality lift the audit surfaced.

#2 Add scripts/backup_qdrant.sh and scripts/restore_test.sh. Daily snapshot
   of both collections back-to-back, docker cp to local YYYY-MM-DD dir,
   optional rclone off-host, prune local >14d, emit Prometheus textfile
   metric. Weekly restore_test.sh restores into a transient collection and
   asserts the restored point count is at least the production count.
   Closes the zero-automated-backup gap.

#3 Add CUSTOM_FACT_EXTRACTION_INSTRUCTIONS, wired via MemoryConfig's
   custom_instructions field. mem0 appends this as its own
   '## Custom Instructions' section in the additive-extraction user prompt
   (verified against generate_additive_extraction_prompt) — does not replace
   mem0's role/format guidance. Re-prioritizes the default consumer-organizer
   few-shots toward work/projects/relationships/recurring context, the actual
   usage pattern here.
--- backend/mem0_manager.py | 56 ++++++++++++++++++++--- scripts/backup_qdrant.sh | 97 ++++++++++++++++++++++++++++++++++++++++ scripts/restore_test.sh | 76 +++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 7 deletions(-) create mode 100755 scripts/backup_qdrant.sh create mode 100755 scripts/restore_test.sh diff --git a/backend/mem0_manager.py b/backend/mem0_manager.py index abe080f..7575d86 100644 --- a/backend/mem0_manager.py +++ b/backend/mem0_manager.py @@ -75,6 +75,33 @@ def _build_filters( return merged +# Appended as the "## Custom Instructions" section of the additive-extraction +# prompt (mem0/configs/prompts.py::generate_additive_extraction_prompt). The +# default few-shot bias is consumer-organizer ("favourite movies", "SF restaurants"), +# which under-extracts on the work/project/relationship traffic this deployment +# actually sees. This re-prioritizes without replacing mem0's structural guidance. +CUSTOM_FACT_EXTRACTION_INSTRUCTIONS = """ +This memory store serves a working assistant — engineering, product, and operational contexts plus the user's people and recurring life context. Prioritize accordingly: + +HIGH-VALUE facts to capture: +- Work context: company, team, role; ongoing projects with goals/status/blockers; product or domain knowledge being built; tools/frameworks/languages in active use; technical decisions and the reasoning; recurring meetings or rituals. +- People in the user's orbit: colleagues, family, friends, mentors — names, relationships, roles, what they do, the current state of the relationship or shared context. +- Recurring personal context: home/work locations, regular schedule, standing commitments, durable preferences (food restrictions, working hours, communication style), planned events with dates. +- Acquired knowledge: concepts being studied or built, specific problems being solved, prior solutions tried and their outcomes. 
+ +LOWER-PRIORITY (extract only if they reveal a pattern or future relevance): +- Single transient states ("running 5 minutes late", "didn't sleep well") — capture only if they recur or signal a habit. +- Movies, music, restaurants, hobbies — only when noted as durable preferences or part of a recurring activity, not when mentioned in passing. + +SKIP entirely: +- Generic world knowledge (timezones, capital cities, definitions) — the assistant already knows these. +- Greetings, acknowledgments, meta-conversation ("Thanks!", "Got it"). +- Restatements or paraphrases of facts already in Existing Memories or Recently Extracted Memories. + +Prefer specificity. "Pratik uses FastAPI for backend services" beats "Pratik does backend development." When a person is mentioned by a short name or nickname, capture the relationship if known ("Anushree is Pratik's wife") so future references resolve correctly. +""".strip() + + class Mem0Manager: """ Ultra-minimal manager that bridges custom OpenAI endpoint with pure Mem0. @@ -91,6 +118,7 @@ class Mem0Manager: ) config = { "version": "v1.1", + "custom_instructions": CUSTOM_FACT_EXTRACTION_INSTRUCTIONS, "llm": { "provider": "openai", "config": { @@ -129,8 +157,14 @@ class Mem0Manager: "provider": "cohere", "config": { "api_key": settings.cohere_api_key, - "model": "rerank-english-v3.0", - "top_n": 10, + # v3.5 supersedes v3.0: 4096-token context, multilingual + # (our users include Hindi/Hinglish content that the + # English-only v3 silently underperforms on). + "model": "rerank-v3.5", + # Raised from 10 → 50 so the rerank output cap does not + # truncate below typical over-fetch sizes (see search calls + # below, which request top_k up to ~3× the user's limit). + "top_n": 50, }, }, } @@ -227,15 +261,20 @@ class Mem0Manager: "note": "Empty query provided, no results returned. Use a specific query to search memories.", } # mem0 v2: entity IDs must live inside the `filters` dict; `limit` is now `top_k`. 
+ # Over-fetch a 30–50-candidate pool so the Cohere reranker (rerank=True) + # has room to reorder; then truncate to the caller's requested limit. + overfetch = max(limit * 3, 30) result = self.memory.search( query=query, filters=_build_filters(user_id, agent_id, run_id, extra=filters), - top_k=limit, + top_k=overfetch, threshold=threshold, + rerank=True, ) + memories = result.get("results", [])[:limit] return { - "memories": result.get("results", []), - "total_count": len(result.get("results", [])), + "memories": memories, + "total_count": len(memories), "query": query, } except Exception as e: @@ -376,13 +415,16 @@ class Mem0Manager: logger.info("Starting chat request", user_id=user_id) search_start_time = time.time() + # Over-fetch for the Cohere reranker (rerank=True), then keep the + # top 10 reranked memories for the system prompt. search_result = self.memory.search( query=message, filters=_build_filters(user_id, agent_id, run_id), - top_k=10, + top_k=30, threshold=0.3, + rerank=True, ) - relevant_memories = search_result.get("results", []) + relevant_memories = search_result.get("results", [])[:10] memories_str = "\n".join( f"- {entry['memory']}" for entry in relevant_memories ) diff --git a/scripts/backup_qdrant.sh b/scripts/backup_qdrant.sh new file mode 100755 index 0000000..0b5bd26 --- /dev/null +++ b/scripts/backup_qdrant.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Qdrant snapshot + off-host rotation. +# +# Snapshots both collections (mem0_v3 + mem0_v3_entities) back-to-back via the +# Qdrant REST API, downloads them to a date-stamped local directory, uploads to +# the configured rclone remote, prunes local copies older than 14 days, and +# emits a Prometheus textfile metric for future scrape. 
+#
+# Env vars (override defaults):
+#   QDRANT_CONTAINER      container name (default: mem0-qdrant)
+#   COLLECTIONS           space-separated collection names
+#                         (default: "mem0_v3 mem0_v3_entities")
+#   BACKUP_DIR            local backup root
+#                         (default: ~/aistuff/mem0/backups/qdrant)
+#   RCLONE_REMOTE         rclone remote path (e.g. b2:mem0-backups/qdrant).
+#                         If unset, off-host upload is skipped.
+#   LOCAL_RETENTION_DAYS  how long to keep local copies (default: 14)
+#   TEXTFILE_DIR          Prometheus node_exporter textfile collector dir
+#                         (default: /var/lib/node_exporter/textfile_collector,
+#                         skipped if the dir does not exist)
+#
+# Suggested cron (daily at 03:00 UTC):
+#   0 3 * * * RCLONE_REMOTE=b2:mem0-backups/qdrant /home/ubuntu/aistuff/mem0/scripts/backup_qdrant.sh >> /home/ubuntu/aistuff/mem0/backups/backup.log 2>&1
+#
+# Exit codes:
+#   0  success
+#   N  snapshot/download failure: `set -e` exits with the failing command's own non-zero status
+#   2  rclone failure (after local download succeeded)
+
+set -euo pipefail
+
+QDRANT_CONTAINER="${QDRANT_CONTAINER:-mem0-qdrant}"
+COLLECTIONS="${COLLECTIONS:-mem0_v3 mem0_v3_entities}"
+BACKUP_DIR="${BACKUP_DIR:-$HOME/aistuff/mem0/backups/qdrant}"
+RCLONE_REMOTE="${RCLONE_REMOTE:-}"
+LOCAL_RETENTION_DAYS="${LOCAL_RETENTION_DAYS:-14}"
+TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
+
+TS="$(date -u +%Y%m%dT%H%M%SZ)"
+DAY="$(date -u +%Y-%m-%d)"
+TARGET_DIR="$BACKUP_DIR/$DAY"
+mkdir -p "$TARGET_DIR"
+
+log() { printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*"; }
+
+log "starting backup ts=$TS dir=$TARGET_DIR collections=$COLLECTIONS"
+
+total_bytes=0
+for col in $COLLECTIONS; do
+    log "snapshot create: $col"
+    resp=$(docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
+        "http://localhost:6333/collections/$col/snapshots?wait=true")
+    snap_name=$(printf '%s' "$resp" \
+        | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["name"])')
+
+    out_file="$TARGET_DIR/${col}_${TS}_${snap_name}"
+    log "snapshot download: $col/$snap_name -> $out_file"
+    docker cp "$QDRANT_CONTAINER:/qdrant/storage/collections/$col/snapshots/$snap_name" "$out_file"
+
+    # Remove the in-container snapshot to avoid disk bloat on the volume.
+    docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
+        "http://localhost:6333/collections/$col/snapshots/$snap_name" >/dev/null
+
+    size=$(stat -c %s "$out_file" 2>/dev/null || stat -f %z "$out_file")
+    total_bytes=$((total_bytes + size))
+    log "downloaded: $out_file ($size bytes)"
+done
+
+if [ -n "$RCLONE_REMOTE" ]; then
+    log "rclone copy: $TARGET_DIR -> $RCLONE_REMOTE/$DAY"
+    if ! rclone copy "$TARGET_DIR" "$RCLONE_REMOTE/$DAY"; then
+        log "rclone failed (local copies retained)"
+        exit 2
+    fi
+else
+    log "RCLONE_REMOTE unset; skipping off-host upload"
+fi
+
+log "pruning local copies older than $LOCAL_RETENTION_DAYS days"
+find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -type d -mtime "+$LOCAL_RETENTION_DAYS" -exec rm -rf {} +
+
+if [ -d "$TEXTFILE_DIR" ]; then
+    tmp="$TEXTFILE_DIR/qdrant_backup.prom.$$"  # staged next to the target: atomic rename, umask perms (mktemp's 0600 file would be unreadable to node_exporter)
+    {
+        echo "# HELP qdrant_last_backup_timestamp_seconds Unix timestamp of last successful Qdrant backup."
+        echo "# TYPE qdrant_last_backup_timestamp_seconds gauge"
+        echo "qdrant_last_backup_timestamp_seconds $(date -u +%s)"
+        echo "# HELP qdrant_last_backup_bytes Total bytes of last successful Qdrant backup."
+        echo "# TYPE qdrant_last_backup_bytes gauge"
+        echo "qdrant_last_backup_bytes $total_bytes"
+    } > "$tmp"
+    mv "$tmp" "$TEXTFILE_DIR/qdrant_backup.prom"
+    log "textfile metric written: $TEXTFILE_DIR/qdrant_backup.prom"
+fi
+
+log "backup complete: $total_bytes bytes across $(echo "$COLLECTIONS" | wc -w) collection(s)"
diff --git a/scripts/restore_test.sh b/scripts/restore_test.sh
new file mode 100755
index 0000000..a2c7d02
--- /dev/null
+++ b/scripts/restore_test.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+#
+# Weekly Qdrant restore sanity check.
+#
+# Finds the most recent backup tarball for SOURCE_COLLECTION, restores it into
+# a transient collection, asserts the restored point count >= production, and
+# cleans up.
Exits non-zero on any failure so cron alerting catches it.
+#
+# Env vars:
+#   QDRANT_CONTAINER   container name (default: mem0-qdrant)
+#   BACKUP_DIR         local backup root
+#                      (default: ~/aistuff/mem0/backups/qdrant)
+#   SOURCE_COLLECTION  collection to verify (default: mem0_v3)
+#   TEST_COLLECTION    transient collection name
+#                      (default: mem0_v3_restore_test)
+#
+# Suggested cron (weekly Sunday 04:00 UTC):
+#   0 4 * * 0 /home/ubuntu/aistuff/mem0/scripts/restore_test.sh >> /home/ubuntu/aistuff/mem0/backups/restore_test.log 2>&1
+
+set -euo pipefail
+
+QDRANT_CONTAINER="${QDRANT_CONTAINER:-mem0-qdrant}"
+BACKUP_DIR="${BACKUP_DIR:-$HOME/aistuff/mem0/backups/qdrant}"
+SOURCE_COLLECTION="${SOURCE_COLLECTION:-mem0_v3}"
+TEST_COLLECTION="${TEST_COLLECTION:-mem0_v3_restore_test}"
+
+log() { printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*"; }
+
+# Pick the newest <SOURCE_COLLECTION>_<YYYYmmddTHHMMSSZ>_* file; the timestamp anchor keeps mem0_v3 from also matching mem0_v3_entities_*.
+latest="$(find "$BACKUP_DIR" -type f -name "${SOURCE_COLLECTION}_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9]Z_*" -printf '%T@ %p\n' 2>/dev/null \
+    | sort -nr | head -1 | cut -d' ' -f2- || true)"  # || true: under pipefail a failing find must reach the explicit error below
+if [ -z "$latest" ]; then
+    log "ERROR: no backup found under $BACKUP_DIR matching ${SOURCE_COLLECTION}_*"
+    exit 1
+fi
+log "latest backup: $latest"
+
+prod_count=$(docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
+    "http://localhost:6333/collections/$SOURCE_COLLECTION/points/count" \
+    -H "Content-Type: application/json" \
+    -d '{"exact":true}' \
+    | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])')
+log "production count ($SOURCE_COLLECTION): $prod_count"
+
+# Drop any leftover test collection from a previous failed run.
+docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
+    "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null 2>&1 || true
+
+snap_basename="$(basename "$latest")"
+log "copying snapshot into container: /tmp/$snap_basename"
+docker cp "$latest" "$QDRANT_CONTAINER:/tmp/$snap_basename"
+
+log "restoring into $TEST_COLLECTION"
+docker exec "$QDRANT_CONTAINER" curl -fsS -X PUT \
+    "http://localhost:6333/collections/$TEST_COLLECTION/snapshots/recover" \
+    -H "Content-Type: application/json" \
+    -d "{\"location\":\"file:///tmp/$snap_basename\",\"priority\":\"snapshot\"}" \
+    >/dev/null
+
+restored_count=$(docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
+    "http://localhost:6333/collections/$TEST_COLLECTION/points/count" \
+    -H "Content-Type: application/json" \
+    -d '{"exact":true}' \
+    | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])')
+log "restored count: $restored_count"
+
+# Cleanup whether or not the assertion passes.
+docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
+    "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null
+docker exec "$QDRANT_CONTAINER" rm -f "/tmp/$snap_basename"
+
+if [ "$restored_count" -lt "$prod_count" ]; then
+    log "FAIL: restored=$restored_count < production=$prod_count"
+    exit 1
+fi
+log "OK: restored=$restored_count >= production=$prod_count"