Three S-effort wins from the post-migration audit: #1 Enable Cohere reranker on both Memory.search call sites (rerank=True), over-fetch top_k=max(limit*3, 30) to give the reranker a 30-50 candidate pool, then truncate to the caller's limit. Bump reranker config to rerank-v3.5 (4096 ctx, multilingual — matters for Hindi/Hinglish traffic) and top_n 10 → 50 so the output cap doesn't truncate below typical over-fetch sizes. Cohere was configured but never invoked; this is the single biggest quality lift the audit surfaced. #2 Add scripts/backup_qdrant.sh and scripts/restore_test.sh. Daily snapshot of both collections back-to-back, docker cp to local YYYY-MM-DD dir, optional rclone off-host, prune local >14d, emit Prometheus textfile metric. Weekly restore_test.sh restores into a transient collection and asserts that the restored point count is at least the production count. Closes the zero-automated-backup gap. #3 Add CUSTOM_FACT_EXTRACTION_INSTRUCTIONS, wired via MemoryConfig's custom_instructions field. mem0 appends this as its own '## Custom Instructions' section in the additive-extraction user prompt (verified against generate_additive_extraction_prompt) — it does not replace mem0's role/format guidance. Re-prioritizes the default consumer-organizer few-shots toward work/projects/relationships/recurring context, the actual usage pattern here.
76 lines
3.2 KiB
Bash
Executable file
76 lines
3.2 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
#
|
|
# Weekly Qdrant restore sanity check.
|
|
#
|
|
# Finds the most recent backup tarball for SOURCE_COLLECTION, restores it into
|
|
# a transient collection, asserts the restored point count >= production, and
|
|
# cleans up. Exits non-zero on any failure so cron alerting catches it.
|
|
#
|
|
# Env vars:
|
|
# QDRANT_CONTAINER container name (default: mem0-qdrant)
|
|
# BACKUP_DIR local backup root
|
|
# (default: ~/aistuff/mem0/backups/qdrant)
|
|
# SOURCE_COLLECTION collection to verify (default: mem0_v3)
|
|
# TEST_COLLECTION transient collection name
|
|
# (default: mem0_v3_restore_test)
|
|
#
|
|
# Suggested cron (weekly Sunday 04:00 UTC):
|
|
# 0 4 * * 0 /home/ubuntu/aistuff/mem0/scripts/restore_test.sh >> /home/ubuntu/aistuff/mem0/backups/restore_test.log 2>&1
|
|
|
|
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Runtime settings. Each keeps a non-empty value supplied by the environment
# and otherwise falls back to the default shown here.
: "${QDRANT_CONTAINER:=mem0-qdrant}"
: "${BACKUP_DIR:=$HOME/aistuff/mem0/backups/qdrant}"
: "${SOURCE_COLLECTION:=mem0_v3}"
: "${TEST_COLLECTION:=mem0_v3_restore_test}"
|
|
|
|
# Print one log line to stdout: "[<UTC timestamp>] <message>".
# Arguments: the message words; they are joined into a single string.
log() {
  local stamp
  stamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  printf '[%s] %s\n' "$stamp" "$*"
}
|
|
|
|
# Pick the most recently-modified backup file matching SOURCE_COLLECTION_*.
# find prints "<mtime-epoch> <path>" for every match; the numeric reverse sort
# puts the newest first and cut strips the timestamp column again.
#
# The trailing `|| true` is deliberate. find's stderr is discarded, so under
# `set -e` + `pipefail` a failing find (for example BACKUP_DIR does not exist)
# would otherwise abort the script on this assignment without printing
# anything. Tolerating the failure lets the empty-result check below log the
# ERROR line so cron alerting has something to show.
#
# NOTE(review): the "${SOURCE_COLLECTION}_*" pattern also matches files of any
# other collection whose name merely starts with "${SOURCE_COLLECTION}_" -
# confirm the backup naming scheme rules that out.
latest="$(find "$BACKUP_DIR" -type f -name "${SOURCE_COLLECTION}_*" -printf '%T@ %p\n' 2>/dev/null \
  | sort -nr | head -1 | cut -d' ' -f2-)" || true
if [[ -z "$latest" ]]; then
  log "ERROR: no backup found under $BACKUP_DIR matching ${SOURCE_COLLECTION}_*"
  exit 1
fi
log "latest backup: $latest"
|
|
|
|
# Ask Qdrant for the exact number of points in the production collection.
# curl runs inside the container; the JSON reply is parsed by python3 on the
# host side of the pipe, which prints result.count.
prod_count_url="http://localhost:6333/collections/$SOURCE_COLLECTION/points/count"
prod_count="$(
  docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
    "$prod_count_url" \
    -H "Content-Type: application/json" \
    -d '{"exact":true}' \
    | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])'
)"
log "production count ($SOURCE_COLLECTION): $prod_count"
|
|
|
|
# Remove a test collection that a previous failed run may have left behind.
# All output is discarded and a failing request is ignored on purpose, so this
# step can never abort the script.
stale_collection_url="http://localhost:6333/collections/$TEST_COLLECTION"
if ! docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
  "$stale_collection_url" >/dev/null 2>&1; then
  : # nothing to do: the failure is intentionally swallowed
fi
|
|
|
|
snap_basename="$(basename "$latest")"
snap_in_container="/tmp/$snap_basename"

# Best-effort removal of everything this run creates inside the container:
# the transient collection and the copied snapshot file. It is installed as an
# EXIT trap so that a failure in the copy, the restore or the count (each of
# which makes `set -e` terminate the script) no longer leaves those artifacts
# behind. Errors are ignored here so the trap never hides the original failure;
# the script's exit status is unaffected because the trap does not call exit.
cleanup_restore_artifacts() {
  docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
    "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null 2>&1 || true
  docker exec "$QDRANT_CONTAINER" rm -f "$snap_in_container" >/dev/null 2>&1 || true
}
trap cleanup_restore_artifacts EXIT

log "copying snapshot into container: /tmp/$snap_basename"
docker cp "$latest" "$QDRANT_CONTAINER:$snap_in_container"

# Recover the snapshot into the transient collection. The response body is
# discarded; curl -f turns an HTTP error status into a non-zero exit.
# NOTE(review): the file name is interpolated into JSON and a file:// URI
# unescaped - assumes backup names contain no quotes or spaces.
log "restoring into $TEST_COLLECTION"
docker exec "$QDRANT_CONTAINER" curl -fsS -X PUT \
  "http://localhost:6333/collections/$TEST_COLLECTION/snapshots/recover" \
  -H "Content-Type: application/json" \
  -d "{\"location\":\"file://$snap_in_container\",\"priority\":\"snapshot\"}" \
  >/dev/null

# Exact point count of the restored collection (JSON parsed by host python3).
restored_count=$(docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
  "http://localhost:6333/collections/$TEST_COLLECTION/points/count" \
  -H "Content-Type: application/json" \
  -d '{"exact":true}' \
  | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])')
log "restored count: $restored_count"

# Cleanup whether or not the assertion passes. On this normal path the cleanup
# stays strict: a failing command still aborts the script, and the trap then
# retries best-effort.
docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
  "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null
docker exec "$QDRANT_CONTAINER" rm -f "$snap_in_container"
# Everything is removed; disarm the trap so it does not run a second time.
trap - EXIT
|
|
|
|
# Both counts must be plain non-negative integers before they are compared.
# Without this check a non-numeric value (e.g. "None" printed for a JSON null)
# makes `[ ... -lt ... ]` exit with status 2; inside `if` that is treated as
# "condition false", so the script would report OK and exit 0.
for count_value in "$restored_count" "$prod_count"; do
  if [[ ! "$count_value" =~ ^[0-9]+$ ]]; then
    log "FAIL: non-numeric point count (restored='$restored_count' production='$prod_count')"
    exit 1
  fi
done

# The restore passes when it holds at least as many points as production.
# NOTE(review): a snapshot taken before recent writes will hold fewer points
# than the live collection - confirm that failing in that case is intended.
if [ "$restored_count" -lt "$prod_count" ]; then
  log "FAIL: restored=$restored_count < production=$prod_count"
  exit 1
fi
log "OK: restored=$restored_count >= production=$prod_count"
|