#!/usr/bin/env bash
#
# Weekly Qdrant restore sanity check.
#
# Finds the most recent backup tarball for SOURCE_COLLECTION, restores it into
# a transient collection, asserts the restored point count >= production, and
# cleans up. Exits non-zero on any failure so cron alerting catches it.
#
# Requirements (as used below): `docker` and `python3` on the host, `curl`
# inside the Qdrant container, and a `find` that supports -printf (GNU find).
#
# Env vars:
#   QDRANT_CONTAINER   container name (default: mem0-qdrant)
#   BACKUP_DIR         local backup root
#                      (default: ~/aistuff/mem0/backups/qdrant)
#   SOURCE_COLLECTION  collection to verify (default: mem0_v3)
#   TEST_COLLECTION    transient collection name
#                      (default: mem0_v3_restore_test)
#
# Suggested cron (weekly Sunday 04:00 UTC):
#   0 4 * * 0 /home/ubuntu/aistuff/mem0/scripts/restore_test.sh >> /home/ubuntu/aistuff/mem0/backups/restore_test.log 2>&1

set -euo pipefail

QDRANT_CONTAINER="${QDRANT_CONTAINER:-mem0-qdrant}"
BACKUP_DIR="${BACKUP_DIR:-$HOME/aistuff/mem0/backups/qdrant}"
SOURCE_COLLECTION="${SOURCE_COLLECTION:-mem0_v3}"
TEST_COLLECTION="${TEST_COLLECTION:-mem0_v3_restore_test}"

# Qdrant HTTP endpoint as seen from inside the container (curl runs there).
readonly QDRANT_API="http://localhost:6333"

# Cleanup bookkeeping: each flag is raised just before the step that may leave
# the corresponding artifact behind, and lowered once it has been removed.
collection_dirty=0
snapshot_copied=0
snap_basename=""

# Print a UTC-timestamped log line to stdout.
log() { printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*"; }

#######################################
# Print the exact point count of a collection.
# Globals:   QDRANT_CONTAINER, QDRANT_API (read)
# Arguments: $1 - collection name
# Outputs:   the value of result.count from the count response, on stdout
# Returns:   non-zero if the request or the JSON extraction fails
#######################################
count_points() {
  local collection=$1
  docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
    "$QDRANT_API/collections/$collection/points/count" \
    -H "Content-Type: application/json" \
    -d '{"exact":true}' \
    | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])'
}

#######################################
# Abort unless a value is a non-negative integer.
# Without this guard a non-numeric count (e.g. "None" for a JSON null) makes
# the later `-lt` test error out inside `if`, which is treated as "false" and
# would let the script report OK.
# Arguments: $1 - label used in the error message, $2 - value to check
#######################################
require_count() {
  local label=$1 value=$2
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    log "ERROR: $label count is not a non-negative integer: '$value'"
    exit 1
  fi
}

#######################################
# Remove the transient collection and the snapshot copy inside the container.
# Safe to call more than once: only artifacts still flagged are touched.
# Globals:   collection_dirty, snapshot_copied (read/written),
#            QDRANT_CONTAINER, QDRANT_API, TEST_COLLECTION, snap_basename (read)
# Returns:   0 if everything flagged was removed, 1 otherwise
#######################################
cleanup() {
  local status=0
  if ((collection_dirty)); then
    if docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
      "$QDRANT_API/collections/$TEST_COLLECTION" >/dev/null; then
      collection_dirty=0
    else
      status=1
    fi
  fi
  if ((snapshot_copied)); then
    if docker exec "$QDRANT_CONTAINER" rm -f "/tmp/$snap_basename"; then
      snapshot_copied=0
    else
      status=1
    fi
  fi
  return "$status"
}

# EXIT handler: best-effort cleanup on every exit path (including set -e
# aborts) while preserving the exit status that triggered the exit.
on_exit() {
  local rc=$?
  trap - EXIT
  if ! cleanup; then
    log "WARN: cleanup incomplete; $TEST_COLLECTION or /tmp/$snap_basename may remain in $QDRANT_CONTAINER"
  fi
  exit "$rc"
}
trap on_exit EXIT

# A missing backup root would otherwise make `find` fail and, via pipefail and
# set -e, end the script without any log line.
if [[ ! -d "$BACKUP_DIR" ]]; then
  log "ERROR: no backup found under $BACKUP_DIR matching ${SOURCE_COLLECTION}_*"
  exit 1
fi

# Pick the most recently-modified backup file matching SOURCE_COLLECTION_*.
# find's stderr is left visible so traversal errors show up in the cron log.
latest="$(
  find "$BACKUP_DIR" -type f -name "${SOURCE_COLLECTION}_*" -printf '%T@ %p\n' \
    | sort -nr \
    | head -n 1 \
    | cut -d' ' -f2-
)"

if [[ -z "$latest" ]]; then
  log "ERROR: no backup found under $BACKUP_DIR matching ${SOURCE_COLLECTION}_*"
  exit 1
fi
log "latest backup: $latest"

prod_count="$(count_points "$SOURCE_COLLECTION")"
require_count "production" "$prod_count"
log "production count ($SOURCE_COLLECTION): $prod_count"

# Drop any leftover test collection from a previous failed run.
# Deliberately best-effort: the request is allowed to fail (e.g. nothing to
# delete), hence the silenced output and `|| true`.
docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
  "$QDRANT_API/collections/$TEST_COLLECTION" >/dev/null 2>&1 || true

snap_basename="$(basename "$latest")"
log "copying snapshot into container: /tmp/$snap_basename"
snapshot_copied=1 # raised first so a partial copy is also removed
docker cp "$latest" "$QDRANT_CONTAINER:/tmp/$snap_basename"

log "restoring into $TEST_COLLECTION"
collection_dirty=1 # raised first so a half-finished restore is also dropped
docker exec "$QDRANT_CONTAINER" curl -fsS -X PUT \
  "$QDRANT_API/collections/$TEST_COLLECTION/snapshots/recover" \
  -H "Content-Type: application/json" \
  -d "{\"location\":\"file:///tmp/$snap_basename\",\"priority\":\"snapshot\"}" \
  >/dev/null

restored_count="$(count_points "$TEST_COLLECTION")"
require_count "restored" "$restored_count"
log "restored count: $restored_count"

# Cleanup whether or not the assertion passes. A cleanup failure on the normal
# path is itself a failure of the run.
if ! cleanup; then
  log "ERROR: failed to remove $TEST_COLLECTION or /tmp/$snap_basename from $QDRANT_CONTAINER"
  exit 1
fi

if [ "$restored_count" -lt "$prod_count" ]; then
  log "FAIL: restored=$restored_count < production=$prod_count"
  exit 1
fi
log "OK: restored=$restored_count >= production=$prod_count"