knowledge-base/scripts/restore_test.sh
Pratik Narola e99b382b16 fix: restore_test.sh uses upload endpoint and tightens source-file glob
file:// snapshot recovery is disabled by default in recent Qdrant
(returns 403 on /collections/.../snapshots/recover with a file:// URL).
Switched to POST /snapshots/upload with multipart form-data which
doesn't need an allowlist.

Also tightened the find -name glob from "${SOURCE_COLLECTION}_*" to
"${SOURCE_COLLECTION}_[0-9]*" so a source named "mem0_v3" does not
accidentally match "mem0_v3_entities_*" files in the same dir.
2026-05-23 19:57:41 +05:30

81 lines
3.5 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Weekly Qdrant restore sanity check.
#
# Finds the most recent backup tarball for SOURCE_COLLECTION, restores it into
# a transient collection, asserts the restored point count >= production, and
# cleans up. Exits non-zero on any failure so cron alerting catches it.
#
# Env vars (all optional; the default applies when unset or empty):
#   QDRANT_CONTAINER   container name (default: mem0-qdrant)
#   BACKUP_DIR         local backup root
#                      (default: ~/aistuff/mem0/backups/qdrant)
#   SOURCE_COLLECTION  collection to verify (default: mem0_v3)
#   TEST_COLLECTION    transient collection name
#                      (default: mem0_v3_restore_test)
#
# Suggested cron (weekly Sunday 04:00 UTC):
# 0 4 * * 0 /home/ubuntu/aistuff/mem0/scripts/restore_test.sh >> /home/ubuntu/aistuff/mem0/backups/restore_test.log 2>&1
# Strict mode: abort on any unhandled command failure, on expansion of an
# unset variable, and when any stage of a pipeline fails.
set -euo pipefail

# Runtime configuration. Each setting keeps a non-empty value supplied through
# the environment and otherwise falls back to the default shown here.
QDRANT_CONTAINER="${QDRANT_CONTAINER:-mem0-qdrant}"
BACKUP_DIR="${BACKUP_DIR:-$HOME/aistuff/mem0/backups/qdrant}"
SOURCE_COLLECTION="${SOURCE_COLLECTION:-mem0_v3}"
TEST_COLLECTION="${TEST_COLLECTION:-mem0_v3_restore_test}"

# Safety guard: this script issues DELETE requests against TEST_COLLECTION.
# If it were the same name as SOURCE_COLLECTION, those requests would target
# the very collection being verified, so refuse to run in that configuration.
if [ "$TEST_COLLECTION" = "$SOURCE_COLLECTION" ]; then
  printf 'ERROR: TEST_COLLECTION must differ from SOURCE_COLLECTION (%s)\n' \
    "$SOURCE_COLLECTION" >&2
  exit 1
fi
# log MESSAGE...
# Write one line to stdout: the current UTC time in the form
# [YYYY-MM-DDTHH:MM:SSZ], a space, then all arguments joined into a single
# string (separated by the first character of IFS).
log() {
  printf '[%s] %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*"
}
# Pick the most recently modified backup for SOURCE_COLLECTION. The "_[0-9]*"
# suffix anchors on the timestamp digit so a collection named "mem0_v3" does
# NOT also match "mem0_v3_entities_*" files in the same directory.
#
# A missing backup root is reported explicitly: otherwise find would fail,
# pipefail would propagate that failure to the assignment, and set -e would
# end the script before any message was logged.
if [ ! -d "$BACKUP_DIR" ]; then
  log "ERROR: backup directory not found: $BACKUP_DIR"
  exit 1
fi

# find prints "<mtime-epoch> <path>" per match; sort puts the newest first.
# sed keeps only line 1 and strips the leading timestamp field. Unlike
# `head -1`, sed reads its whole input, so sort can never be killed by
# SIGPIPE on a long listing (which pipefail would turn into a failure).
# find's stderr is left visible so the cause of a scan error reaches the log.
if ! latest="$(
  find "$BACKUP_DIR" -type f -name "${SOURCE_COLLECTION}_[0-9]*" -printf '%T@ %p\n' \
    | sort -nr \
    | sed -n '1s/^[^ ]* //p'
)"; then
  log "ERROR: failed to scan $BACKUP_DIR for backups"
  exit 1
fi

if [ -z "$latest" ]; then
  log "ERROR: no backup found under $BACKUP_DIR matching ${SOURCE_COLLECTION}_[0-9]*"
  exit 1
fi
log "latest backup: $latest"
# Ask Qdrant for the exact number of points currently in SOURCE_COLLECTION.
# curl runs inside the container via `docker exec`; its JSON response is piped
# to python3 on this side of the pipe, which prints result.count.
prod_count_url="http://localhost:6333/collections/$SOURCE_COLLECTION/points/count"
count_extractor='import sys,json; print(json.load(sys.stdin)["result"]["count"])'
prod_count="$(
  docker exec "$QDRANT_CONTAINER" \
    curl -fsS -X POST "$prod_count_url" \
      -H "Content-Type: application/json" \
      -d '{"exact":true}' \
    | python3 -c "$count_extractor"
)"
log "production count ($SOURCE_COLLECTION): $prod_count"
# A previous run that failed midway may have left the transient collection
# behind. Try to delete it; all output is discarded and a failure of this
# request is deliberately ignored so the run continues either way.
if ! docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
  "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null 2>&1; then
  : # best-effort only
fi
# ---------------------------------------------------------------------------
# Restore the chosen backup into TEST_COLLECTION, count the restored points,
# clean up, and compare against the production count.
# ---------------------------------------------------------------------------
snap_basename="$(basename "$latest")"
container_snapshot="/tmp/$snap_basename"
cleanup_done=0

# cleanup
# Delete the transient collection and remove the snapshot copy from the
# container. Both steps are always attempted, even if the first one fails.
# Only the first call does any work; later calls return 0 immediately.
# Globals: QDRANT_CONTAINER, TEST_COLLECTION, container_snapshot (read),
#          cleanup_done (written)
# Returns: 0 if both steps succeeded or cleanup already ran, 1 otherwise.
cleanup() {
  local rc=0
  if [ "$cleanup_done" -eq 1 ]; then
    return 0
  fi
  cleanup_done=1
  docker exec "$QDRANT_CONTAINER" curl -fsS -X DELETE \
    "http://localhost:6333/collections/$TEST_COLLECTION" >/dev/null || rc=1
  docker exec "$QDRANT_CONTAINER" rm -f "$container_snapshot" || rc=1
  return "$rc"
}

# Safety net: under `set -e` any failing step below ends the script at once,
# which would otherwise skip the cleanup and leave the test collection and the
# snapshot copy behind. The EXIT trap runs cleanup on those early exits; the
# script's exit status is left as it was.
trap 'cleanup || log "WARN: cleanup after failure was incomplete"' EXIT

log "copying snapshot into container: $container_snapshot"
docker cp "$latest" "$QDRANT_CONTAINER:$container_snapshot"

# Use the upload endpoint (multipart) rather than recover-from-URL —
# file:// recovery is disabled by default in recent Qdrant (returns 403),
# and upload doesn't need an allowlist.
log "restoring into $TEST_COLLECTION via /snapshots/upload"
docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
  "http://localhost:6333/collections/$TEST_COLLECTION/snapshots/upload?priority=snapshot" \
  -H "Content-Type: multipart/form-data" \
  -F "snapshot=@$container_snapshot" \
  >/dev/null

# Exact point count of the restored collection; curl runs in the container,
# python3 on this side of the pipe extracts result.count from the JSON reply.
restored_count=$(docker exec "$QDRANT_CONTAINER" curl -fsS -X POST \
  "http://localhost:6333/collections/$TEST_COLLECTION/points/count" \
  -H "Content-Type: application/json" \
  -d '{"exact":true}' \
  | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["count"])')
log "restored count: $restored_count"

# Cleanup whether or not the assertion passes. A cleanup failure on this
# normal path still fails the run so that leftovers get noticed.
if ! cleanup; then
  log "ERROR: cleanup of $TEST_COLLECTION / $container_snapshot failed"
  exit 1
fi

# Fail closed: `[ a -lt b ]` returns status 2 on a non-integer operand, which
# `if` treats as "false" and would fall through to the OK message. Reject
# anything that is not a plain non-negative integer before comparing.
if ! [[ "$prod_count" =~ ^[0-9]+$ && "$restored_count" =~ ^[0-9]+$ ]]; then
  log "FAIL: non-numeric point count (restored='$restored_count', production='$prod_count')"
  exit 1
fi

if [ "$restored_count" -lt "$prod_count" ]; then
  log "FAIL: restored=$restored_count < production=$prod_count"
  exit 1
fi
log "OK: restored=$restored_count >= production=$prod_count"