Files
podx/tools/backfill_openwebui.sh
2025-09-07 10:42:27 +02:00

98 lines
3.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# backfill_openwebui.sh
# Upload existing local files (transcripts, web snapshots, other .txt/.md/.html)
# into your OpenWebUI Knowledge Base using values from .env (OPENWEBUI_URL, OPENWEBUI_API_KEY, OPENWEBUI_KB_NAME).
#
# Usage:
# ./tools/backfill_openwebui.sh # default paths (./transcripts and ./library/web)
# ./tools/backfill_openwebui.sh /extra/folder1 ... # optional extra folders to scan
#
# Requirements: bash, curl, jq, find, xargs
set -euo pipefail

# Repo root is the parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="$ROOT_DIR/.env"
if [[ ! -f "$ENV_FILE" ]]; then
  echo "ERROR: .env not found in repo root. Copy .env.example to .env and fill values." >&2
  exit 1
fi

#######################################
# Export every KEY=VALUE assignment found in a dotenv-style file.
# Lines that do not start with a valid identifier followed by '=' (comments,
# blank lines, anything else) are ignored. A value wrapped in matching single
# or double quotes is taken verbatim without the quotes; for an unquoted value
# an inline comment (whitespace followed by '#') and trailing whitespace are
# removed. Values are never word-split, so spaces inside a value survive.
# Arguments: $1 - path to the env file
# Outputs:   none (variables are exported into the current shell)
#######################################
load_env_file() {
  local env_path="$1"
  local line key val
  local dq_re='^"([^"]*)"'
  local sq_re="^'([^']*)'"
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}" # tolerate CRLF line endings
    [[ "$line" =~ ^[A-Za-z_][A-Za-z0-9_]*= ]] || continue
    key="${line%%=*}"
    val="${line#*=}"
    if [[ "$val" =~ $dq_re ]] || [[ "$val" =~ $sq_re ]]; then
      val="${BASH_REMATCH[1]}"
    else
      val="${val%%[[:space:]]#*}"          # drop inline comment
      val="${val%"${val##*[![:space:]]}"}" # trim trailing whitespace
    fi
    export "$key=$val"
  done <"$env_path"
}

# Load .env (KEY=VALUE lines; comments and blank lines are skipped)
load_env_file "$ENV_FILE"

: "${OPENWEBUI_URL:?Set OPENWEBUI_URL in .env}"
: "${OPENWEBUI_API_KEY:?Set OPENWEBUI_API_KEY in .env}"
KB_NAME="${OPENWEBUI_KB_NAME:-Homelab Library}"
# Strip one trailing slash so "$BASE/api/..." never contains "//".
BASE="${OPENWEBUI_URL%/}"
KEY="$OPENWEBUI_API_KEY"

command -v jq >/dev/null || { echo "ERROR: jq is required" >&2; exit 1; }
command -v curl >/dev/null || { echo "ERROR: curl is required" >&2; exit 1; }
# Resolve or create the Knowledge Base.
# Reads BASE, KEY and KB_NAME; leaves the Knowledge Base id in KB_ID or exits 1.
echo "Resolving Knowledge Base: ${KB_NAME}"
# Collect matching ids into an array and take the first, so several Knowledge
# Bases sharing the same name cannot produce a multi-line KB_ID. "|| true"
# keeps a failed lookup from aborting under set -e; we fall through to create.
KB_ID="$(curl -s -H "Authorization: Bearer $KEY" "$BASE/api/v1/knowledge/list" \
  | jq -r --arg KB "$KB_NAME" '[.data[] | select(.name==$KB) | .id][0] // empty' || true)"
if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "Creating Knowledge Base: ${KB_NAME}"
  # Let jq build the JSON so quotes or backslashes in the name stay valid JSON.
  kb_payload="$(jq -n --arg name "$KB_NAME" \
    '{name: $name, description: "All local content indexed by podx"}')"
  # "|| true": a failed request must reach the explicit error below instead of
  # silently terminating the script via set -e / pipefail.
  KB_ID="$(curl -s -X POST "$BASE/api/v1/knowledge/create" \
    -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
    -d "$kb_payload" \
    | jq -r '.data.id' || true)"
fi
if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "ERROR: Could not get or create KB" >&2
  exit 1
fi
#######################################
# Upload one local file and attach it to the Knowledge Base.
# Best-effort: a failed upload or attach prints a warning and returns 0 so a
# batch run can continue with the next file.
# Globals:   BASE, KEY, KB_ID (read)
# Arguments: $1 - path of the file to upload
# Outputs:   progress line on stdout, warnings on stderr
# Returns:   0
#######################################
upload_and_attach() {
  local file="$1"
  local fname file_id form_path form_name attach_payload
  fname="${file##*/}"
  echo "Uploading: $file"

  # curl -F treats ',' ';' and '"' in a file name as syntax. Wrapping the name
  # in double quotes (with embedded '\' and '"' backslash-escaped) makes curl
  # take it literally.
  form_path="${file//\\/\\\\}"
  form_path="${form_path//\"/\\\"}"
  form_name="${fname//\\/\\\\}"
  form_name="${form_name//\"/\\\"}"

  # "|| file_id=" keeps a curl/jq failure from aborting the whole script under
  # set -e / pipefail; it is reported by the check below instead.
  file_id="$(curl -s -X POST "$BASE/api/v1/files/" \
    -H "Authorization: Bearer $KEY" \
    -F "file=@\"${form_path}\";filename=\"${form_name}\"" \
    | jq -r '.data.id')" || file_id=""
  if [[ -z "$file_id" || "$file_id" == "null" ]]; then
    echo "WARN: upload failed for $file" >&2
    return 0
  fi

  # Build the JSON body with jq rather than by string interpolation.
  attach_payload="$(jq -n --arg id "$file_id" '{file_id: $id}')"
  # -f makes curl exit non-zero on an HTTP error status so the warning fires.
  curl -sf -X POST "$BASE/api/v1/knowledge/$KB_ID/file/add" \
    -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
    -d "$attach_payload" >/dev/null || {
    echo "WARN: attach failed for $file" >&2
  }
  return 0
}
# Default folders
declare -a SCAN_DIRS
SCAN_DIRS+=( "$ROOT_DIR/transcripts" )
SCAN_DIRS+=( "$ROOT_DIR/library/web" )
# Additional user-provided folders (adds nothing when no arguments were given)
SCAN_DIRS+=( "$@" )

# Patterns to include (find(1) expression fragments)
INCLUDE_PATTERNS=( -name '*.txt' -o -name '*.md' -o -name '*.html' -o -name '*.htm' )

#######################################
# Upload every matching file found under one directory, sequentially.
# Globals:   INCLUDE_PATTERNS (read)
# Arguments: $1 - directory to scan recursively
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 (a non-directory argument is reported and skipped)
#######################################
scan_and_upload_dir() {
  local dir="$1"
  local path
  if [[ ! -d "$dir" ]]; then
    echo "Skip (not a directory): $dir"
    return 0
  fi
  echo "Scanning: $dir"
  # upload_and_attach is called in THIS shell: a child started through
  # "xargs ... bash -c" would not see the function or the non-exported
  # variables it reads. The array is quoted so '*.txt' etc. reach find
  # unexpanded. NUL-delimited names on fd 3 keep odd file names intact and
  # leave stdin untouched for the commands run inside the loop.
  while IFS= read -r -d '' path <&3; do
    upload_and_attach "$path" \
      || echo "WARN: upload_and_attach failed for $path" >&2
  done 3< <(find "$dir" -type f \( "${INCLUDE_PATTERNS[@]}" \) -print0)
}

# Iterate folders
for D in "${SCAN_DIRS[@]}"; do
  scan_and_upload_dir "$D"
done
echo "Backfill finished. Check OpenWebUI → Knowledge → ${KB_NAME}."