#!/usr/bin/env bash
# backfill_openwebui.sh
# Upload existing local files (transcripts, web snapshots, other
# .txt/.md/.html/.htm) into your OpenWebUI Knowledge Base using values from
# .env (OPENWEBUI_URL, OPENWEBUI_API_KEY, OPENWEBUI_KB_NAME).
#
# Usage:
#   ./tools/backfill_openwebui.sh                    # default paths (./transcripts and ./library/web)
#   ./tools/backfill_openwebui.sh /extra/folder1 ... # optional extra folders to scan
#
# Requirements: bash, curl, jq, find
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="$ROOT_DIR/.env"

if [[ ! -f "$ENV_FILE" ]]; then
  echo "ERROR: .env not found in repo root. Copy .env.example to .env and fill values." >&2
  exit 1
fi

#######################################
# Load KEY=VALUE pairs from a dotenv-style file and export them.
# Lines that do not start with a valid identifier followed by '=' (comments,
# blanks, anything else) are ignored. Values are taken literally - nothing is
# evaluated or word-split, so spaces and '#' inside values survive.
#   - "double" or 'single' quoted values: text between the opening quote and
#     the next matching quote is used; anything after it is dropped.
#   - unquoted values: an inline comment (whitespace followed by '#') is
#     removed and surrounding whitespace is trimmed.
# Arguments: $1 - path to the env file
# Globals:   exports every variable found in the file
#######################################
load_env_file() {
  local env_path="$1"
  local line key value
  local dq='"' sq="'"

  # '|| [[ -n $line ]]' keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'} # tolerate CRLF files
    [[ "$line" =~ ^([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]] || continue
    key=${BASH_REMATCH[1]}
    value=${BASH_REMATCH[2]}

    # Trim leading whitespace before deciding whether the value is quoted.
    value=${value#"${value%%[![:space:]]*}"}

    case "$value" in
      "$dq"*)
        value=${value#"$dq"}
        value=${value%%"$dq"*}
        ;;
      "$sq"*)
        value=${value#"$sq"}
        value=${value%%"$sq"*}
        ;;
      *)
        value=${value%%[[:space:]]#*}            # strip inline comment
        value=${value%"${value##*[![:space:]]}"} # trim trailing whitespace
        ;;
    esac

    export "$key=$value"
  done <"$env_path"
}

load_env_file "$ENV_FILE"

: "${OPENWEBUI_URL:?Set OPENWEBUI_URL in .env}"
: "${OPENWEBUI_API_KEY:?Set OPENWEBUI_API_KEY in .env}"
KB_NAME="${OPENWEBUI_KB_NAME:-Homelab Library}"

# Drop a trailing slash so "$BASE/api/..." never becomes "//api/...".
BASE="${OPENWEBUI_URL%/}"
KEY="$OPENWEBUI_API_KEY"

command -v jq >/dev/null || { echo "ERROR: jq is required" >&2; exit 1; }
command -v curl >/dev/null || { echo "ERROR: curl is required" >&2; exit 1; }

# Resolve or create the Knowledge Base.
# NOTE(review): the response shapes used below (.data[] / .data.id) are kept
# from the original script - confirm them against your OpenWebUI version.
echo "Resolving Knowledge Base: ${KB_NAME}"
# If several knowledge bases share the name, use the first one so KB_ID is
# always a single id. Failures fall through to the create step below.
KB_ID="$(curl -s -H "Authorization: Bearer $KEY" "$BASE/api/v1/knowledge/list" \
  | jq -r --arg KB "$KB_NAME" '[.data[] | select(.name==$KB) | .id][0] // empty' || true)"

if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "Creating Knowledge Base: ${KB_NAME}"
  # Build the JSON with jq so quotes/backslashes in the name are escaped.
  CREATE_PAYLOAD="$(jq -n \
    --arg name "$KB_NAME" \
    --arg description "All local content indexed by podx" \
    '{name: $name, description: $description}')"
  # '|| true' lets a failed request reach the explicit error below instead of
  # aborting silently under 'set -e'.
  KB_ID="$(curl -s -X POST "$BASE/api/v1/knowledge/create" \
    -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
    -d "$CREATE_PAYLOAD" \
    | jq -r '.data.id' || true)"
fi

if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "ERROR: Could not get or create KB" >&2
  exit 1
fi

#######################################
# Upload one file to OpenWebUI and attach it to the Knowledge Base.
# Failures are reported as warnings on stderr; the function always returns 0
# so that one bad file does not abort the whole backfill.
# Globals:   BASE, KEY, KB_ID (read)
# Arguments: $1 - path of the file to upload
# Outputs:   progress line on stdout, warnings on stderr
# Returns:   0 always
#######################################
upload_and_attach() {
  local file="$1"
  local fname fid payload form_path form_name

  fname="$(basename -- "$file")"
  echo "Uploading: $file"

  # curl -F treats ';' and ',' as field separators, so the path and filename
  # are wrapped in double quotes with embedded '\' and '"' backslash-escaped.
  form_path=${file//\\/\\\\}
  form_path=${form_path//\"/\\\"}
  form_name=${fname//\\/\\\\}
  form_name=${form_name//\"/\\\"}

  # '|| fid=""' keeps a curl/jq failure from aborting the script (set -e).
  fid="$(curl -s -X POST "$BASE/api/v1/files/" \
    -H "Authorization: Bearer $KEY" \
    -F "file=@\"${form_path}\";filename=\"${form_name}\"" \
    | jq -r '.data.id')" || fid=""

  if [[ -z "$fid" || "$fid" == "null" ]]; then
    echo "WARN: upload failed for $file" >&2
    return 0
  fi

  payload="$(jq -n --arg file_id "$fid" '{file_id: $file_id}')"
  # -f makes HTTP 4xx/5xx responses count as failures, not only network errors.
  if ! curl -sf -X POST "$BASE/api/v1/knowledge/$KB_ID/file/add" \
    -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
    -d "$payload" >/dev/null; then
    echo "WARN: attach failed for $file" >&2
  fi
  return 0
}

# Default folders, followed by any additional user-provided folders.
declare -a SCAN_DIRS=("$ROOT_DIR/transcripts" "$ROOT_DIR/library/web")
SCAN_DIRS+=("$@")

# Patterns to include
INCLUDE_PATTERNS=(-name '*.txt' -o -name '*.md' -o -name '*.html' -o -name '*.htm')

# Iterate folders
for D in "${SCAN_DIRS[@]}"; do
  if [[ -d "$D" ]]; then
    echo "Scanning: $D"
    # NUL-delimited paths handle spaces/newlines. The loop runs in this shell
    # so upload_and_attach and its globals are in scope. The list is read on
    # fd 3 so nothing in the loop body can consume it via stdin.
    while IFS= read -r -d '' -u 3 path; do
      upload_and_attach "$path"
    done 3< <(find "$D" -type f \( "${INCLUDE_PATTERNS[@]}" \) -print0)
  else
    echo "Skip (not a directory): $D"
  fi
done

echo "Backfill finished. Check OpenWebUI → Knowledge → ${KB_NAME}."