#!/usr/bin/env bash
# backfill_openwebui.sh
# Upload existing local files (transcripts, web snapshots, other .txt/.md/.html)
# into your OpenWebUI Knowledge Base using values from .env (OPENWEBUI_URL, OPENWEBUI_API_KEY, OPENWEBUI_KB_NAME).
#
# Usage:
#   ./tools/backfill_openwebui.sh                      # default paths (./transcripts and ./library/web)
#   ./tools/backfill_openwebui.sh /extra/folder1 ...   # optional extra folders to scan
#
# Requirements: bash, curl, jq, find, xargs
# Strict mode: abort on command failure, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# ROOT_DIR is the parent of the directory that holds this script; the .env
# file is expected directly inside it.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="$ROOT_DIR/.env"

if [[ ! -f "$ENV_FILE" ]]; then
  echo "ERROR: .env not found in repo root. Copy .env.example to .env and fill values." >&2
  exit 1
fi

#######################################
# Load KEY=VALUE pairs from a .env file and export them.
# Lines that do not start with a valid identifier followed by '=' (comments,
# blank lines, indented text) are ignored. Values may be wrapped in single or
# double quotes; no escape sequences are interpreted inside quotes. For
# unquoted values a '#' starts a comment only at the start of the value or
# after whitespace, so values such as URLs with a '#' fragment stay intact.
# Arguments: $1 - path to the .env file
# Outputs:   error message on stderr if the file cannot be read
# Returns:   0 on success, 1 if the file is not readable
#######################################
load_env_file() {
  local env_file="$1"
  local line key value
  # Regexes live in variables so the quote characters need no escaping in [[ =~ ]].
  local dq_re='^"([^"]*)"'
  local sq_re="^'([^']*)'"

  if [[ ! -r "$env_file" ]]; then
    printf 'ERROR: cannot read env file: %s\n' "$env_file" >&2
    return 1
  fi

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}" # tolerate CRLF line endings
    [[ "$line" =~ ^([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]] || continue
    key="${BASH_REMATCH[1]}"
    value="${BASH_REMATCH[2]}"

    if [[ "$value" =~ $dq_re ]]; then
      value="${BASH_REMATCH[1]}"
    elif [[ "$value" =~ $sq_re ]]; then
      value="${BASH_REMATCH[1]}"
    else
      if [[ "$value" == \#* ]]; then
        value=""
      fi
      # Drop an inline comment (whitespace followed by '#'), then trim
      # leading and trailing whitespace.
      value="${value%%[[:space:]]#*}"
      value="${value#"${value%%[![:space:]]*}"}"
      value="${value%"${value##*[![:space:]]}"}"
    fi

    # Quoted as a single word so spaces in the value are preserved.
    export "$key=$value"
  done <"$env_file"
}

load_env_file "$ENV_FILE"

# Required settings: abort with a hint when either is unset or empty.
: "${OPENWEBUI_URL:?Set OPENWEBUI_URL in .env}"
: "${OPENWEBUI_API_KEY:?Set OPENWEBUI_API_KEY in .env}"
KB_NAME="${OPENWEBUI_KB_NAME:-Homelab Library}"

# Drop one trailing slash so "$BASE/api/..." never contains a double slash.
BASE="${OPENWEBUI_URL%/}"
KEY="$OPENWEBUI_API_KEY"

# Fail early when a required external tool is missing.
command -v jq >/dev/null || { echo "ERROR: jq is required" >&2; exit 1; }
command -v curl >/dev/null || { echo "ERROR: curl is required" >&2; exit 1; }

# Resolve or create the Knowledge Base
echo "Resolving Knowledge Base: ${KB_NAME}"

# Look the KB up by name and keep the first match only, so duplicate names
# cannot produce a multi-line id. A failed request or an unparsable response
# leaves KB_ID empty (`|| true`), which falls through to the create step
# instead of aborting under `set -e`/`pipefail`.
# NOTE(review): the filter accepts both a {"data": [...]} envelope and a bare
# JSON array — confirm which shape your OpenWebUI version returns.
KB_ID="$(
  curl -s -H "Authorization: Bearer $KEY" "$BASE/api/v1/knowledge/list" \
    | jq -r --arg KB "$KB_NAME" \
      '(if type == "array" then . else (.data // []) end)
        | map(select(.name == $KB) | .id) | .[0] // empty' \
    || true
)"

if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "Creating Knowledge Base: ${KB_NAME}"
  # Build the payload with jq so quotes or backslashes in the name still
  # yield valid JSON.
  kb_payload="$(jq -n --arg name "$KB_NAME" \
    '{name: $name, description: "All local content indexed by podx"}')"
  # `|| true` lets the explicit check below print the error message rather
  # than the script dying silently on a failed pipeline.
  # NOTE(review): the id is read from .data.id, falling back to a top-level
  # .id — confirm against your OpenWebUI version.
  KB_ID="$(
    curl -s -X POST "$BASE/api/v1/knowledge/create" \
      -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
      -d "$kb_payload" \
      | jq -r '.data.id // .id // empty' \
      || true
  )"
fi

if [[ -z "$KB_ID" || "$KB_ID" == "null" ]]; then
  echo "ERROR: Could not get or create KB" >&2
  exit 1
fi

#######################################
# Upload one local file to OpenWebUI and attach it to the Knowledge Base.
# Best-effort: a failed upload or attach is reported as a warning and never
# aborts the caller, even under `set -e`/`pipefail`.
# Globals:   BASE, KEY, KB_ID (read)
# Arguments: $1 - path of the file to upload
# Outputs:   progress line on stdout, warnings on stderr
# Returns:   0 (failures are reported as warnings)
#######################################
upload_and_attach() {
  local file="$1"
  local fname form_path form_name fid
  fname="$(basename "$file")"
  echo "Uploading: $file"

  # curl's -F syntax treats ',' and ';' as separators unless the name is
  # wrapped in double quotes; inside the quotes '\' and '"' must be
  # backslash-escaped.
  form_path="${file//\\/\\\\}"
  form_path="${form_path//\"/\\\"}"
  form_name="${fname//\\/\\\\}"
  form_name="${form_name//\"/\\\"}"

  # `fid` is local so nothing leaks into the global scope; `|| fid=""` keeps a
  # failed request from tripping `set -e`/`pipefail` in the caller.
  # NOTE(review): the id is read from .data.id, falling back to a top-level
  # .id — confirm against your OpenWebUI version.
  fid="$(curl -s -X POST "$BASE/api/v1/files/" \
    -H "Authorization: Bearer $KEY" \
    -F "file=@\"${form_path}\";filename=\"${form_name}\"" \
    | jq -r '.data.id // .id // empty')" || fid=""
  if [[ -z "$fid" || "$fid" == "null" ]]; then
    echo "WARN: upload failed for $file" >&2
    return 0
  fi

  # -f makes HTTP error statuses count as failures; without it curl exits 0
  # on 4xx/5xx responses and the warning could never fire for them.
  if ! curl -sf -X POST "$BASE/api/v1/knowledge/$KB_ID/file/add" \
    -H "Authorization: Bearer $KEY" -H "Content-Type: application/json" \
    -d "{\"file_id\":\"$fid\"}" >/dev/null; then
    echo "WARN: attach failed for $file" >&2
  fi
}

# Default folders
declare -a SCAN_DIRS=(
  "$ROOT_DIR/transcripts"
  "$ROOT_DIR/library/web"
)

# Additional user-provided folders (each command-line argument is one folder)
if (( $# > 0 )); then
  SCAN_DIRS+=( "$@" )
fi

# Patterns to include (find predicates, OR-ed together)
INCLUDE_PATTERNS=( -name '*.txt' -o -name '*.md' -o -name '*.html' -o -name '*.htm' )

#######################################
# Upload every file under one directory tree that matches INCLUDE_PATTERNS.
# Files are handled sequentially in the current shell, so upload_and_attach
# and the variables it reads are in scope (a child `bash -c` started by xargs
# would not see an unexported function).
# Globals:   INCLUDE_PATTERNS (read)
# Arguments: $1 - directory to scan recursively
# Outputs:   whatever upload_and_attach prints; a warning on stderr if it fails
#######################################
scan_dir() {
  local dir="$1"
  local path
  # NUL-delimited paths survive spaces and newlines in file names. The array
  # is quoted so the globs reach find instead of expanding against the
  # current directory.
  while IFS= read -r -d '' path; do
    # stdin is redirected so the callee cannot consume the path stream; the
    # `||` keeps one failed file from aborting the run under `set -e`.
    upload_and_attach "$path" </dev/null \
      || echo "WARN: upload_and_attach failed for $path" >&2
  done < <(find "$dir" -type f \( "${INCLUDE_PATTERNS[@]}" \) -print0)
}

# Iterate folders
for D in "${SCAN_DIRS[@]}"; do
  if [[ -d "$D" ]]; then
    echo "Scanning: $D"
    scan_dir "$D"
  else
    echo "Skip (not a directory): $D"
  fi
done

echo "Backfill finished. Check OpenWebUI → Knowledge → ${KB_NAME}."