Files
podx/scripts/backfill_openwebui.py

114 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3
import os, sys, json
from pathlib import Path
import requests, orjson
# --- Open WebUI connection settings, all read from the environment ---------
# Base URL with trailing slashes removed so endpoint paths can be appended.
OWUI_URL = os.environ.get("OPENWEBUI_URL", "").rstrip("/")
# Bearer token; an empty string means "not configured".
OWUI_KEY = os.environ.get("OPENWEBUI_API_KEY", "")
# Name of the knowledge base to look up or create.
OWUI_KB = os.environ.get("OPENWEBUI_KB_NAME", "Homelab Library")

# --- Metadata-template handling ---------------------------------------------
# Enabled unless the variable is explicitly set to "0", "false" or "no"
# (compared case-insensitively, surrounding whitespace ignored).
OWUI_AUTO_FIX_METADATA = (
    os.environ.get("OPENWEBUI_AUTO_FIX_METADATA", "1").strip().lower()
    not in ("0", "false", "no")
)
# Raw template text (expected to be JSON); empty when unset.
OWUI_METADATA_TEMPLATE_JSON = os.environ.get(
    "OPENWEBUI_METADATA_TEMPLATE_JSON", ""
).strip()
# Process-wide flag: set once a template update has been accepted.
_TEMPLATE_PATCHED = False

# --- Local content roots ----------------------------------------------------
LIB = Path(os.environ.get("LIBRARY_ROOT", "./library"))
TRN = Path(os.environ.get("TRANSCRIPT_ROOT", "./transcripts"))
def headers():
    """Return the bearer-auth header dict, or an empty dict if no key is set."""
    if not OWUI_KEY:
        return {}
    return {"Authorization": f"Bearer {OWUI_KEY}"}
def get_or_create_kb():
    """Return the id of the knowledge base named ``OWUI_KB``, creating it if absent.

    Prints a message and exits the process with status 1 when ``OWUI_URL`` or
    ``OWUI_KEY`` is empty.

    Returns:
        The ``id`` of the existing knowledge base whose ``name`` equals
        ``OWUI_KB``, or of the one created by this call.

    Raises:
        requests.HTTPError: If the list or create request returns an error
            status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the create response carries no ``id``.
    """
    if not OWUI_URL or not OWUI_KEY:
        print("OpenWebUI not configured.")
        sys.exit(1)

    r = requests.get(f"{OWUI_URL}/api/v1/knowledge/list", headers=headers(), timeout=15)
    r.raise_for_status()
    listing = r.json()
    # Accept both a {"data": [...]} envelope and a bare JSON list.
    # NOTE(review): which shape the server sends appears to depend on the
    # Open WebUI version -- confirm against the deployed instance.
    entries = listing.get("data", []) if isinstance(listing, dict) else listing
    for kb in entries or []:
        if kb.get("name") == OWUI_KB:
            return kb["id"]

    # No match: create the knowledge base. The timeout mirrors the list call
    # so an unresponsive server cannot block the script indefinitely.
    r = requests.post(
        f"{OWUI_URL}/api/v1/knowledge/create",
        headers={**headers(), "Content-Type": "application/json"},
        data=orjson.dumps({"name": OWUI_KB, "description": "All local content indexed by podx"}),
        timeout=15,
    )
    r.raise_for_status()
    created = r.json()
    # Same tolerance as above: the id may be nested under "data" or top-level.
    envelope = created.get("data")
    if isinstance(envelope, dict) and "id" in envelope:
        return envelope["id"]
    return created["id"]
def metadata_template_payload():
    """Return the metadata template configured in ``OWUI_METADATA_TEMPLATE_JSON``.

    Returns:
        ``{}`` when the setting is empty; the decoded value when the setting
        parses as JSON; otherwise the raw setting string, unchanged.
    """
    if not OWUI_METADATA_TEMPLATE_JSON:
        return {}
    try:
        return json.loads(OWUI_METADATA_TEMPLATE_JSON)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Malformed JSON is
        # passed through as plain text rather than treated as fatal.
        return OWUI_METADATA_TEMPLATE_JSON
def ensure_metadata_template(kb_id: str, force: bool = False) -> bool:
    """Best-effort attempt to set ``metadata_template`` on a knowledge base.

    Candidate values are tried in order: the configured template, then ``{}``
    (only when the configured template is not already empty), then ``None``.
    Each candidate is sent with ``PATCH`` and then ``PUT``; the first response
    with status 200, 201, 202 or 204 wins.

    Args:
        kb_id: Knowledge-base id; a falsy value makes this a no-op.
        force: Try again even if an earlier call in this process succeeded.

    Returns:
        ``True`` if a request was accepted during this call, else ``False``
        (auto-fix disabled, no ``kb_id``, already applied and not forced, or
        every attempt failed).
    """
    global _TEMPLATE_PATCHED

    if not (OWUI_AUTO_FIX_METADATA and kb_id):
        return False
    if _TEMPLATE_PATCHED and not force:
        return False

    configured = metadata_template_payload()
    candidates = [configured]
    if configured not in ({}, "", None):
        candidates.append({})
    candidates.append(None)

    request_headers = {**headers(), "Content-Type": "application/json"}
    endpoint = f"{OWUI_URL}/api/v1/knowledge/{kb_id}"
    accepted = (200, 201, 202, 204)

    for candidate in candidates:
        document = {"metadata_template": candidate}
        try:
            encoded = orjson.dumps(document)
        except Exception:
            # Fall back to the stdlib encoder if orjson rejects the value.
            encoded = json.dumps(document).encode("utf-8")

        for verb in ("PATCH", "PUT"):
            try:
                reply = requests.request(
                    verb, endpoint, headers=request_headers, data=encoded, timeout=10
                )
            except Exception:
                # Any failure to send just moves on to the next attempt.
                continue
            if reply.status_code not in accepted:
                continue
            _TEMPLATE_PATCHED = True
            print(f"Applied metadata template via {verb} for KB {kb_id}")
            return True

    return False
def upload_and_attach(path: Path, kb_id: str):
    """Upload ``path`` to Open WebUI and attach the file to knowledge base ``kb_id``.

    If the attach request is rejected with HTTP 400, auto-fix is enabled and
    the response text mentions "metadata", the metadata template is re-applied
    with ``force=True`` and the attach is retried once.

    Args:
        path: Local file to upload; it is opened in binary mode.
        kb_id: Id of the knowledge base to attach the uploaded file to.

    Raises:
        requests.HTTPError: If the upload, or the final attach attempt,
            returns an error status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the upload response carries no file ``id``.
    """
    ensure_metadata_template(kb_id)

    with open(path, "rb") as f:
        r = requests.post(
            f"{OWUI_URL}/api/v1/files/",
            headers=headers(),
            files={"file": (path.name, f)},
            timeout=60 * 10,
        )
    r.raise_for_status()

    uploaded = r.json()
    # Accept the id either nested under "data" or at the top level.
    # NOTE(review): Open WebUI file responses appear to expose "id" at the top
    # level, with "data" holding file content -- confirm against the server.
    envelope = uploaded.get("data")
    if isinstance(envelope, dict) and "id" in envelope:
        file_id = envelope["id"]
    else:
        file_id = uploaded["id"]

    attach_url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}/file/add"
    hdrs = {**headers(), "Content-Type": "application/json"}
    body = orjson.dumps({"file_id": file_id})

    def _attach():
        # One definition of the attach request, shared by first try and retry.
        return requests.post(attach_url, headers=hdrs, data=body, timeout=60)

    r = _attach()
    if r.status_code == 400 and OWUI_AUTO_FIX_METADATA:
        try:
            txt = r.text.lower()
        except Exception:
            # Deliberate best-effort: fall back to the repr of the raw bytes.
            txt = str(r.content).lower()
        if "metadata" in txt and ensure_metadata_template(kb_id, force=True):
            r = _attach()
    r.raise_for_status()
    print(f"Uploaded {path}")
def main():
    """Resolve the knowledge base, then upload every matching text file to it."""
    kb_id = get_or_create_kb()
    ensure_metadata_template(kb_id)

    # (root directory, glob pattern) pairs, processed in this order.
    sources = (
        (TRN, "*.txt"),         # transcripts
        (LIB, "web/**/*.txt"),  # web snapshots
    )
    for root, pattern in sources:
        for document in root.glob(pattern):
            upload_and_attach(document, kb_id)


if __name__ == "__main__":
    main()