114 lines
4.1 KiB
Python
Executable File
114 lines
4.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import os, sys, json
|
|
from pathlib import Path
|
|
import requests, orjson
|
|
|
|
# --- Open WebUI settings, all read from the environment at import time ---

# Base URL of the Open WebUI instance. Trailing slashes are removed so that
# endpoint paths can be appended with a leading "/".
OWUI_URL = os.environ.get("OPENWEBUI_URL", "").rstrip("/")

# API key sent as a bearer token; an empty value means "not configured".
OWUI_KEY = os.environ.get("OPENWEBUI_API_KEY", "")

# Name of the knowledge base that is looked up or created.
OWUI_KB = os.environ.get("OPENWEBUI_KB_NAME", "Homelab Library")

# Metadata auto-fix is enabled unless the variable is "0", "false" or "no"
# (compared case-insensitively after stripping surrounding whitespace).
OWUI_AUTO_FIX_METADATA = (
    os.environ.get("OPENWEBUI_AUTO_FIX_METADATA", "1").strip().lower()
    not in ("0", "false", "no")
)

# Optional metadata template text; decoded by metadata_template_payload().
OWUI_METADATA_TEMPLATE_JSON = os.environ.get(
    "OPENWEBUI_METADATA_TEMPLATE_JSON", ""
).strip()

# Flipped to True by ensure_metadata_template() after a successful update.
_TEMPLATE_PATCHED = False

# Local content roots that main() scans for *.txt files.
LIB = Path(os.environ.get("LIBRARY_ROOT", "./library"))
TRN = Path(os.environ.get("TRANSCRIPT_ROOT", "./transcripts"))
|
|
|
|
def headers():
    """Return the authentication headers for Open WebUI requests.

    Produces a bearer ``Authorization`` header when ``OWUI_KEY`` is set and
    an empty dict when it is not.
    """
    if not OWUI_KEY:
        return {}
    return {"Authorization": f"Bearer {OWUI_KEY}"}
|
|
|
|
def get_or_create_kb():
    """Return the id of the knowledge base named ``OWUI_KB``, creating it if absent.

    The existing knowledge bases are listed first; the first entry whose
    ``name`` equals ``OWUI_KB`` wins. If none matches, a new knowledge base
    is created and its id returned.

    Returns:
        The ``id`` value of the matching or newly created knowledge base.

    Raises:
        SystemExit: with status 1 when ``OWUI_URL`` or ``OWUI_KEY`` is empty.
        requests.HTTPError: when the list or create request returns an
            error status.
        KeyError: when the create response carries no ``id``.
    """
    if not OWUI_URL or not OWUI_KEY:
        print("OpenWebUI not configured.")
        sys.exit(1)

    r = requests.get(
        f"{OWUI_URL}/api/v1/knowledge/list",
        headers=headers(),
        timeout=15,
    )
    r.raise_for_status()
    listing = r.json()
    # Accept both a {"data": [...]} envelope and a bare JSON array.
    entries = listing.get("data", []) if isinstance(listing, dict) else listing
    for kb in entries:
        if kb.get("name") == OWUI_KB:
            return kb["id"]

    r = requests.post(
        f"{OWUI_URL}/api/v1/knowledge/create",
        headers={**headers(), "Content-Type": "application/json"},
        data=orjson.dumps(
            {"name": OWUI_KB, "description": "All local content indexed by podx"}
        ),
        # requests never times out on its own; without this a stalled server
        # would block the script indefinitely.
        timeout=30,
    )
    r.raise_for_status()
    created = r.json()
    # Prefer the id inside a {"data": {...}} envelope; otherwise read it from
    # the top level of the response object.
    envelope = created.get("data") if isinstance(created, dict) else None
    if isinstance(envelope, dict) and "id" in envelope:
        return envelope["id"]
    return created["id"]
|
|
|
|
|
|
def metadata_template_payload():
    """Build the metadata template value from ``OWUI_METADATA_TEMPLATE_JSON``.

    Returns:
        ``{}`` when the setting is empty, the decoded JSON value when the
        setting parses as JSON, and the raw setting string otherwise.
    """
    raw = OWUI_METADATA_TEMPLATE_JSON
    if raw:
        try:
            decoded = json.loads(raw)
        except Exception:
            # Not decodable as JSON: hand the text back unchanged.
            return raw
        return decoded
    return {}
|
|
|
|
|
|
def ensure_metadata_template(kb_id: str, force: bool = False) -> bool:
    """Try to set the ``metadata_template`` field of a knowledge base.

    Candidate values are tried in order: the configured template, then an
    empty object (only if the configured template is not already empty),
    then ``null``. Each candidate is sent with PATCH and then PUT; the first
    response with status 200, 201, 202 or 204 ends the search.

    Args:
        kb_id: Id of the knowledge base to update; a falsy id is a no-op.
        force: Attempt the update even if an earlier call already succeeded.

    Returns:
        True when a request was accepted during this call. False when
        auto-fix is disabled, ``kb_id`` is falsy, a previous call already
        succeeded and ``force`` is not set, or every attempt failed.
    """
    global _TEMPLATE_PATCHED

    if not (OWUI_AUTO_FIX_METADATA and kb_id):
        return False
    if _TEMPLATE_PATCHED and not force:
        return False

    def _encode(candidate):
        # orjson is tried first; the stdlib encoder is the fallback.
        try:
            return orjson.dumps(candidate)
        except Exception:
            return json.dumps(candidate).encode("utf-8")

    configured = metadata_template_payload()
    candidates = [{"metadata_template": configured}]
    if configured not in ({}, "", None):
        candidates.append({"metadata_template": {}})
    candidates.append({"metadata_template": None})

    request_headers = {**headers(), "Content-Type": "application/json"}
    target = f"{OWUI_URL}/api/v1/knowledge/{kb_id}"
    accepted = (200, 201, 202, 204)

    for candidate in candidates:
        encoded = _encode(candidate)
        for method in ("PATCH", "PUT"):
            try:
                resp = requests.request(
                    method,
                    target,
                    headers=request_headers,
                    data=encoded,
                    timeout=10,
                )
            except Exception:
                # Best effort: a failed request just moves on to the next attempt.
                continue
            if resp.status_code not in accepted:
                continue
            _TEMPLATE_PATCHED = True
            print(f"Applied metadata template via {method} for KB {kb_id}")
            return True

    return False
|
|
|
|
def upload_and_attach(path: Path, kb_id: str):
    """Upload ``path`` to Open WebUI and attach it to knowledge base ``kb_id``.

    The file is posted to the files endpoint, and the returned file id is
    then added to the knowledge base. If the attach request is rejected with
    status 400, auto-fix is enabled and the response text mentions
    "metadata", the metadata template is re-applied with ``force=True`` and
    the attach request is sent once more.

    Args:
        path: Local file to upload; opened in binary mode.
        kb_id: Id of the knowledge base the file is attached to.

    Raises:
        requests.HTTPError: when the upload or the final attach request
            returns an error status.
        KeyError: when the upload response carries no file id.
    """
    ensure_metadata_template(kb_id)

    with open(path, "rb") as f:
        r = requests.post(
            f"{OWUI_URL}/api/v1/files/",
            headers=headers(),
            files={"file": (path.name, f)},
            timeout=60 * 10,
        )
    r.raise_for_status()

    uploaded = r.json()
    # Prefer the id inside a {"data": {...}} envelope; otherwise read it from
    # the top level of the response object.
    envelope = uploaded.get("data") if isinstance(uploaded, dict) else None
    if isinstance(envelope, dict) and "id" in envelope:
        file_id = envelope["id"]
    else:
        file_id = uploaded["id"]

    attach_url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}/file/add"
    body = orjson.dumps({"file_id": file_id})
    hdrs = {**headers(), "Content-Type": "application/json"}

    r = requests.post(attach_url, headers=hdrs, data=body, timeout=60)
    if r.status_code == 400 and OWUI_AUTO_FIX_METADATA:
        try:
            txt = r.text.lower()
        except Exception:
            # Fall back to the repr of the raw bytes if decoding fails.
            txt = str(r.content).lower()
        # Retry once, and only if the template update was actually accepted.
        if "metadata" in txt and ensure_metadata_template(kb_id, force=True):
            r = requests.post(attach_url, headers=hdrs, data=body, timeout=60)
    r.raise_for_status()
    print(f"Uploaded {path}")
|
|
|
|
def main():
    """Upload all transcripts, then all web snapshots, to the knowledge base."""
    kb_id = get_or_create_kb()
    ensure_metadata_template(kb_id)

    sources = (
        (TRN, "*.txt"),         # transcripts
        (LIB, "web/**/*.txt"),  # web snapshots
    )
    for root, pattern in sources:
        for txt in root.glob(pattern):
            upload_and_attach(txt, kb_id)
|
|
|
|
# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|