Fixing metadata import errors

This commit is contained in:
2025-09-24 11:38:53 +02:00
parent 73e89b9a67
commit 6240e86650
6 changed files with 294 additions and 53 deletions

View File

@@ -1,11 +1,15 @@
#!/usr/bin/env python3
import os, sys
import os, sys, json
from pathlib import Path
import requests, orjson
# Base URL taken from OPENWEBUI_URL; trailing slashes are stripped so that
# request paths can be appended with a leading "/".
OWUI_URL = os.getenv("OPENWEBUI_URL", "").rstrip("/")
# Value of OPENWEBUI_API_KEY (empty string when unset).
OWUI_KEY = os.getenv("OPENWEBUI_API_KEY", "")
# Value of OPENWEBUI_KB_NAME, defaulting to "Homelab Library".
OWUI_KB = os.getenv("OPENWEBUI_KB_NAME", "Homelab Library")
# True unless OPENWEBUI_AUTO_FIX_METADATA is "0", "false" or "no"
# (compared after stripping whitespace and lower-casing); defaults to on.
OWUI_AUTO_FIX_METADATA = os.getenv("OPENWEBUI_AUTO_FIX_METADATA", "1").strip().lower() not in ("0", "false", "no")
# Raw, whitespace-stripped template text; metadata_template_payload() tries
# to parse it as JSON.
OWUI_METADATA_TEMPLATE_JSON = os.getenv("OPENWEBUI_METADATA_TEMPLATE_JSON", "").strip()
# Set to True by ensure_metadata_template() once a request has been accepted,
# so later non-forced calls return early.
_TEMPLATE_PATCHED = False
# Directory roots, overridable through LIBRARY_ROOT and TRANSCRIPT_ROOT.
LIB = Path(os.getenv("LIBRARY_ROOT", "./library"))
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "./transcripts"))
@@ -28,19 +32,76 @@ def get_or_create_kb():
r.raise_for_status()
return r.json()["data"]["id"]
def metadata_template_payload():
    """Return the metadata template configured via the environment.

    Returns:
        ``{}`` when ``OWUI_METADATA_TEMPLATE_JSON`` is empty, the decoded
        JSON value when the text parses as JSON, and otherwise the raw text
        unchanged.
    """
    if not OWUI_METADATA_TEMPLATE_JSON:
        return {}
    try:
        return json.loads(OWUI_METADATA_TEMPLATE_JSON)
    except ValueError:
        # json.JSONDecodeError is a ValueError; anything that is not valid
        # JSON is passed through as plain text.
        return OWUI_METADATA_TEMPLATE_JSON
def ensure_metadata_template(kb_id: str, force: bool = False) -> bool:
    """Try to set the ``metadata_template`` field of a knowledge base.

    The configured template is sent first. When that template is non-empty,
    an empty object and ``null`` are tried afterwards as fallbacks. Every
    payload is sent with PATCH and then PUT, stopping at the first response
    with status 200, 201, 202 or 204.

    Args:
        kb_id: Identifier of the knowledge base to update.
        force: Run even if an earlier call in this process already succeeded.

    Returns:
        True when a request was accepted during this call. False when the
        feature is disabled, ``kb_id`` is empty, an earlier call already
        succeeded and ``force`` is false, or every attempt failed.
    """
    global _TEMPLATE_PATCHED
    if not OWUI_AUTO_FIX_METADATA or not kb_id:
        return False
    if _TEMPLATE_PATCHED and not force:
        return False

    template_payload = metadata_template_payload()
    candidates = [template_payload]
    if template_payload not in ({}, "", None):
        candidates += [{}, None]

    hdrs = {**headers(), "Content-Type": "application/json"}
    url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}"
    last_failure = "no request was attempted"
    for candidate in candidates:
        payload = {"metadata_template": candidate}
        try:
            body = orjson.dumps(payload)
        except TypeError:
            # orjson.JSONEncodeError subclasses TypeError; fall back to the
            # standard-library encoder for values orjson rejects.
            body = json.dumps(payload).encode("utf-8")
        for method in ("PATCH", "PUT"):
            try:
                resp = requests.request(method, url, headers=hdrs, data=body, timeout=10)
            except requests.RequestException as exc:
                last_failure = f"{method} failed: {exc}"
                continue
            if resp.status_code in (200, 201, 202, 204):
                _TEMPLATE_PATCHED = True
                print(f"Applied metadata template via {method} for KB {kb_id}")
                return True
            last_failure = f"{method} returned HTTP {resp.status_code}"

    # Best-effort operation: report why it did not work instead of failing
    # silently, but leave the decision about what to do next to the caller.
    print(
        f"Could not apply metadata template for KB {kb_id} ({last_failure})",
        file=sys.stderr,
    )
    return False
def upload_and_attach(path: Path, kb_id: str):
    """Upload ``path`` and attach the resulting file to knowledge base ``kb_id``.

    The attach request is retried once when the server answers HTTP 400 with
    a body mentioning "metadata" and a forced ensure_metadata_template()
    call succeeds.

    Args:
        path: File to upload.
        kb_id: Identifier of the knowledge base to attach the file to.

    Raises:
        requests.HTTPError: If the upload, or the final attach attempt,
            returns an error status.
    """
    ensure_metadata_template(kb_id)
    with open(path, "rb") as f:
        r = requests.post(
            f"{OWUI_URL}/api/v1/files/",
            headers=headers(),
            files={"file": (path.name, f)},
            timeout=60 * 10,
        )
    r.raise_for_status()
    file_id = r.json()["data"]["id"]

    attach_url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}/file/add"
    hdrs = {**headers(), "Content-Type": "application/json"}
    body = orjson.dumps({"file_id": file_id})

    def attach():
        # One definition shared by the first attempt and the retry, so both
        # send exactly the same request.
        return requests.post(attach_url, headers=hdrs, data=body, timeout=60)

    r = attach()
    if r.status_code == 400 and OWUI_AUTO_FIX_METADATA:
        if "metadata" in r.text.lower() and ensure_metadata_template(kb_id, force=True):
            r = attach()
    r.raise_for_status()
    print(f"Uploaded {path}")
def main():
kb_id = get_or_create_kb()
ensure_metadata_template(kb_id)
# transcripts
for txt in TRN.glob("*.txt"):
upload_and_attach(txt, kb_id)

View File

@@ -63,6 +63,10 @@ fi
: "${OPENWEBUI_API_KEY:=}"
: "${OPENWEBUI_KB_ID:=}"
: "${OPENWEBUI_WAIT_SECS:=180}"
: "${OPENWEBUI_AUTO_FIX_METADATA:=1}"
: "${OPENWEBUI_METADATA_TEMPLATE_JSON:=}"
__OWUI_METADATA_PATCHED=""
# ------------------------------ Helpers ------------------------------
_require() {
@@ -142,6 +146,59 @@ PY
printf '%s' "${__id:-}"
}
# Print the JSON request body used to set a knowledge base's metadata
# template: {"metadata_template": <value>}.
# <value> is {} when OPENWEBUI_METADATA_TEMPLATE_JSON is empty, the decoded
# JSON when the variable parses as JSON, and otherwise the raw text.
_owui_metadata_template_payload() {
  python3 - "$OPENWEBUI_METADATA_TEMPLATE_JSON" <<'PY'
import json
import sys

raw = (sys.argv[1] if len(sys.argv) > 1 else "").strip()
try:
    template = json.loads(raw) if raw else {}
except ValueError:
    # json.JSONDecodeError is a ValueError: not valid JSON, so pass the raw
    # text through unchanged.
    template = raw
print(json.dumps({"metadata_template": template}))
PY
}
# Try to set the metadata template of a knowledge base via PATCH, then PUT.
#   $1  knowledge base id
#   $2  "1" to run even if this KB was already patched in this process
# Returns 0 when the KB was already patched (and not forced) or a request was
# answered with 200/201/202/204. Returns 1 when the feature is disabled via
# OPENWEBUI_AUTO_FIX_METADATA (0/false/no), the id is empty, or no request
# was accepted.
_owui_fix_metadata_template() {
  local kb_id="$1" force="${2:-0}"
  # Loop variables are declared local so they do not leak into the caller.
  local existing method payload http_code

  case "${OPENWEBUI_AUTO_FIX_METADATA,,}" in
    0|false|no) return 1 ;;
  esac
  [ -z "$kb_id" ] && return 1

  if [ "$force" != "1" ]; then
    # __OWUI_METADATA_PATCHED is a space-separated list of patched KB ids.
    for existing in $__OWUI_METADATA_PATCHED; do
      [ "$existing" = "$kb_id" ] && return 0
    done
  fi

  payload="$(_owui_metadata_template_payload)"
  for method in PATCH PUT; do
    # Only the status code matters here, so the response body is discarded
    # and the code is captured directly instead of going through temp files.
    http_code="$(curl -sS -X "$method" \
      -H "Authorization: Bearer $OPENWEBUI_API_KEY" \
      -H "Content-Type: application/json" \
      -d "$payload" \
      -o /dev/null -w "%{http_code}" \
      "$(_owui_url)/api/v1/knowledge/$kb_id" || true)"
    case "$http_code" in
      200|201|202|204)
        __OWUI_METADATA_PATCHED="${__OWUI_METADATA_PATCHED} $kb_id"
        echo "[owui] metadata template adjusted via $method for KB $kb_id"
        return 0
        ;;
    esac
  done
  return 1
}
# ------------------------------ OWUI file helpers ------------------------------
_owui_file_get() {
local fid="$1"
@@ -551,33 +608,52 @@ PY
KB_ID="$(_kb_id_by_name "$kb_name")"
echo "[owui] attaching to KB: $kb_name (id: ${KB_ID:-<none>})"
[ -z "$KB_ID" ] && { echo "KB '$kb_name' not found (or ambiguous)." >&2; exit 1; }
_owui_fix_metadata_template "$KB_ID" || true
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"file_id\":\"$FILE_ID\"}" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"
echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
attach_payload="{\"file_id\":\"$FILE_ID\"}"
attempt=0
while :; do
attempt=$((attempt+1))
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "$attach_payload" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
curl_exit=$?
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"
echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
case "$http_code" in
200|201|204) : ;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$KB_ID" 1; then
echo "[owui] retrying attach after metadata template fix"
continue
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
fi
case "$http_code" in
200|201|204)
break
;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."
break
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
done
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
;;
owui-attach-id)
shift || true; kb_id="${1:-}"; file="${2:-}"
@@ -627,31 +703,51 @@ PY
echo "[owui] WARNING: timed out waiting for file extraction; attach may fail" >&2
fi
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"file_id\":\"$FILE_ID\"}" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
_owui_fix_metadata_template "$kb_id" || true
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
case "$http_code" in
200|201|204) : ;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
attach_payload="{\"file_id\":\"$FILE_ID\"}"
attempt=0
while :; do
attempt=$((attempt+1))
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "$attach_payload" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
curl_exit=$?
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$kb_id" 1; then
echo "[owui] retrying attach after metadata template fix"
continue
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
fi
case "$http_code" in
200|201|204)
break
;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."
break
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
done
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
;;
owui-kb-files)
shift || true; kb_name="${1:-}"
@@ -753,4 +849,4 @@ PY
_help
exit 1
;;
esac
esac