Fixing metadata import errors
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
import os, sys
|
||||
import os, sys, json
|
||||
from pathlib import Path
|
||||
import requests, orjson
|
||||
|
||||
OWUI_URL = os.getenv("OPENWEBUI_URL", "").rstrip("/")
|
||||
OWUI_KEY = os.getenv("OPENWEBUI_API_KEY", "")
|
||||
OWUI_KB = os.getenv("OPENWEBUI_KB_NAME", "Homelab Library")
|
||||
OWUI_AUTO_FIX_METADATA = os.getenv("OPENWEBUI_AUTO_FIX_METADATA", "1").strip().lower() not in ("0", "false", "no")
|
||||
OWUI_METADATA_TEMPLATE_JSON = os.getenv("OPENWEBUI_METADATA_TEMPLATE_JSON", "").strip()
|
||||
|
||||
_TEMPLATE_PATCHED = False
|
||||
|
||||
LIB = Path(os.getenv("LIBRARY_ROOT", "./library"))
|
||||
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "./transcripts"))
|
||||
@@ -28,19 +32,76 @@ def get_or_create_kb():
|
||||
r.raise_for_status()
|
||||
return r.json()["data"]["id"]
|
||||
|
||||
|
||||
def metadata_template_payload():
    """Return the metadata template configured through the environment.

    The value is derived from the module-level ``OWUI_METADATA_TEMPLATE_JSON``
    string:

    * empty string  -> ``{}``
    * valid JSON    -> the decoded value (whatever type the JSON encodes)
    * anything else -> the raw string, returned unchanged
    """
    if not OWUI_METADATA_TEMPLATE_JSON:
        return {}
    try:
        return json.loads(OWUI_METADATA_TEMPLATE_JSON)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly
        # nested input. Either way the text is not usable as JSON, so hand the
        # raw string back instead of failing.
        return OWUI_METADATA_TEMPLATE_JSON
|
||||
|
||||
|
||||
def ensure_metadata_template(kb_id: str, force: bool = False) -> bool:
    """Best-effort attempt to set the metadata template of a knowledge base.

    Candidate templates are tried in order: the configured template (see
    ``metadata_template_payload``), then ``{}`` and finally ``None``. Each
    candidate is sent to ``{OWUI_URL}/api/v1/knowledge/{kb_id}`` with PATCH and
    then PUT; the first response with status 200/201/202/204 wins.

    Args:
        kb_id: Knowledge base identifier. An empty value disables the call.
        force: When true, try again even if an earlier call already succeeded.

    Returns:
        True only if a request made by this call was accepted. False when
        auto-fix is disabled, ``kb_id`` is empty, the template was already
        applied (and ``force`` is false), or every attempt failed.
    """
    global _TEMPLATE_PATCHED
    if not OWUI_AUTO_FIX_METADATA or not kb_id:
        return False
    if _TEMPLATE_PATCHED and not force:
        return False

    template = metadata_template_payload()
    candidates = [template]
    if template not in ({}, "", None):
        candidates.append({})
    if template is not None:
        # A configured template of JSON ``null`` already is the None variant;
        # do not send the identical request twice.
        candidates.append(None)

    request_headers = {**headers(), "Content-Type": "application/json"}
    url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}"

    for candidate in candidates:
        payload = {"metadata_template": candidate}
        try:
            body = orjson.dumps(payload)
        except TypeError:
            # orjson's JSONEncodeError is a TypeError (e.g. integers beyond
            # 64 bits); the stdlib encoder is more permissive.
            body = json.dumps(payload).encode("utf-8")

        for method in ("PATCH", "PUT"):
            try:
                resp = requests.request(
                    method, url, headers=request_headers, data=body, timeout=10
                )
            except requests.RequestException:
                # Transport-level failure: move on to the next method/variant.
                continue
            if resp.status_code in (200, 201, 202, 204):
                _TEMPLATE_PATCHED = True
                print(f"Applied metadata template via {method} for KB {kb_id}")
                return True
    return False
|
||||
|
||||
def upload_and_attach(path: Path, kb_id: str):
    """Upload *path* to Open WebUI and attach the stored file to KB *kb_id*.

    Steps:
      1. Call ``ensure_metadata_template(kb_id)`` (best effort).
      2. POST the file to ``/api/v1/files/`` and read ``data.id`` from the
         JSON response.
      3. POST ``{"file_id": ...}`` to ``/api/v1/knowledge/{kb_id}/file/add``.
      4. If that returns HTTP 400, auto-fix is enabled and the response text
         mentions "metadata", force a template fix and, when the fix
         succeeds, retry the attach once.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` when the upload or the
            final attach response has an error status.
    """
    ensure_metadata_template(kb_id)

    with open(path, "rb") as f:
        r = requests.post(
            f"{OWUI_URL}/api/v1/files/",
            headers=headers(),
            files={"file": (path.name, f)},
            timeout=60 * 10,
        )
    r.raise_for_status()
    file_id = r.json()["data"]["id"]

    attach_url = f"{OWUI_URL}/api/v1/knowledge/{kb_id}/file/add"
    hdrs = {**headers(), "Content-Type": "application/json"}
    body = orjson.dumps({"file_id": file_id})

    def _attach():
        # Same request for the first attempt and the post-fix retry.
        return requests.post(attach_url, headers=hdrs, data=body, timeout=60)

    r = _attach()
    if r.status_code == 400 and OWUI_AUTO_FIX_METADATA:
        try:
            txt = r.text.lower()
        except Exception:
            # Fall back to the raw bytes' repr if the body cannot be decoded.
            txt = str(r.content).lower()
        if "metadata" in txt and ensure_metadata_template(kb_id, force=True):
            r = _attach()
    r.raise_for_status()
    print(f"Uploaded {path}")
|
||||
|
||||
def main():
|
||||
kb_id = get_or_create_kb()
|
||||
ensure_metadata_template(kb_id)
|
||||
# transcripts
|
||||
for txt in TRN.glob("*.txt"):
|
||||
upload_and_attach(txt, kb_id)
|
||||
|
@@ -63,6 +63,10 @@ fi
|
||||
: "${OPENWEBUI_API_KEY:=}"
|
||||
: "${OPENWEBUI_KB_ID:=}"
|
||||
: "${OPENWEBUI_WAIT_SECS:=180}"
|
||||
: "${OPENWEBUI_AUTO_FIX_METADATA:=1}"
|
||||
: "${OPENWEBUI_METADATA_TEMPLATE_JSON:=}"
|
||||
|
||||
__OWUI_METADATA_PATCHED=""
|
||||
|
||||
# ------------------------------ Helpers ------------------------------
|
||||
_require() {
|
||||
@@ -142,6 +146,59 @@ PY
|
||||
printf '%s' "${__id:-}"
|
||||
}
|
||||
|
||||
_owui_metadata_template_payload() {
  # Print the JSON request body {"metadata_template": ...} on stdout.
  #
  # The template comes from $OPENWEBUI_METADATA_TEMPLATE_JSON:
  #   empty / unset -> {}
  #   valid JSON    -> the decoded value
  #   anything else -> the raw (stripped) string
  #
  # The heredoc body must stay at column 0: it is fed to python3 verbatim.
  python3 - "${OPENWEBUI_METADATA_TEMPLATE_JSON:-}" <<'PY'
import json
import sys

raw = (sys.argv[1] if len(sys.argv) > 1 else "").strip()
if not raw:
    template = {}
else:
    try:
        template = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError: keep the text as-is.
        template = raw
print(json.dumps({"metadata_template": template}))
PY
}
|
||||
|
||||
_owui_fix_metadata_template() {
  # Best-effort: push the metadata template onto a knowledge base.
  #
  # Usage: _owui_fix_metadata_template KB_ID [FORCE]
  #   KB_ID  knowledge base id; empty -> return 1
  #   FORCE  "1" to try again even if KB_ID was already patched in this run
  #
  # Returns 0 when the KB was already patched (and FORCE != 1) or when a
  # PATCH/PUT to /api/v1/knowledge/KB_ID answers 200/201/202/204.
  # Returns 1 when auto-fix is disabled via OPENWEBUI_AUTO_FIX_METADATA
  # (0/false/no), KB_ID is empty, no payload could be built, or both
  # methods failed.
  #
  # NOTE(review): "${var,,}" needs bash >= 4.
  local kb_id="${1:-}" force="${2:-0}"
  local existing method payload http_code tmp_body tmp_code

  case "${OPENWEBUI_AUTO_FIX_METADATA,,}" in
    0|false|no) return 1 ;;
  esac
  [ -z "$kb_id" ] && return 1

  if [ "$force" != "1" ]; then
    # __OWUI_METADATA_PATCHED is a space-separated list of patched KB ids.
    for existing in $__OWUI_METADATA_PATCHED; do
      [ "$existing" = "$kb_id" ] && return 0
    done
  fi

  payload="$(_owui_metadata_template_payload)" || return 1
  # Do not send an empty body if the payload helper produced nothing.
  [ -n "$payload" ] || return 1

  for method in PATCH PUT; do
    tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"
    curl -sS -X "$method" \
      -H "Authorization: Bearer $OPENWEBUI_API_KEY" \
      -H "Content-Type: application/json" \
      -d "$payload" \
      -w "%{http_code}" --output "$tmp_body" \
      "$(_owui_url)/api/v1/knowledge/$kb_id" >"$tmp_code" || true
    http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
    rm -f "$tmp_body" "$tmp_code"

    case "$http_code" in
      200|201|202|204)
        # Record the KB once, even when a forced re-patch succeeds again.
        case " $__OWUI_METADATA_PATCHED " in
          *" $kb_id "*) ;;
          *) __OWUI_METADATA_PATCHED="${__OWUI_METADATA_PATCHED} $kb_id" ;;
        esac
        echo "[owui] metadata template adjusted via $method for KB $kb_id"
        return 0
        ;;
    esac
    # Any other status (or no status at all): fall through to the next method.
  done
  return 1
}
|
||||
|
||||
# ------------------------------ OWUI file helpers ------------------------------
|
||||
_owui_file_get() {
|
||||
local fid="$1"
|
||||
@@ -551,33 +608,52 @@ PY
|
||||
KB_ID="$(_kb_id_by_name "$kb_name")"
|
||||
echo "[owui] attaching to KB: $kb_name (id: ${KB_ID:-<none>})"
|
||||
[ -z "$KB_ID" ] && { echo "KB '$kb_name' not found (or ambiguous)." >&2; exit 1; }
|
||||
_owui_fix_metadata_template "$KB_ID" || true
|
||||
|
||||
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
|
||||
curl -sS -X POST \
|
||||
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"file_id\":\"$FILE_ID\"}" \
|
||||
-D "$tmp_hdrs" \
|
||||
-w "%{http_code}" --output "$tmp_body" \
|
||||
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
|
||||
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
|
||||
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
|
||||
RESP="$(cat "$tmp_body")"
|
||||
echo "$RESP" | ppjson
|
||||
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
|
||||
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
|
||||
attach_payload="{\"file_id\":\"$FILE_ID\"}"
|
||||
attempt=0
|
||||
while :; do
|
||||
attempt=$((attempt+1))
|
||||
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
|
||||
curl -sS -X POST \
|
||||
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$attach_payload" \
|
||||
-D "$tmp_hdrs" \
|
||||
-w "%{http_code}" --output "$tmp_body" \
|
||||
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
|
||||
curl_exit=$?
|
||||
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
|
||||
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
|
||||
RESP="$(cat "$tmp_body")"
|
||||
echo "$RESP" | ppjson
|
||||
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
|
||||
|
||||
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
|
||||
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
|
||||
case "$http_code" in
|
||||
200|201|204) : ;;
|
||||
*)
|
||||
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
|
||||
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
|
||||
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
|
||||
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
|
||||
|
||||
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
|
||||
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$KB_ID" 1; then
|
||||
echo "[owui] retrying attach after metadata template fix"
|
||||
continue
|
||||
fi
|
||||
echo "Attach failed (HTTP $http_code)" >&2; exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
case "$http_code" in
|
||||
200|201|204)
|
||||
break
|
||||
;;
|
||||
*)
|
||||
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
|
||||
echo "[owui] duplicate content — already indexed. Treating as success."
|
||||
break
|
||||
fi
|
||||
echo "Attach failed (HTTP $http_code)" >&2; exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
|
||||
;;
|
||||
owui-attach-id)
|
||||
shift || true; kb_id="${1:-}"; file="${2:-}"
|
||||
@@ -627,31 +703,51 @@ PY
|
||||
echo "[owui] WARNING: timed out waiting for file extraction; attach may fail" >&2
|
||||
fi
|
||||
|
||||
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
|
||||
curl -sS -X POST \
|
||||
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"file_id\":\"$FILE_ID\"}" \
|
||||
-D "$tmp_hdrs" \
|
||||
-w "%{http_code}" --output "$tmp_body" \
|
||||
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
|
||||
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
|
||||
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
|
||||
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
|
||||
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
|
||||
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
|
||||
_owui_fix_metadata_template "$kb_id" || true
|
||||
|
||||
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
|
||||
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
|
||||
case "$http_code" in
|
||||
200|201|204) : ;;
|
||||
*)
|
||||
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
|
||||
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
|
||||
attach_payload="{\"file_id\":\"$FILE_ID\"}"
|
||||
attempt=0
|
||||
while :; do
|
||||
attempt=$((attempt+1))
|
||||
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
|
||||
curl -sS -X POST \
|
||||
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$attach_payload" \
|
||||
-D "$tmp_hdrs" \
|
||||
-w "%{http_code}" --output "$tmp_body" \
|
||||
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
|
||||
curl_exit=$?
|
||||
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
|
||||
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
|
||||
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
|
||||
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
|
||||
|
||||
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
|
||||
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
|
||||
|
||||
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
|
||||
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$kb_id" 1; then
|
||||
echo "[owui] retrying attach after metadata template fix"
|
||||
continue
|
||||
fi
|
||||
echo "Attach failed (HTTP $http_code)" >&2; exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
case "$http_code" in
|
||||
200|201|204)
|
||||
break
|
||||
;;
|
||||
*)
|
||||
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
|
||||
echo "[owui] duplicate content — already indexed. Treating as success."
|
||||
break
|
||||
fi
|
||||
echo "Attach failed (HTTP $http_code)" >&2; exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
|
||||
;;
|
||||
owui-kb-files)
|
||||
shift || true; kb_name="${1:-}"
|
||||
@@ -753,4 +849,4 @@ PY
|
||||
_help
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
esac
|
||||
|
Reference in New Issue
Block a user