Fixing metadata import errors

This commit is contained in:
2025-09-24 11:38:53 +02:00
parent 73e89b9a67
commit 6240e86650
6 changed files with 294 additions and 53 deletions

View File

@@ -63,6 +63,10 @@ fi
# Bearer token sent in the Authorization header of Open WebUI API requests.
: "${OPENWEBUI_API_KEY:=}"
# Knowledge-base id; empty by default.
# NOTE(review): presumably a default target KB -- its use is not visible here.
: "${OPENWEBUI_KB_ID:=}"
# Wait budget in seconds (default 180).
# NOTE(review): presumably the file-extraction wait timeout -- confirm.
: "${OPENWEBUI_WAIT_SECS:=180}"
# Set to 0, false or no (any letter case) to make
# _owui_fix_metadata_template return 1 without contacting the server.
: "${OPENWEBUI_AUTO_FIX_METADATA:=1}"
# Template text handed to _owui_metadata_template_payload: parsed as JSON when
# possible, sent as a plain string otherwise; empty means an empty object.
: "${OPENWEBUI_METADATA_TEMPLATE_JSON:=}"
# Space-separated ids of knowledge bases whose metadata template has already
# been adjusted by _owui_fix_metadata_template during this run.
__OWUI_METADATA_PATCHED=""
# ------------------------------ Helpers ------------------------------
_require() {
@@ -142,6 +146,59 @@ PY
printf '%s' "${__id:-}"
}
# Build the JSON request body used to set a knowledge base's metadata template.
# Globals:
#   OPENWEBUI_METADATA_TEMPLATE_JSON (read; may be unset or empty)
# Outputs:
#   One line of JSON on stdout, {"metadata_template": <value>}, where <value> is
#     - {}               when the template is empty or whitespace only
#     - the parsed value when the template is valid JSON
#     - the raw text     (as a JSON string) when it is not valid JSON
# Returns:
#   python3's exit status.
_owui_metadata_template_payload() {
  # The template travels as argv[1] instead of being interpolated into the
  # program text, so quotes or backslashes in it cannot alter the Python code.
  # The heredoc body and its terminator must stay at column 0 (plain `<<`).
  python3 - "${OPENWEBUI_METADATA_TEMPLATE_JSON:-}" <<'PY'
import json
import sys

raw = sys.argv[1] if len(sys.argv) > 1 else ""
raw = (raw or "").strip()
if not raw:
    payload = {"metadata_template": {}}
else:
    try:
        payload = {"metadata_template": json.loads(raw)}
    except Exception:
        # Not valid JSON: pass the text through unchanged as a string.
        payload = {"metadata_template": raw}
print(json.dumps(payload))
PY
}
# Send the metadata-template payload to a knowledge base, trying PATCH first
# and PUT second, and remember which knowledge bases were already handled.
# Globals:
#   OPENWEBUI_AUTO_FIX_METADATA (read)  0/false/no (any case) disables the fix
#   OPENWEBUI_API_KEY           (read)  bearer token for the request
#   __OWUI_METADATA_PATCHED     (read/written) space-separated handled KB ids
# Arguments:
#   $1 - knowledge base id
#   $2 - optional; "1" forces a new request even if $1 was already handled
# Outputs:
#   A "[owui] metadata template adjusted ..." line on stdout on success.
# Returns:
#   0 if the KB was already handled (and not forced) or the server answered
#   200/201/202/204; 1 if disabled, if the id is empty, or if both methods fail.
_owui_fix_metadata_template() {
  local kb_id="${1:-}" force="${2:-0}"
  local payload method http_code

  # Case-insensitive match spelled out as patterns so it does not depend on
  # the bash >= 4 "${var,,}" expansion.
  case "${OPENWEBUI_AUTO_FIX_METADATA:-1}" in
    0 | [Ff][Aa][Ll][Ss][Ee] | [Nn][Oo]) return 1 ;;
  esac
  [ -n "$kb_id" ] || return 1

  # Membership test on the padded list: no word splitting, no globbing, and
  # no loop variable leaking into the caller.
  if [ "$force" != "1" ]; then
    case " ${__OWUI_METADATA_PATCHED:-} " in
      *" $kb_id "*) return 0 ;;
    esac
  fi

  payload="$(_owui_metadata_template_payload)"
  for method in PATCH PUT; do
    # Only the status code matters, so the response body is discarded and the
    # code is captured directly; no temp files are left behind on interrupt.
    # A transport failure yields "000" (or nothing) and falls through to the
    # next method.
    http_code="$(
      curl -sS -X "$method" \
        -H "Authorization: Bearer ${OPENWEBUI_API_KEY:-}" \
        -H "Content-Type: application/json" \
        -d "$payload" \
        -o /dev/null -w '%{http_code}' \
        "$(_owui_url)/api/v1/knowledge/$kb_id"
    )" || true
    case "$http_code" in
      200 | 201 | 202 | 204)
        # Record the id once, even when a forced call repeats the fix.
        case " ${__OWUI_METADATA_PATCHED:-} " in
          *" $kb_id "*) ;;
          *) __OWUI_METADATA_PATCHED="${__OWUI_METADATA_PATCHED:-} $kb_id" ;;
        esac
        echo "[owui] metadata template adjusted via $method for KB $kb_id"
        return 0
        ;;
    esac
  done
  return 1
}
# ------------------------------ OWUI file helpers ------------------------------
_owui_file_get() {
local fid="$1"
@@ -551,33 +608,52 @@ PY
KB_ID="$(_kb_id_by_name "$kb_name")"
echo "[owui] attaching to KB: $kb_name (id: ${KB_ID:-<none>})"
[ -z "$KB_ID" ] && { echo "KB '$kb_name' not found (or ambiguous)." >&2; exit 1; }
_owui_fix_metadata_template "$KB_ID" || true
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"file_id\":\"$FILE_ID\"}" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"
echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
attach_payload="{\"file_id\":\"$FILE_ID\"}"
attempt=0
while :; do
attempt=$((attempt+1))
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "$attach_payload" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$KB_ID/file/add" >"$tmp_code" || true
curl_exit=$?
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"
echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
case "$http_code" in
200|201|204) : ;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$KB_ID" 1; then
echo "[owui] retrying attach after metadata template fix"
continue
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
fi
case "$http_code" in
200|201|204)
break
;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."
break
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
done
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
;;
owui-attach-id)
shift || true; kb_id="${1:-}"; file="${2:-}"
@@ -627,31 +703,51 @@ PY
echo "[owui] WARNING: timed out waiting for file extraction; attach may fail" >&2
fi
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"file_id\":\"$FILE_ID\"}" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
_owui_fix_metadata_template "$kb_id" || true
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
case "$http_code" in
200|201|204) : ;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."; exit 0
attach_payload="{\"file_id\":\"$FILE_ID\"}"
attempt=0
while :; do
attempt=$((attempt+1))
tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)"
curl -sS -X POST \
-H "Authorization: Bearer $OPENWEBUI_API_KEY" \
-H "Content-Type: application/json" \
-d "$attach_payload" \
-D "$tmp_hdrs" \
-w "%{http_code}" --output "$tmp_body" \
"$(_owui_url)/api/v1/knowledge/$kb_id/file/add" >"$tmp_code" || true
curl_exit=$?
http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)"
echo "[owui] response headers:"; sed -n '1,5p' "$tmp_hdrs" || true
RESP="$(cat "$tmp_body")"; echo "$RESP" | ppjson
rm -f "$tmp_body" "$tmp_code" "$tmp_hdrs"
[ $curl_exit -ne 0 ] && { echo "Attach failed: curl exit $curl_exit" >&2; exit $curl_exit; }
[ -z "$http_code" ] || [ "$http_code" = "000" ] && { echo "Attach failed: no HTTP code returned" >&2; exit 1; }
if [ "$http_code" = "400" ] && printf '%s' "$RESP" | grep -qi "metadata"; then
if [ "$attempt" -lt 3 ] && _owui_fix_metadata_template "$kb_id" 1; then
echo "[owui] retrying attach after metadata template fix"
continue
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
fi
case "$http_code" in
200|201|204)
break
;;
*)
if printf '%s' "$RESP" | grep -qi "Duplicate content"; then
echo "[owui] duplicate content — already indexed. Treating as success."
break
fi
echo "Attach failed (HTTP $http_code)" >&2; exit 1
;;
esac
done
[ -n "${TMP_EXTRACT:-}" ] && rm -f "$TMP_EXTRACT" || true
;;
owui-kb-files)
shift || true; kb_name="${1:-}"
@@ -753,4 +849,4 @@ PY
_help
exit 1
;;
esac
esac