From 89420a2c099be56c7f4fa1820d0894ca9429622d Mon Sep 17 00:00:00 2001 From: Tomas Kracmar Date: Mon, 8 Sep 2025 18:09:04 +0200 Subject: [PATCH] Tool fix --- scripts/podx-tools.sh | 86 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/scripts/podx-tools.sh b/scripts/podx-tools.sh index c1a22d4..c25ba8e 100755 --- a/scripts/podx-tools.sh +++ b/scripts/podx-tools.sh @@ -451,10 +451,45 @@ PY _require "OPENWEBUI_URL" "$OPENWEBUI_URL" _require "OPENWEBUI_API_KEY" "$OPENWEBUI_API_KEY" + # Decide how to send the file (force text/plain for .txt/.md; optionally extract JSON->text) + upload_flag=("-F" "file=@$file") + ext="${file##*.}" + base="$(basename "$file")" + if [[ "$ext" =~ ^([Tt][Xx][Tt]|[Mm][Dd]|[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn])$ ]]; then + upload_flag=("-F" "file=@$file;type=text/plain;filename=$base") + elif [[ "$ext" =~ ^([Jj][Ss][Oo][Nn])$ ]]; then + # Try to extract human text from JSON and upload as text/plain + if command -v jq >/dev/null 2>&1; then + tmp_txt="$(_mktemp)" + # Extract common transcript shapes: .text or .segments[].text (strings only) + if jq -er ' + if type=="object" and (.text|type=="string") then .text + elif type=="object" and (.segments|type=="array") then + (.segments[]? | if type=="object" and (.text|type=="string") then .text + elif type=="string" then . + else empty end) + else empty end + ' "$file" >"$tmp_txt"; then + if [ -s "$tmp_txt" ]; then + # Keep original stem but force .txt for OWUI indexing + stem="${base%.*}" + upload_flag=("-F" "file=@$tmp_txt;type=text/plain;filename=${stem}.txt") + echo "[owui] extracted text from JSON -> ${stem}.txt" + else + echo "[owui] WARNING: JSON had no extractable text, uploading raw JSON (may be rejected)" >&2 + fi + else + echo "[owui] WARNING: jq failed to parse JSON, uploading raw JSON (may be rejected)" >&2 + fi + else + echo "[owui] NOTE: jq not installed; uploading raw JSON (may be rejected)" >&2 + fi + fi + # 1) Upload tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)" curl -sS -H "Authorization: Bearer $OPENWEBUI_API_KEY" \ - -F "file=@$file" \ + "${upload_flag[@]}" \ -w "%{http_code}" --output "$tmp_body" "$(_owui_url)/api/v1/files/" >"$tmp_code" || true curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)" FILE_JSON="$(cat "$tmp_body")" @@ -471,6 +506,11 @@ PY FILE_ID="$(python3 -c 'import sys,json; d=json.load(sys.stdin); print(d.get("id") or (d.get("data") or {}).get("id",""))' <<<"$FILE_JSON")" if [ -z "$FILE_ID" ]; then echo "Upload failed (no file id)"; exit 1; fi + # Wait until OWUI finishes processing/extracting text for this file (prevents "content empty" 400) + if ! _owui_wait_file "$FILE_ID" 180; then + echo "[owui] WARNING: timed out waiting for file content; attach may fail if OWUI hasn't extracted text yet" >&2 + fi + # 2) Resolve KB and attach KB_ID="$(_kb_id_by_name "$kb_name")" echo "[owui] attaching to KB: $kb_name (id: $KB_ID)" @@ -524,10 +564,45 @@ PY _require "OPENWEBUI_URL" "$OPENWEBUI_URL" _require "OPENWEBUI_API_KEY" "$OPENWEBUI_API_KEY" + # Decide how to send the file (force text/plain for .txt/.md; optionally extract JSON->text) + upload_flag=("-F" "file=@$file") + ext="${file##*.}" + base="$(basename "$file")" + if [[ "$ext" =~ ^([Tt][Xx][Tt]|[Mm][Dd]|[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn])$ ]]; then + upload_flag=("-F" "file=@$file;type=text/plain;filename=$base") + elif [[ "$ext" =~ ^([Jj][Ss][Oo][Nn])$ ]]; then + # Try to extract human text from JSON and upload as text/plain + if command -v jq >/dev/null 2>&1; then + tmp_txt="$(_mktemp)" + # Extract common transcript shapes: .text or .segments[].text (strings only) + if jq -er ' + if type=="object" and (.text|type=="string") then .text + elif type=="object" and (.segments|type=="array") then + (.segments[]? | if type=="object" and (.text|type=="string") then .text + elif type=="string" then . + else empty end) + else empty end + ' "$file" >"$tmp_txt"; then + if [ -s "$tmp_txt" ]; then + # Keep original stem but force .txt for OWUI indexing + stem="${base%.*}" + upload_flag=("-F" "file=@$tmp_txt;type=text/plain;filename=${stem}.txt") + echo "[owui] extracted text from JSON -> ${stem}.txt" + else + echo "[owui] WARNING: JSON had no extractable text, uploading raw JSON (may be rejected)" >&2 + fi + else + echo "[owui] WARNING: jq failed to parse JSON, uploading raw JSON (may be rejected)" >&2 + fi + else + echo "[owui] NOTE: jq not installed; uploading raw JSON (may be rejected)" >&2 + fi + fi + # 1) Upload tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)" curl -sS -H "Authorization: Bearer $OPENWEBUI_API_KEY" \ - -F "file=@$file" \ + "${upload_flag[@]}" \ -w "%{http_code}" --output "$tmp_body" "$(_owui_url)/api/v1/files/" >"$tmp_code" || true curl_exit=$?; http_code="$(cat "$tmp_code" 2>/dev/null || echo 0)" FILE_JSON="$(cat "$tmp_body")" @@ -541,9 +616,14 @@ PY echo "Upload failed (HTTP $http_code)" >&2; exit 1 fi - FILE_ID="$(python3 -c 'import sys,json; d=json.load(sys.stdin); print(d.get(\"id\") or (d.get(\"data\") or {}).get(\"id\",\"\"))' <<<"$FILE_JSON")" + FILE_ID="$(python3 -c 'import sys,json; d=json.load(sys.stdin); print(d.get("id") or (d.get("data") or {}).get("id",""))' <<<"$FILE_JSON")" if [ -z "$FILE_ID" ]; then echo "Upload failed (no file id)"; exit 1; fi + # Wait until OWUI finishes processing/extracting text for this file (prevents "content empty" 400) + if ! _owui_wait_file "$FILE_ID" 180; then + echo "[owui] WARNING: timed out waiting for file content; attach may fail if OWUI hasn't extracted text yet" >&2 + fi + # 2) Attach using explicit KB id tmp_body="$(_mktemp)"; tmp_code="$(_mktemp)"; tmp_hdrs="$(_mktemp)" curl -sS -X POST \