Files
podx/ingest/ingest_kiwix.sh
2025-09-07 10:42:27 +02:00

18 lines
908 B
Bash
Executable File

#!/usr/bin/env bash
#
# Ingest the HTML entries of a ZIM archive into the Meilisearch "library" index.
#
# Usage: ingest_kiwix.sh <file.zim>
#
# Environment:
#   MEILI_URL  Base URL of the Meilisearch server (default: http://localhost:7700)
#   MEILI_KEY  API key sent as a Bearer token      (default: change_me)
#
# Requires: zimdump, jq, curl, sha1sum.
#
# For every entry whose mimetype is text/html the script dumps the entry,
# strips the markup, and POSTs one JSON document per entry. The document id
# is the SHA-1 of "<zim>:<path>", so re-running on the same archive produces
# the same ids.
set -euo pipefail

if (( $# < 1 )); then
  printf 'Usage: %s <file.zim>\n' "${0##*/}" >&2
  exit 2
fi

ZIM="$1"
BASE_URL=${MEILI_URL:-http://localhost:7700}
KEY=${MEILI_KEY:-change_me}

# The listing stays a pipeline (not process substitution) so that pipefail
# still aborts the script when zimdump or jq fails.
zimdump list "$ZIM" --json \
  | jq -rc '.[] | select(.mimetype=="text/html") | .path' \
  | while IFS= read -r path; do
    # Best effort: entries that cannot be dumped are skipped, not fatal.
    # stdin is detached so the command cannot consume the path list that
    # feeds this loop.
    html="$(zimdump dump "$ZIM" "$path" 2>/dev/null </dev/null || true)"
    [[ -n "$html" ]] || continue

    # Crude tag stripping: replace every <...> with a space, squeeze runs of
    # spaces, and drop leading whitespace on each line.
    text="$(printf '%s\n' "$html" \
      | sed -e 's/<[^>]*>/ /g' \
      | tr -s ' ' ' ' \
      | sed 's/^[[:space:]]*//')"

    # `--` keeps a path that starts with '-' from being parsed as an option.
    title="$(basename -- "$path")"
    title="${title//_/ }"

    id="$(printf '%s' "${ZIM}:${path}" | sha1sum | awk '{print $1}')"

    # The body text is fed to jq on stdin (-R raw, -s slurp => `.` is the whole
    # text as one string) instead of via --arg, so large articles do not hit
    # the kernel's per-argument size limit.
    doc="$(printf '%s' "$text" \
      | jq -Rsc --arg id "$id" --arg t "$title" --arg src "zim://$ZIM$path" \
        '{id:$id, type:"kiwix", title:$t, source:$src, date:"", text:., meta:{path:$src}}')"

    # The payload goes over stdin (@-) for the same size reason. The bearer
    # token is sent verbatim: no literal quote characters around the key.
    printf '%s' "$doc" \
      | curl -sS -X POST "$BASE_URL/indexes/library/documents" \
        -H "Authorization: Bearer ${KEY}" \
        -H 'Content-Type: application/json' \
        --data-binary @- >/dev/null
  done

printf 'Indexed ZIM: %s\n' "$ZIM"