17 lines
		
	
	
		
			782 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			17 lines
		
	
	
		
			782 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env bash
#
# Index PDF files into a search index, one JSON document per non-empty page.
#
# Usage: pass one or more PDF paths as arguments. With no arguments the
# script does nothing and exits successfully.
#
# Environment:
#   MEILI_URL  Base URL of the search server (default: http://localhost:7700)
#   MEILI_KEY  API key sent as a Bearer token (default: change_me)
#
# Requires: pdfinfo, pdftotext, jq, curl.
set -euo pipefail

BASE_URL=${MEILI_URL:-http://localhost:7700}
KEY=${MEILI_KEY:-change_me}

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Make a string safe to use as a document id.
# Meilisearch only accepts ids made of A-Z, a-z, 0-9, '-' and '_', so every
# other byte (dots, spaces, non-ASCII, ...) is replaced with '_'. Ids that
# are already valid come back unchanged. Distinct names can collapse to the
# same id (e.g. "a b" and "a_b").
# Arguments: $1 - raw string
# Outputs:   sanitized string on stdout (no trailing newline)
#######################################
sanitize_id() {
  # LC_ALL=C keeps the ranges strictly ASCII regardless of the caller's locale.
  printf '%s' "$1" | LC_ALL=C tr -c 'A-Za-z0-9_-' '_'
}

#######################################
# Extract each page of every given PDF and POST it to the "library" index.
# Globals:   BASE_URL (read), KEY (read)
# Arguments: PDF file paths
# Outputs:   one "Indexed ..." line per PDF on stdout
# Returns:   exits non-zero as soon as any tool or HTTP request fails
#######################################
main() {
  local pdf title id_prefix pages p text doc

  for pdf in "$@"; do
    title=$(basename -- "$pdf")
    id_prefix=$(sanitize_id "$title")

    pages=$(pdfinfo "$pdf" | awk '$1 == "Pages:" {print $2}')
    [[ $pages =~ ^[0-9]+$ ]] || die "cannot determine page count of $pdf"

    for ((p = 1; p <= pages; p++)); do
      # Blank out whitespace-only lines so that, together with the trailing
      # newline stripping of $(...), a page with no text ends up empty.
      text=$(pdftotext -f "$p" -l "$p" -layout "$pdf" - | sed 's/^[[:space:]]*$//')
      [[ -n $text ]] || continue

      # NOTE(review): meta.page carries the document id, not the page number.
      # That looks unintended, but it is kept as-is in case consumers rely on
      # it -- confirm before changing.
      doc=$(jq -nc \
        --arg id "${id_prefix}-p${p}" \
        --arg t "$title" \
        --arg src "file://$pdf" \
        --arg txt "$text" \
        '{id:$id, type:"pdf", title:$t, source:$src, date:"", text:$txt, meta:{page:$id}}')

      # --fail turns HTTP 4xx/5xx into a non-zero exit, so a bad key or URL
      # aborts the run instead of being hidden by the /dev/null redirect.
      curl -sS --fail -X POST "$BASE_URL/indexes/library/documents" \
        -H "Authorization: Bearer ${KEY}" \
        -H 'Content-Type: application/json' \
        --data-binary "$doc" >/dev/null
    done

    echo "Indexed $title ($pages pages)"
  done
}

main "$@"