#!/usr/bin/env python3
"""Index the text of EPUB books into the Meilisearch ``library`` index.

Usage::

    script.py BOOK.epub [BOOK.epub ...]

Every document item of every EPUB given on the command line is converted to
plain text and posted to Meilisearch as its own document.

Configuration is read from the environment:

* ``MEILI_URL`` - base URL of the server (default ``http://localhost:7700``).
* ``MEILI_KEY`` - API key sent as a bearer token (default ``change_me``).
"""
import hashlib
import json
import os
import sys

import ebooklib
import requests
from bs4 import BeautifulSoup
from ebooklib import epub

MEILI_URL = os.getenv("MEILI_URL", "http://localhost:7700")
MEILI_KEY = os.getenv("MEILI_KEY", "change_me")

# Seconds to wait for Meilisearch before giving up on a request.  Without an
# explicit timeout ``requests`` waits forever on an unresponsive server.
REQUEST_TIMEOUT = 30


def post(doc, timeout=REQUEST_TIMEOUT):
    """Send *doc* to the ``library`` index of the configured server.

    Args:
        doc: JSON-serialisable document to add to the index.
        timeout: Seconds to wait for the server; forwarded to
            ``requests.post``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    r = requests.post(
        f"{MEILI_URL}/indexes/library/documents",
        headers={
            "Authorization": f"Bearer {MEILI_KEY}",
            "Content-Type": "application/json",
        },
        data=json.dumps(doc),
        timeout=timeout,
    )
    r.raise_for_status()


def _book_metadata(book, path):
    """Return ``(title, author)`` for *book*, falling back to the file name."""
    titles = book.get_metadata("DC", "title")
    # Fall back to the file name when the title is missing *or* empty, so the
    # indexed title is never the literal string "None".
    title = (titles[0][0] if titles else None) or os.path.basename(path)

    creators = book.get_metadata("DC", "creator")
    # Skip empty creator entries; a None value would make str.join raise.
    author = "; ".join(entry[0] for entry in creators if entry[0])
    return title, author


def _index_book(path):
    """Post every non-empty document item of the EPUB at *path*.

    Returns:
        ``(title, count)``: the book title used for the documents and the
        number of sections that were posted.
    """
    book = epub.read_epub(path)
    title, author = _book_metadata(book, path)
    source = f"file://{os.path.abspath(path)}"

    count = 0
    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        name = item.get_name()
        soup = BeautifulSoup(item.get_body_content(), "lxml")
        text = soup.get_text(separator=" ", strip=True)
        if not text:
            # Nothing searchable in this item.
            continue
        count += 1
        post(
            {
                # NOTE: the id is derived from the path exactly as given on
                # the command line (not the absolute path).  Kept as-is so
                # documents that are already indexed keep their primary key.
                "id": hashlib.sha1((path + name).encode()).hexdigest(),
                "type": "epub",
                "title": f"{title} - {name}",
                "source": source,
                "date": "",
                "text": text,
                "meta": {
                    "book_title": title,
                    "book_author": author,
                    "chapter": name,
                },
            }
        )
    return title, count


def main(argv=None):
    """Index every EPUB path in *argv* (defaults to ``sys.argv[1:]``)."""
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        title, n = _index_book(path)
        print(f"Indexed {title} ({n} sections)")


if __name__ == "__main__":
    main()