37 lines
1.4 KiB
Python
Executable File
37 lines
1.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import hashlib
import json
import os
import sys

import ebooklib
import requests
from bs4 import BeautifulSoup
from ebooklib import epub
|
|
|
|
# Meilisearch connection settings; each can be overridden through the
# environment variable of the same name.
MEILI_URL = os.environ.get("MEILI_URL", "http://localhost:7700")
# The built-in default looks like a placeholder, so a real key is expected
# to be supplied via MEILI_KEY.
MEILI_KEY = os.environ.get("MEILI_KEY", "change_me")
|
|
|
|
def post(doc, timeout=30):
    """Send one document to the ``library`` index of the Meilisearch server.

    Args:
        doc: JSON-serialisable document to add to the index.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole indexing run.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.post(
        f"{MEILI_URL}/indexes/library/documents",
        headers={
            "Authorization": f"Bearer {MEILI_KEY}",
            "Content-Type": "application/json",
        },
        data=json.dumps(doc),
        timeout=timeout,
    )
    # Surface rejected requests instead of silently dropping the document.
    r.raise_for_status()
|
|
|
|
# Index every EPUB named on the command line: one search document is posted
# for each content section of the book that contains text.
for path in sys.argv[1:]:
    book = epub.read_epub(path)

    # Metadata entries are (value, attributes) tuples. The value can be
    # empty/None (e.g. an empty <dc:title/>), so fall back to the file name
    # for the title and skip blank creator entries.
    titles = book.get_metadata("DC", "title")
    title = (titles[0][0] if titles else None) or os.path.basename(path)
    creators = book.get_metadata("DC", "creator")
    author = "; ".join(c[0] for c in creators if c[0])

    n = 0  # number of sections actually indexed for this book
    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        soup = BeautifulSoup(item.get_body_content(), "lxml")
        # strip=True already trims every text fragment, so an empty result
        # means the section has no visible text at all.
        text = soup.get_text(separator=" ", strip=True)
        if not text:
            continue
        n += 1

        name = item.get_name()
        doc = {
            # NOTE(review): the id hashes the path exactly as typed, whereas
            # "source" uses the absolute path, so indexing the same file via
            # a different relative path yields different ids. Left unchanged
            # to keep the ids of already-indexed documents stable.
            "id": hashlib.sha1((path + name).encode()).hexdigest(),
            "type": "epub",
            "title": f"{title} - {name}",
            "source": f"file://{os.path.abspath(path)}",
            "date": "",
            "text": text,
            "meta": {
                "book_title": title,
                "book_author": author,
                "chapter": name,
            },
        }
        post(doc)

    print(f"Indexed {title} ({n} sections)")
|