More fixes (lazy Whisper model loading, retried Meilisearch indexing)

This commit is contained in:
2025-09-07 13:00:30 +02:00
parent 8c84e27a0a
commit 5c68154775
4 changed files with 57 additions and 20 deletions

View File

@@ -18,7 +18,14 @@ TRN.mkdir(parents=True, exist_ok=True)
LIB.mkdir(parents=True, exist_ok=True)
TMP.mkdir(parents=True, exist_ok=True)
model = WhisperModel(MODEL_NAME, compute_type=COMPUTE)
# Lazy Whisper model loader so the worker can start even if model download/setup is slow
_model = None  # cached WhisperModel instance; stays None until get_model() first runs


def get_model():
    """Return the shared WhisperModel, constructing it on the first call.

    The instance is built from MODEL_NAME and COMPUTE and stored in the
    module-level ``_model`` variable, so later calls return the same object
    instead of constructing a new model.

    Returns:
        The cached WhisperModel instance.
    """
    global _model
    if _model is None:
        # Construction is deferred to here rather than done at import time,
        # so importing this module does not build the model.
        _model = WhisperModel(MODEL_NAME, compute_type=COMPUTE)
    return _model
def log(feed):
try:
@@ -45,6 +52,7 @@ def yt_dlp(url, outdir):
return sorted(media, key=lambda p: p.stat().st_mtime)[-1:]
def transcribe(media_path: Path):
model = get_model()
segments, info = model.transcribe(str(media_path), vad_filter=True, language="auto")
title = media_path.stem
base = TRN / title
@@ -83,10 +91,21 @@ def index_meili(json_path: Path):
"segments": doc.get("segments", []),
"meta": {"language": doc.get("language", "")}
}
r = requests.post(f"{MEILI_URL}/indexes/library/documents",
headers={"Authorization": f"Bearer {MEILI_KEY}", "Content-Type":"application/json"},
data=orjson.dumps(payload))
r.raise_for_status()
import time
for attempt in range(5):
try:
r = requests.post(
f"{MEILI_URL}/indexes/library/documents",
headers={"Authorization": f"Bearer {MEILI_KEY}", "Content-Type":"application/json"},
data=orjson.dumps(payload),
timeout=15,
)
r.raise_for_status()
break
except Exception:
if attempt == 4:
raise
time.sleep(2 * (attempt + 1))
import tldextract, trafilatura, requests as _requests