Moar fixes
This commit is contained in:
@@ -1,11 +1,16 @@
|
|||||||
FROM python:3.11-slim
|
FROM python:3.11-slim
|
||||||
|
|
||||||
# Keep python fast and quiet, and pip lean
|
# Keep python fast/quiet and pip lean
|
||||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
PYTHONUNBUFFERED=1 \
|
PYTHONUNBUFFERED=1 \
|
||||||
PIP_NO_CACHE_DIR=1
|
PIP_NO_CACHE_DIR=1 \
|
||||||
|
LANG=C.UTF-8 \
|
||||||
|
LC_ALL=C.UTF-8 \
|
||||||
|
# sensible defaults (can be overridden by .env)
|
||||||
|
WHISPER_MODEL=large-v3 \
|
||||||
|
WHISPER_PRECISION=int8
|
||||||
|
|
||||||
# System deps (ffmpeg for media, jq for scripts, poppler-utils for PDFs, curl for healthcheck)
|
# System deps: ffmpeg for media, curl for healthcheck, jq for scripts, poppler-utils for PDFs
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ffmpeg \
|
ffmpeg \
|
||||||
curl \
|
curl \
|
||||||
@@ -15,18 +20,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Upgrade pip first, then install deps
|
# Upgrade pip toolchain then install Python deps
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN python -m pip install --upgrade pip \
|
RUN python -m pip install --upgrade pip setuptools wheel \
|
||||||
&& pip install --no-cache-dir -r requirements.txt \
|
&& pip install --no-cache-dir -r requirements.txt \
|
||||||
&& pip check || true
|
&& pip check || true
|
||||||
|
|
||||||
# App code
|
# App code
|
||||||
COPY app.py worker.py ./
|
COPY app.py worker.py ./
|
||||||
RUN pip install --no-cache-dir gunicorn==22.0.0
|
RUN pip install --no-cache-dir gunicorn==22.0.0
|
||||||
|
|
||||||
# Basic container health check (relies on curl)
|
# Healthcheck against the app's /health endpoint
|
||||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 CMD curl -fsS http://127.0.0.1:8080/ || exit 1
|
HEALTHCHECK --interval=30s --timeout=5s --retries=3 CMD curl -fsS http://127.0.0.1:8080/health || exit 1
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
CMD ["gunicorn", "-b", "0.0.0.0:8080", "app:app", "--workers", "2", "--threads", "4"]
|
CMD ["gunicorn", "-b", "0.0.0.0:8080", "app:app", "--workers", "2", "--threads", "4"]
|
23
app/app.py
23
app/app.py
@@ -68,11 +68,24 @@ doSearch();
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def meili_search(qstr, limit=30):
|
def meili_search(qstr, limit=30):
|
||||||
if not qstr.strip(): return []
|
if not qstr.strip():
|
||||||
r = requests.post(f"{MEILI_URL}/indexes/library/search",
|
return []
|
||||||
headers={"Authorization": f"Bearer {MEILI_KEY}","Content-Type":"application/json"},
|
try:
|
||||||
data=json.dumps({"q": qstr, "limit": limit}))
|
r = requests.post(
|
||||||
return r.json().get("hits", [])
|
f"{MEILI_URL}/indexes/library/search",
|
||||||
|
headers={"Authorization": f"Bearer {MEILI_KEY}", "Content-Type": "application/json"},
|
||||||
|
data=json.dumps({"q": qstr, "limit": limit}),
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
if r.status_code != 200:
|
||||||
|
return []
|
||||||
|
return r.json().get("hits", [])
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
def health():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
def index():
|
def index():
|
||||||
|
@@ -18,7 +18,14 @@ TRN.mkdir(parents=True, exist_ok=True)
|
|||||||
LIB.mkdir(parents=True, exist_ok=True)
|
LIB.mkdir(parents=True, exist_ok=True)
|
||||||
TMP.mkdir(parents=True, exist_ok=True)
|
TMP.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
model = WhisperModel(MODEL_NAME, compute_type=COMPUTE)
|
# Lazy Whisper model loader so the worker can start even if model download/setup is slow
|
||||||
|
_model = None
|
||||||
|
|
||||||
|
def get_model():
|
||||||
|
global _model
|
||||||
|
if _model is None:
|
||||||
|
_model = WhisperModel(MODEL_NAME, compute_type=COMPUTE)
|
||||||
|
return _model
|
||||||
|
|
||||||
def log(feed):
|
def log(feed):
|
||||||
try:
|
try:
|
||||||
@@ -45,6 +52,7 @@ def yt_dlp(url, outdir):
|
|||||||
return sorted(media, key=lambda p: p.stat().st_mtime)[-1:]
|
return sorted(media, key=lambda p: p.stat().st_mtime)[-1:]
|
||||||
|
|
||||||
def transcribe(media_path: Path):
|
def transcribe(media_path: Path):
|
||||||
|
model = get_model()
|
||||||
segments, info = model.transcribe(str(media_path), vad_filter=True, language="auto")
|
segments, info = model.transcribe(str(media_path), vad_filter=True, language="auto")
|
||||||
title = media_path.stem
|
title = media_path.stem
|
||||||
base = TRN / title
|
base = TRN / title
|
||||||
@@ -83,10 +91,21 @@ def index_meili(json_path: Path):
|
|||||||
"segments": doc.get("segments", []),
|
"segments": doc.get("segments", []),
|
||||||
"meta": {"language": doc.get("language", "")}
|
"meta": {"language": doc.get("language", "")}
|
||||||
}
|
}
|
||||||
r = requests.post(f"{MEILI_URL}/indexes/library/documents",
|
import time
|
||||||
headers={"Authorization": f"Bearer {MEILI_KEY}", "Content-Type":"application/json"},
|
for attempt in range(5):
|
||||||
data=orjson.dumps(payload))
|
try:
|
||||||
r.raise_for_status()
|
r = requests.post(
|
||||||
|
f"{MEILI_URL}/indexes/library/documents",
|
||||||
|
headers={"Authorization": f"Bearer {MEILI_KEY}", "Content-Type":"application/json"},
|
||||||
|
data=orjson.dumps(payload),
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
if attempt == 4:
|
||||||
|
raise
|
||||||
|
time.sleep(2 * (attempt + 1))
|
||||||
|
|
||||||
import tldextract, trafilatura, requests as _requests
|
import tldextract, trafilatura, requests as _requests
|
||||||
|
|
||||||
|
@@ -1,4 +1,3 @@
|
|||||||
version: "3.9"
|
|
||||||
services:
|
services:
|
||||||
podx-web:
|
podx-web:
|
||||||
build: ./app
|
build: ./app
|
||||||
@@ -26,7 +25,7 @@ services:
|
|||||||
podx-worker:
|
podx-worker:
|
||||||
build: ./app
|
build: ./app
|
||||||
container_name: podx-worker
|
container_name: podx-worker
|
||||||
command: ["python", "worker.py"]
|
command: ["rq", "worker", "-u", "redis://redis:6379/0", "default"]
|
||||||
env_file: [.env]
|
env_file: [.env]
|
||||||
environment:
|
environment:
|
||||||
MEILI_URL: http://meili:7700
|
MEILI_URL: http://meili:7700
|
||||||
@@ -36,6 +35,7 @@ services:
|
|||||||
TMP_ROOT: /tmpdl
|
TMP_ROOT: /tmpdl
|
||||||
WHISPER_MODEL: large-v3
|
WHISPER_MODEL: large-v3
|
||||||
WHISPER_PRECISION: int8
|
WHISPER_PRECISION: int8
|
||||||
|
PYTHONPATH: /app
|
||||||
volumes:
|
volumes:
|
||||||
- ${LIBRARY_HOST_DIR:-./library}:/library
|
- ${LIBRARY_HOST_DIR:-./library}:/library
|
||||||
- ${TRANSCRIPTS_HOST_DIR:-./transcripts}:/transcripts
|
- ${TRANSCRIPTS_HOST_DIR:-./transcripts}:/transcripts
|
||||||
|
Reference in New Issue
Block a user