Switching to metube for downloads

This commit is contained in:
2025-09-07 13:21:36 +02:00
parent cb46334901
commit 0d22dd1794
3 changed files with 162 additions and 0 deletions

92
app/scanner.py Normal file
View File

@@ -0,0 +1,92 @@
import os
import time
import signal
import sys
from pathlib import Path
from redis import Redis
from rq import Queue
# Config via env (matches docker-compose)
LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))  # media root, scanned recursively
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))  # where <stem>.json is looked up
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
# Seconds between scan passes. An unset *or empty* variable falls back to 30
# (int("") would crash at import), and the value is clamped to >= 1 because
# main() waits by sleeping range(SCAN_INTERVAL) one-second ticks: 0 or a
# negative number would otherwise make the scanner rescan in a tight loop.
SCAN_INTERVAL = max(1, int(os.getenv("SCAN_INTERVAL") or "30"))

# Media types to track (compared against the lower-cased file suffix)
MEDIA_EXT = {
    ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
}

# In-memory seen set to avoid re-enqueueing during a single run.
# It is not persisted, so it starts empty on every process start.
_seen: set[str] = set()
def already_transcribed(p: Path) -> bool:
    """Report whether a transcript for *p* is already present.

    This is a heuristic keyed on the file stem only: the media file counts as
    done when ``<stem>.json`` exists directly inside the transcript root. The
    media file's own directory and extension are not taken into account.
    """
    return (TRN / (p.stem + ".json")).exists()
def iter_media_files(root: Path):
    """Lazily yield each regular file below *root* with a tracked media suffix.

    The tree is walked recursively; the suffix comparison is case-insensitive
    and uses the extensions listed in ``MEDIA_EXT``. Directories and other
    non-file entries are skipped.
    """
    yield from (
        candidate
        for candidate in root.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in MEDIA_EXT
    )
def enqueue_new_files():
    """Run one scan pass over the library and queue untranscribed media.

    Each media file found under ``LIB`` is identified by its resolved path.
    Paths already recorded in ``_seen`` are ignored; files that already have a
    transcript are recorded without being queued; everything else is enqueued
    as a ``worker.handle_local_file`` job and then recorded.

    Returns:
        The number of jobs enqueued during this pass.
    """
    queue = Queue(connection=Redis.from_url(REDIS_URL))

    # Ensure target dirs exist
    for directory in (TRN, LIB):
        directory.mkdir(parents=True, exist_ok=True)

    enqueued = 0
    for media in iter_media_files(LIB):
        resolved = str(media.resolve())
        if resolved in _seen:
            continue
        if not already_transcribed(media):
            # Enqueue the worker to process this local file
            queue.enqueue("worker.handle_local_file", resolved)
            enqueued += 1
        # Recorded only after a successful enqueue, so a failed enqueue is
        # retried on the next pass.
        _seen.add(resolved)
    return enqueued
# Flipped to True by the signal handler; main() polls it to stop gracefully.
_shutdown = False


def _handle_sig(sig, frame):
    """Signal handler: request that the scan loop stop.

    Both arguments are supplied by the ``signal`` machinery and are ignored;
    the handler only raises the module-level ``_shutdown`` flag.
    """
    global _shutdown
    _shutdown = True
def main():
    """Run scan passes until SIGINT or SIGTERM is received.

    Each iteration performs one ``enqueue_new_files()`` pass, reports how many
    jobs were queued (if any), and then waits ``SCAN_INTERVAL`` seconds. Any
    exception raised during a pass is printed to stderr and the loop carries
    on with the next pass.
    """
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _handle_sig)

    print(f"[scanner] Watching {LIB} → transcripts in {TRN}; interval={SCAN_INTERVAL}s", flush=True)

    while not _shutdown:
        try:
            enqueued = enqueue_new_files()
            if enqueued:
                print(f"[scanner] Enqueued {enqueued} new file(s)", flush=True)
        except Exception as exc:
            print(f"[scanner] Error: {exc}", file=sys.stderr, flush=True)

        # Sleep between passes in one-second ticks so a shutdown request is
        # noticed within about a second instead of after the full interval.
        ticks_left = SCAN_INTERVAL
        while ticks_left > 0 and not _shutdown:
            time.sleep(1)
            ticks_left -= 1

    print("[scanner] Shutting down", flush=True)


if __name__ == "__main__":
    main()

View File

@@ -231,6 +231,32 @@ def publish_to_openwebui(paths):
except Exception as e:
log({"status": "owui_error", "error": str(e)})
from pathlib import Path
def handle_local_file(path_str: str):
    """Transcribe & index a local media file that already exists in /library.

    Safe to call repeatedly; it skips if transcript JSON already exists.

    Args:
        path_str: Filesystem path of the media file.

    Returns:
        None. Progress and outcome are reported through ``log``.

    Raises:
        Exception: Any error raised while transcribing, indexing or
            publishing is logged with status ``error`` and then re-raised.
    """
    try:
        p = Path(path_str)
        # is_file() rather than exists(): a directory (or other non-file) path
        # must be rejected here instead of being handed to transcribe().
        if not p.is_file():
            log({"url": path_str, "status": "error", "error": "file_not_found"})
            return

        title = p.stem
        # Transcripts are looked up by stem only, directly under TRN.
        base_json = TRN / f"{title}.json"
        if base_json.exists():
            log({"url": path_str, "status": "skip", "reason": "already_transcribed"})
            return

        # The parent directory name is recorded as the uploader.
        info = {"url": path_str, "status": "transcribing", "title": title, "uploader": p.parent.name, "date": "", "path": str(p)}
        log(info)

        # NOTE(review): transcribe() presumably returns the transcript base
        # path (it is only used via .with_suffix here) - confirm.
        base = transcribe(p)
        index_meili(base.with_suffix(".json"))
        publish_to_openwebui([base.with_suffix(".txt")])
        log({**info, "status": "done"})
    except Exception as e:
        log({"url": path_str, "status": "error", "error": str(e)})
        raise
def handle_web(url: str):
info = {"url": url, "status":"web-downloading", "title":"", "uploader":"", "date":"", "path":""}
log(info)
@@ -244,6 +270,12 @@ def handle_web(url: str):
def handle_url(url: str):
try:
# If a local file path (or file:// URL) is provided, process it directly
if url.startswith("file://"):
return handle_local_file(url[7:])
if url.startswith("/") and Path(url).exists():
return handle_local_file(url)
if not is_media_url(url):
handle_web(url)
return

View File

@@ -63,3 +63,41 @@ services:
volumes:
- ${REDIS_DATA_HOST_DIR:-./data/redis}:/data
restart: unless-stopped
metube:
  # MeTube downloader; files are written into the shared library directory.
  image: alexta69/metube:latest
  container_name: metube
  ports:
    - "8081:8081"
  environment:
    # MeTube reads UID/GID for the user it runs as; the linuxserver-style
    # PUID/PGID names are not recognised and were silently ignored.
    - UID=1000
    - GID=1000
    - TZ=Europe/Prague
    - DOWNLOAD_DIR=/downloads
    - OUTPUT_TEMPLATE=%(uploader)s/%(upload_date)s - %(title)s.%(ext)s
    # Optional: pass a cookies file to bypass consent/age walls
    # - COOKIE_FILE=/config/cookies.txt
    # Optional: yt-dlp options (JSON). Example enables Android client fallback
    # - YTDL_OPTIONS={"extractor_args":{"youtube":{"player_client":"android"}}}
  volumes:
    # Same host directory that podx-scanner mounts at /library
    - ${LIBRARY_HOST_DIR:-./library}:/downloads
    # Optional cookies file on host → /config/cookies.txt inside container
    # - /mnt/secure/cookies.txt:/config/cookies.txt:ro
  restart: unless-stopped
podx-scanner:
  # Runs scanner.py, which periodically walks LIBRARY_ROOT and enqueues
  # media files that have no transcript yet.
  build: ./app
  container_name: podx-scanner
  command:
    - python
    - scanner.py
  env_file:
    - .env
  environment:
    MEILI_URL: http://meili:7700
    REDIS_URL: redis://redis:6379/0
    LIBRARY_ROOT: /library
    TRANSCRIPT_ROOT: /transcripts
    # Seconds between scan passes
    SCAN_INTERVAL: 30
  volumes:
    # Same host directory that metube writes its downloads into
    - ${LIBRARY_HOST_DIR:-./library}:/library
    - ${TRANSCRIPTS_HOST_DIR:-./transcripts}:/transcripts
  depends_on:
    - redis
  restart: unless-stopped