Switching to metube for downloads
This commit is contained in:
92
app/scanner.py
Normal file
92
app/scanner.py
Normal file
@@ -0,0 +1,92 @@
|
||||
|
||||
|
||||
import os
|
||||
import time
|
||||
import signal
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from redis import Redis
|
||||
from rq import Queue
|
||||
|
||||
# Config via env (matches docker-compose)
LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))            # media library mount (scanned recursively)
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))     # where transcript JSON/TXT files land
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")   # RQ broker connection string
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", "30"))  # seconds

# Media types to track
MEDIA_EXT = {
    ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
}

# In-memory seen set to avoid re-enqueueing during a single run
# (cleared on restart; persistence relies on the transcript-JSON heuristic)
_seen: set[str] = set()
|
||||
|
||||
|
||||
def already_transcribed(p: Path) -> bool:
    """Heuristic completion check.

    A media file counts as already processed when a JSON transcript with
    the same basename exists under the transcript root. Note this keys on
    the stem only, so files in different folders sharing a name collide.
    """
    return (TRN / (p.stem + ".json")).exists()
|
||||
|
||||
|
||||
def iter_media_files(root: Path):
    """Yield every regular file under *root* whose suffix is a tracked media type."""
    yield from (
        candidate
        for candidate in root.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in MEDIA_EXT
    )
|
||||
|
||||
|
||||
def enqueue_new_files():
    """Run one scan pass over the library and enqueue unseen media files.

    Files already present in the in-memory ``_seen`` set or with an
    existing transcript JSON are skipped. Returns the number of jobs
    enqueued during this pass.
    """
    queue = Queue(connection=Redis.from_url(REDIS_URL))

    # Ensure both mount points exist before walking anything.
    for mount in (TRN, LIB):
        mount.mkdir(parents=True, exist_ok=True)

    enqueued = 0
    for media in iter_media_files(LIB):
        key = str(media.resolve())
        if key in _seen:
            continue
        if already_transcribed(media):
            _seen.add(key)
            continue
        # Hand the file to the worker process via RQ.
        queue.enqueue("worker.handle_local_file", key)
        _seen.add(key)
        enqueued += 1
    return enqueued
|
||||
|
||||
|
||||
_shutdown = False
|
||||
|
||||
|
||||
def _handle_sig(sig, frame):
|
||||
global _shutdown
|
||||
_shutdown = True
|
||||
|
||||
|
||||
def main():
    """Run the scan/enqueue loop until SIGINT or SIGTERM is received."""
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _handle_sig)

    print(f"[scanner] Watching {LIB} → transcripts in {TRN}; interval={SCAN_INTERVAL}s", flush=True)
    while not _shutdown:
        try:
            count = enqueue_new_files()
            if count:
                print(f"[scanner] Enqueued {count} new file(s)", flush=True)
        except Exception as exc:
            print(f"[scanner] Error: {exc}", file=sys.stderr, flush=True)
        # Sleep in one-second slices so a signal cuts the wait short.
        remaining = SCAN_INTERVAL
        while remaining > 0 and not _shutdown:
            time.sleep(1)
            remaining -= 1

    print("[scanner] Shutting down", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -231,6 +231,32 @@ def publish_to_openwebui(paths):
|
||||
except Exception as e:
|
||||
log({"status": "owui_error", "error": str(e)})
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
def handle_local_file(path_str: str):
    """Transcribe & index a local media file that already exists in /library.

    Safe to call repeatedly; it skips if transcript JSON already exists.

    Args:
        path_str: Filesystem path to the media file inside the library mount.

    Raises:
        Re-raises any processing exception after logging it, so the RQ job
        is marked failed and can be retried.
    """
    try:
        p = Path(path_str)
        if not p.exists():
            log({"url": path_str, "status": "error", "error": "file_not_found"})
            return
        title = p.stem
        # Same completion heuristic as the scanner: <basename>.json marks done.
        base_json = TRN / f"{title}.json"
        if base_json.exists():
            log({"url": path_str, "status": "skip", "reason": "already_transcribed"})
            return
        info = {"url": path_str, "status": "transcribing", "title": title, "uploader": p.parent.name, "date": "", "path": str(p)}
        log(info)
        base = transcribe(p)
        index_meili(base.with_suffix(".json"))
        publish_to_openwebui([base.with_suffix(".txt")])
        # Single dict-spread instead of the redundant {**info, **{...}} form.
        log({**info, "status": "done"})
    except Exception as e:
        log({"url": path_str, "status": "error", "error": str(e)})
        raise
|
||||
|
||||
def handle_web(url: str):
|
||||
info = {"url": url, "status":"web-downloading", "title":"", "uploader":"", "date":"", "path":""}
|
||||
log(info)
|
||||
@@ -244,6 +270,12 @@ def handle_web(url: str):
|
||||
|
||||
def handle_url(url: str):
|
||||
try:
|
||||
# If a local file path (or file:// URL) is provided, process it directly
|
||||
if url.startswith("file://"):
|
||||
return handle_local_file(url[7:])
|
||||
if url.startswith("/") and Path(url).exists():
|
||||
return handle_local_file(url)
|
||||
|
||||
if not is_media_url(url):
|
||||
handle_web(url)
|
||||
return
|
||||
|
@@ -63,3 +63,41 @@ services:
|
||||
volumes:
|
||||
- ${REDIS_DATA_HOST_DIR:-./data/redis}:/data
|
||||
restart: unless-stopped
|
||||
|
||||
metube:
|
||||
image: alexta69/metube:latest
|
||||
container_name: metube
|
||||
ports:
|
||||
- "8081:8081"
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- TZ=Europe/Prague
|
||||
- DOWNLOAD_DIR=/downloads
|
||||
- OUTPUT_TEMPLATE=%(uploader)s/%(upload_date)s - %(title)s.%(ext)s
|
||||
# Optional: pass a cookies file to bypass consent/age walls
|
||||
# - COOKIE_FILE=/config/cookies.txt
|
||||
# Optional: yt-dlp options (JSON). Example enables Android client fallback
|
||||
# - YTDL_OPTIONS={"extractor_args":{"youtube":{"player_client":"android"}}}
|
||||
volumes:
|
||||
- ${LIBRARY_HOST_DIR:-./library}:/downloads
|
||||
# Optional cookies file on host → /config/cookies.txt inside container
|
||||
# - /mnt/secure/cookies.txt:/config/cookies.txt:ro
|
||||
restart: unless-stopped
|
||||
|
||||
podx-scanner:
|
||||
build: ./app
|
||||
container_name: podx-scanner
|
||||
command: ["python", "scanner.py"]
|
||||
env_file: [.env]
|
||||
environment:
|
||||
MEILI_URL: http://meili:7700
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
LIBRARY_ROOT: /library
|
||||
TRANSCRIPT_ROOT: /transcripts
|
||||
SCAN_INTERVAL: 30
|
||||
volumes:
|
||||
- ${LIBRARY_HOST_DIR:-./library}:/library
|
||||
- ${TRANSCRIPTS_HOST_DIR:-./transcripts}:/transcripts
|
||||
depends_on: [redis]
|
||||
restart: unless-stopped
|
||||
|
Reference in New Issue
Block a user