"""Periodic library scanner: enqueue untranscribed media files onto an RQ queue."""

import os
import signal
import sys
import time
from pathlib import Path

from redis import Redis
from rq import Queue

# Config via env (matches docker-compose)
LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", "30"))  # seconds

# Media types to track
MEDIA_EXT = {
    ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus",
}

# In-memory seen set to avoid re-enqueueing during a single run.
# NOTE(review): grows without bound for the process lifetime; fine for a
# container-lifecycle scanner, but worth bounding for very large libraries.
_seen: set[str] = set()

# Process-wide queue, created lazily so each scan pass reuses one Redis
# connection pool. (Previously a new pool was built every SCAN_INTERVAL,
# leaking connections over time.)
_queue = None


def _get_queue() -> Queue:
    """Return the shared RQ queue, creating it on first use."""
    global _queue
    if _queue is None:
        _queue = Queue(connection=Redis.from_url(REDIS_URL))
    return _queue


def already_transcribed(p: Path) -> bool:
    """Heuristic: if .json exists in transcripts, consider it done.

    NOTE(review): keyed on the bare filename stem, so two media files with
    the same stem in different subdirectories share one transcript entry —
    confirm this collision is acceptable.
    """
    base_json = TRN / f"{p.stem}.json"
    return base_json.exists()


def iter_media_files(root: Path):
    """Yield every regular file under *root* whose suffix is in MEDIA_EXT."""
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        if path.suffix.lower() in MEDIA_EXT:
            yield path


def enqueue_new_files() -> int:
    """Scan LIB once; enqueue a worker job per new, untranscribed media file.

    Returns:
        Number of jobs enqueued this pass. Files already seen this run or
        already transcribed are skipped (and marked seen).
    """
    q = _get_queue()
    # Ensure target dirs exist
    TRN.mkdir(parents=True, exist_ok=True)
    LIB.mkdir(parents=True, exist_ok=True)

    new_jobs = 0
    for p in iter_media_files(LIB):
        key = str(p.resolve())
        if key in _seen:
            continue
        if already_transcribed(p):
            _seen.add(key)
            continue
        # Enqueue the worker to process this local file
        q.enqueue("worker.handle_local_file", key)
        _seen.add(key)
        new_jobs += 1
    return new_jobs


_shutdown = False


def _handle_sig(sig, frame):
    """Signal handler: request graceful shutdown of the main loop."""
    global _shutdown
    _shutdown = True


def main() -> None:
    """Run the scan/enqueue loop until SIGINT or SIGTERM arrives."""
    signal.signal(signal.SIGINT, _handle_sig)
    signal.signal(signal.SIGTERM, _handle_sig)
    print(f"[scanner] Watching {LIB} → transcripts in {TRN}; interval={SCAN_INTERVAL}s", flush=True)
    while not _shutdown:
        try:
            jobs = enqueue_new_files()
            if jobs:
                print(f"[scanner] Enqueued {jobs} new file(s)", flush=True)
        except Exception as e:
            # Boundary catch: keep the scanner alive across transient Redis/FS errors.
            print(f"[scanner] Error: {e}", file=sys.stderr, flush=True)
        # Sleep between passes in 1s slices so shutdown stays responsive.
        for _ in range(SCAN_INTERVAL):
            if _shutdown:
                break
            time.sleep(1)
    print("[scanner] Shutting down", flush=True)


if __name__ == "__main__":
    main()