Files
podx/app/scanner.py

92 lines
2.3 KiB
Python

import os
import time
import signal
import sys
from pathlib import Path
from redis import Redis
from rq import Queue
# Configuration is read from the environment (defaults match docker-compose).
LIB = Path(os.environ.get("LIBRARY_ROOT", "/library"))
TRN = Path(os.environ.get("TRANSCRIPT_ROOT", "/transcripts"))
REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")
# Pause between scan passes, in seconds.
SCAN_INTERVAL = int(os.environ.get("SCAN_INTERVAL", "30"))

# File suffixes (lowercase, with leading dot) that count as media.
MEDIA_EXT = {
    ".mp3",
    ".m4a",
    ".mp4",
    ".mkv",
    ".wav",
    ".flac",
    ".webm",
    ".ogg",
    ".opus",
}

# Resolved paths already handled by this process, so that a file is not
# enqueued again on later passes. In-memory only: it is lost on restart.
_seen: set[str] = set()
def already_transcribed(p: Path) -> bool:
    """Return True when a transcript JSON for *p* is already present.

    Heuristic: the file is considered done if ``<stem>.json`` exists
    directly inside the transcript root (``TRN``).
    """
    # NOTE(review): only the stem is used, so media files that share a stem
    # (different folders or different extensions) map to the same transcript.
    transcript_name = f"{p.stem}.json"
    return (TRN / transcript_name).exists()
def iter_media_files(root: Path, extensions=None):
    """Yield every regular file under *root* whose suffix is a media type.

    The tree is walked recursively with ``Path.rglob``. Suffix matching is
    case-insensitive on the file side: each suffix is lowercased before the
    lookup.

    Args:
        root: Directory to walk.
        extensions: Optional iterable of lowercase suffixes including the
            leading dot (e.g. ``{".mp3", ".wav"}``). When omitted, the
            module-level ``MEDIA_EXT`` set is used.

    Yields:
        ``Path`` objects for matching files, in the order ``rglob`` produces
        them.
    """
    wanted = MEDIA_EXT if extensions is None else frozenset(extensions)
    for path in root.rglob("*"):
        # Test the suffix first: it is a pure string operation, so entries
        # that are not media never trigger the filesystem call in is_file().
        if path.suffix.lower() in wanted and path.is_file():
            yield path
def enqueue_new_files():
    """Scan the library once and enqueue every file not yet handled.

    A file is skipped when its resolved path is already in ``_seen`` or when
    ``already_transcribed`` reports an existing transcript. Every other
    media file is enqueued for ``worker.handle_local_file`` with its
    resolved path as the only argument. Each file examined is recorded in
    ``_seen`` so later passes ignore it.

    Returns:
        The number of jobs enqueued during this pass.
    """
    queue = Queue(connection=Redis.from_url(REDIS_URL))

    # Make sure both directories exist before walking the library.
    TRN.mkdir(parents=True, exist_ok=True)
    LIB.mkdir(parents=True, exist_ok=True)

    enqueued = 0
    for media_path in iter_media_files(LIB):
        resolved = str(media_path.resolve())
        if resolved in _seen:
            continue
        if not already_transcribed(media_path):
            # The job is referenced by dotted name; a failure here raises
            # before the path is marked as seen, so it is retried next pass.
            queue.enqueue("worker.handle_local_file", resolved)
            enqueued += 1
        _seen.add(resolved)
    return enqueued
# Set to True by the signal handler; polled by the main loop.
_shutdown: bool = False


def _handle_sig(sig, frame) -> None:
    """Signal handler: request that the scan loop stop.

    Both arguments are supplied by the ``signal`` machinery and are unused;
    the handler only flips the module-level ``_shutdown`` flag.
    """
    global _shutdown
    _shutdown = True
def main():
    """Run the scan loop until SIGINT or SIGTERM is received.

    Each pass calls ``enqueue_new_files`` and reports how many jobs were
    added. Any exception raised by a pass is printed to stderr and the loop
    carries on, so a transient failure does not stop the scanner.
    Between passes the loop waits in one-second steps so that a shutdown
    request is noticed within about a second.
    """
    signal.signal(signal.SIGINT, _handle_sig)
    signal.signal(signal.SIGTERM, _handle_sig)

    # A zero or negative SCAN_INTERVAL would make the wait loop below a
    # no-op and turn the scanner into a busy loop; always pause at least
    # one second between passes.
    pause_seconds = max(1, SCAN_INTERVAL)

    print(f"[scanner] Watching {LIB} → transcripts in {TRN}; interval={pause_seconds}s", flush=True)
    while not _shutdown:
        try:
            jobs = enqueue_new_files()
            if jobs:
                print(f"[scanner] Enqueued {jobs} new file(s)", flush=True)
        except Exception as e:
            # Top-level boundary: report and keep scanning.
            print(f"[scanner] Error: {e}", file=sys.stderr, flush=True)
        # Sleep between passes, checking the shutdown flag every second.
        for _ in range(pause_seconds):
            if _shutdown:
                break
            time.sleep(1)
    print("[scanner] Shutting down", flush=True)


if __name__ == "__main__":
    main()