LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
TMP = Path(os.getenv("TMP_ROOT", "/tmpdl"))

# --- Runtime pause switch for CPU-heavy work (no rebuild needed) ---
PAUSE_TRANSCRIBE_FILE = Path(os.getenv("PAUSE_TRANSCRIBE_FILE", str(TRN / ".pause_transcribe")))

# Redis-backed pause flag (podx-tools compatible)
PAUSE_TRANSCRIBE_REDIS_KEY = os.getenv("PAUSE_TRANSCRIBE_REDIS_KEY", "podx:transcribe:paused").strip()

# Normalized (stripped, lower-cased) stored values that mean "not paused".
# "(nil)" is included so a literal "(nil)" string written by a tool is not
# mistaken for an active pause.
_PAUSE_FALSY_VALUES = frozenset({"", "0", "false", "no", "(nil)"})


def _pause_value_is_truthy(raw) -> bool:
    """Return True if *raw*, a value read from the pause key, means "paused".

    Args:
        raw: The stored value: ``None`` when the key is missing, ``bytes``
            from a client that returns raw responses, or ``str`` from a
            client that decodes responses.

    Returns:
        False for a missing/empty value or one of the falsy spellings in
        ``_PAUSE_FALSY_VALUES`` (compared case-insensitively, ignoring
        surrounding whitespace); True for anything else.
    """
    if not raw:
        return False
    if isinstance(raw, (bytes, bytearray)):
        # Undecodable bytes are dropped rather than raising, so a garbled
        # value still counts as "something is set".
        raw = bytes(raw).decode("utf-8", "ignore")
    return str(raw).strip().lower() not in _PAUSE_FALSY_VALUES


def _pause_flag_redis() -> bool:
    """Return True if a truthy pause flag is set in Redis under PAUSE_TRANSCRIBE_REDIS_KEY.

    This is a best-effort check: if the ``redis`` package is missing, the
    server is unreachable, or anything else goes wrong while reading the key,
    the flag is treated as not set and False is returned.
    """
    try:
        # Imported lazily so a missing ``redis`` package only disables this
        # switch instead of breaking module import.
        from redis import Redis as _R

        client = _R.from_url(REDIS_URL)
        try:
            raw = client.get(PAUSE_TRANSCRIBE_REDIS_KEY)
        finally:
            # A fresh client is built on every call; release its sockets
            # explicitly instead of relying on garbage collection.
            client.connection_pool.disconnect()
    except Exception:
        # Deliberate fail-open: a Redis problem must never pause or crash
        # the worker.
        return False
    return _pause_value_is_truthy(raw)


def transcribe_paused() -> bool:
    """Return True if new transcription work should be paused (file flag or Redis flag).

    The pause file is checked first; if it is absent (or cannot be probed),
    the Redis-backed flag decides.
    """
    try:
        if PAUSE_TRANSCRIBE_FILE.exists():
            return True
    except (OSError, ValueError):
        # An unreadable or malformed path counts as "no file flag"; the
        # Redis switch below still gets a chance.
        pass
    # Fall back to Redis-based switch used by podx-tools
    return _pause_flag_redis()
@@ -889,10 +908,22 @@ def _save_partial(title: str, language: str, segs: list[dict]): print(f"[whisper] partial txt save failed: {e}", flush=True) def transcribe(media_path: Path): - model = get_model() print(f"[whisper] start transcribe: {media_path}", flush=True) + # If paused, abort before any heavy work (no ffmpeg, no model load) + if transcribe_paused(): + print(f"[pause] transcribe: pause active before heavy work; aborting {media_path}", flush=True) + raise PauseInterrupt("pause requested before start") # 1) Robustly extract audio to 16k mono WAV (fixes pyAV/webm edge cases) wav = extract_audio(media_path, TMP) + # Check again after extraction to avoid loading the model if a pause was requested meanwhile + if transcribe_paused(): + try: + if wav.exists(): + wav.unlink() + except Exception: + pass + print(f"[pause] transcribe: pause activated; stopping before model load for {media_path}", flush=True) + raise PauseInterrupt("pause requested after extract") title = media_path.stem base = TRN / title