Setting redis pause
This commit is contained in:
@@ -15,16 +15,35 @@ LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
|
|||||||
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
|
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
|
||||||
TMP = Path(os.getenv("TMP_ROOT", "/tmpdl"))
|
TMP = Path(os.getenv("TMP_ROOT", "/tmpdl"))
|
||||||
|
|
||||||
|
|
||||||
# --- Runtime pause switch for CPU-heavy work (no rebuild needed) ---
|
# --- Runtime pause switch for CPU-heavy work (no rebuild needed) ---
|
||||||
PAUSE_TRANSCRIBE_FILE = Path(os.getenv("PAUSE_TRANSCRIBE_FILE", str(TRN / ".pause_transcribe")))
|
PAUSE_TRANSCRIBE_FILE = Path(os.getenv("PAUSE_TRANSCRIBE_FILE", str(TRN / ".pause_transcribe")))
|
||||||
|
|
||||||
def transcribe_paused() -> bool:
|
# Redis-backed pause flag (podx-tools compatible)
|
||||||
"""Return True if new transcription work should be paused."""
|
PAUSE_TRANSCRIBE_REDIS_KEY = os.getenv("PAUSE_TRANSCRIBE_REDIS_KEY", "podx:transcribe:paused").strip()
|
||||||
|
|
||||||
|
def _pause_flag_redis() -> bool:
|
||||||
|
"""Return True if a truthy pause flag is set in Redis under PAUSE_TRANSCRIBE_REDIS_KEY."""
|
||||||
try:
|
try:
|
||||||
return PAUSE_TRANSCRIBE_FILE.exists()
|
from redis import Redis as _R
|
||||||
|
val = _R.from_url(REDIS_URL).get(PAUSE_TRANSCRIBE_REDIS_KEY)
|
||||||
|
if not val:
|
||||||
|
return False
|
||||||
|
v = val.decode("utf-8", "ignore").strip().lower()
|
||||||
|
return v not in ("", "0", "false", "no", "(nil)")
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def transcribe_paused() -> bool:
    """Report whether new transcription work should currently be held back.

    The on-disk marker PAUSE_TRANSCRIBE_FILE is probed first. Any exception
    raised while probing it is treated the same as the marker being absent.
    When the marker does not signal a pause, the answer is whatever
    _pause_flag_redis() returns.
    """
    try:
        marker_present = PAUSE_TRANSCRIBE_FILE.exists()
    except Exception:
        # Best-effort probe: a failing filesystem check must not prevent
        # the Redis flag from being consulted.
        marker_present = False

    if marker_present:
        return True

    # Fall back to Redis-based switch used by podx-tools
    return _pause_flag_redis()
|
||||||
|
|
||||||
def wait_if_paused(label: str = "transcribe", poll_sec: int = 10):
|
def wait_if_paused(label: str = "transcribe", poll_sec: int = 10):
|
||||||
"""
|
"""
|
||||||
If the pause file exists, block this worker in a low-CPU sleep loop until it is removed.
|
If the pause file exists, block this worker in a low-CPU sleep loop until it is removed.
|
||||||
@@ -889,10 +908,22 @@ def _save_partial(title: str, language: str, segs: list[dict]):
|
|||||||
print(f"[whisper] partial txt save failed: {e}", flush=True)
|
print(f"[whisper] partial txt save failed: {e}", flush=True)
|
||||||
|
|
||||||
def transcribe(media_path: Path):
|
def transcribe(media_path: Path):
|
||||||
model = get_model()
|
|
||||||
print(f"[whisper] start transcribe: {media_path}", flush=True)
|
print(f"[whisper] start transcribe: {media_path}", flush=True)
|
||||||
|
# If paused, abort before any heavy work (no ffmpeg, no model load)
|
||||||
|
if transcribe_paused():
|
||||||
|
print(f"[pause] transcribe: pause active before heavy work; aborting {media_path}", flush=True)
|
||||||
|
raise PauseInterrupt("pause requested before start")
|
||||||
# 1) Robustly extract audio to 16k mono WAV (fixes pyAV/webm edge cases)
|
# 1) Robustly extract audio to 16k mono WAV (fixes pyAV/webm edge cases)
|
||||||
wav = extract_audio(media_path, TMP)
|
wav = extract_audio(media_path, TMP)
|
||||||
|
# Check again after extraction to avoid loading the model if a pause was requested meanwhile
|
||||||
|
if transcribe_paused():
|
||||||
|
try:
|
||||||
|
if wav.exists():
|
||||||
|
wav.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
print(f"[pause] transcribe: pause activated; stopping before model load for {media_path}", flush=True)
|
||||||
|
raise PauseInterrupt("pause requested after extract")
|
||||||
|
|
||||||
title = media_path.stem
|
title = media_path.stem
|
||||||
base = TRN / title
|
base = TRN / title
|
||||||
|
Reference in New Issue
Block a user