Switching to metube for downloads
app/scanner.py (new file)
@@ -0,0 +1,92 @@
import os
import time
import signal
import sys
from pathlib import Path

from redis import Redis
from rq import Queue

# Config via env (matches docker-compose)
LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", "30"))  # seconds

# Media types to track
MEDIA_EXT = {
    ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
}

# In-memory seen set to avoid re-enqueueing during a single run
_seen: set[str] = set()


def already_transcribed(p: Path) -> bool:
    """Heuristic: if <basename>.json exists in transcripts, consider it done."""
    base_json = TRN / f"{p.stem}.json"
    return base_json.exists()


def iter_media_files(root: Path):
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        if path.suffix.lower() in MEDIA_EXT:
            yield path


def enqueue_new_files():
    q = Queue(connection=Redis.from_url(REDIS_URL))

    # Ensure target dirs exist
    TRN.mkdir(parents=True, exist_ok=True)
    LIB.mkdir(parents=True, exist_ok=True)

    new_jobs = 0
    for p in iter_media_files(LIB):
        key = str(p.resolve())
        if key in _seen:
            continue
        if already_transcribed(p):
            _seen.add(key)
            continue
        # Enqueue the worker to process this local file
        q.enqueue("worker.handle_local_file", key)
        _seen.add(key)
        new_jobs += 1
    return new_jobs


_shutdown = False


def _handle_sig(sig, frame):
    global _shutdown
    _shutdown = True


def main():
    signal.signal(signal.SIGINT, _handle_sig)
    signal.signal(signal.SIGTERM, _handle_sig)

    print(f"[scanner] Watching {LIB} → transcripts in {TRN}; interval={SCAN_INTERVAL}s", flush=True)
    while not _shutdown:
        try:
            jobs = enqueue_new_files()
            if jobs:
                print(f"[scanner] Enqueued {jobs} new file(s)", flush=True)
        except Exception as e:
            print(f"[scanner] Error: {e}", file=sys.stderr, flush=True)
        # Sleep between passes
        for _ in range(SCAN_INTERVAL):
            if _shutdown:
                break
            time.sleep(1)

    print("[scanner] Shutting down", flush=True)


if __name__ == "__main__":
    main()
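The scanner hands work to RQ by dotted path only ("worker.handle_local_file"), so the queue side can be exercised by hand without waiting for a scan pass. A minimal sketch, assuming Redis is reachable at the URL shown from a host-side shell (inside the compose network it would be redis://redis:6379/0); the file path is purely illustrative:

from redis import Redis
from rq import Queue

# Same default queue the scanner uses; the URL here is an assumption for a shell on the host.
q = Queue(connection=Redis.from_url("redis://localhost:6379/0"))

# Enqueue one local file by its absolute path inside the containers (illustrative path).
job = q.enqueue("worker.handle_local_file", "/library/Some Show/20240101 - Episode.mp3")
print(job.id, job.get_status())

# Peek at what the scanner has queued so far.
print(q.count, q.job_ids)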
@@ -231,6 +231,32 @@ def publish_to_openwebui(paths):
    except Exception as e:
        log({"status": "owui_error", "error": str(e)})


from pathlib import Path


def handle_local_file(path_str: str):
    """Transcribe & index a local media file that already exists in /library.

    Safe to call repeatedly; it skips if the transcript JSON already exists.
    """
    try:
        p = Path(path_str)
        if not p.exists():
            log({"url": path_str, "status": "error", "error": "file_not_found"})
            return
        title = p.stem
        base_json = TRN / f"{title}.json"
        if base_json.exists():
            log({"url": path_str, "status": "skip", "reason": "already_transcribed"})
            return
        info = {"url": path_str, "status": "transcribing", "title": title, "uploader": p.parent.name, "date": "", "path": str(p)}
        log(info)
        base = transcribe(p)
        index_meili(base.with_suffix(".json"))
        publish_to_openwebui([base.with_suffix(".txt")])
        log({**info, "status": "done"})
    except Exception as e:
        log({"url": path_str, "status": "error", "error": str(e)})
        raise


def handle_web(url: str):
    info = {"url": url, "status": "web-downloading", "title": "", "uploader": "", "date": "", "path": ""}
    log(info)
@@ -244,6 +270,12 @@ def handle_web(url: str):
def handle_url(url: str):
    try:
        # If a local file path (or file:// URL) is provided, process it directly
        if url.startswith("file://"):
            return handle_local_file(url[7:])
        if url.startswith("/") and Path(url).exists():
            return handle_local_file(url)

        if not is_media_url(url):
            handle_web(url)
            return
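handle_url is now the single entry point for both local files that metube has already written to disk and ordinary web URLs. A short sketch of the routing, with illustrative paths and URL (handle_url, handle_local_file and handle_web are the worker functions above):

# Absolute path that exists inside the container: dispatched to handle_local_file.
handle_url("/library/Some Show/20240101 - Episode.mp3")

# file:// form of the same path: the scheme prefix is stripped before dispatch.
handle_url("file:///library/Some Show/20240101 - Episode.mp3")

# Anything else is treated as a URL; non-media pages fall through to handle_web.
handle_url("https://example.com/some-article")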
@@ -63,3 +63,41 @@ services:
    volumes:
      - ${REDIS_DATA_HOST_DIR:-./data/redis}:/data
    restart: unless-stopped

  metube:
    image: alexta69/metube:latest
    container_name: metube
    ports:
      - "8081:8081"
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Europe/Prague
      - DOWNLOAD_DIR=/downloads
      - OUTPUT_TEMPLATE=%(uploader)s/%(upload_date)s - %(title)s.%(ext)s
      # Optional: pass a cookies file to bypass consent/age walls
      # - COOKIE_FILE=/config/cookies.txt
      # Optional: yt-dlp options (JSON). Example enables Android client fallback
      # - YTDL_OPTIONS={"extractor_args":{"youtube":{"player_client":"android"}}}
    volumes:
      - ${LIBRARY_HOST_DIR:-./library}:/downloads
      # Optional cookies file on host → /config/cookies.txt inside container
      # - /mnt/secure/cookies.txt:/config/cookies.txt:ro
    restart: unless-stopped

  podx-scanner:
    build: ./app
    container_name: podx-scanner
    command: ["python", "scanner.py"]
    env_file: [.env]
    environment:
      MEILI_URL: http://meili:7700
      REDIS_URL: redis://redis:6379/0
      LIBRARY_ROOT: /library
      TRANSCRIPT_ROOT: /transcripts
      SCAN_INTERVAL: 30
    volumes:
      - ${LIBRARY_HOST_DIR:-./library}:/library
      - ${TRANSCRIPTS_HOST_DIR:-./transcripts}:/transcripts
    depends_on: [redis]
    restart: unless-stopped
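Because metube's DOWNLOAD_DIR and the scanner's LIBRARY_ROOT are backed by the same host directory (${LIBRARY_HOST_DIR:-./library}), a download written as %(uploader)s/%(upload_date)s - %(title)s.%(ext)s appears to the scanner and worker as /library/<uploader>/<date> - <title>.<ext>, which is exactly where handle_local_file derives its uploader and title fields from. A small sketch of that mapping, with an illustrative filename:

from pathlib import Path

# A path as metube would lay it out; /downloads and /library are the same host dir.
p = Path("/library/Some Channel/20240101 - Episode Title.mp3")

uploader = p.parent.name   # "Some Channel" -> info["uploader"]
title = p.stem             # "20240101 - Episode Title" -> info["title"]
transcript = Path("/transcripts") / f"{title}.json"  # what already_transcribed() checks
print(uploader, title, transcript)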