Added the ability to refresh metadata
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
|
||||
|
||||
import os
|
||||
from typing import Set
|
||||
import time
|
||||
import signal
|
||||
import sys
|
||||
@@ -20,13 +21,19 @@ JOB_TTL = int(os.getenv("JOB_TTL", "86400")) # 24 hours
|
||||
RESULT_TTL = int(os.getenv("RESULT_TTL", "86400")) # 24 hours
|
||||
FAILURE_TTL = int(os.getenv("FAILURE_TTL", "86400")) # 24 hours
|
||||
|
||||
# Optional refresh of existing items to fetch metadata/subtitles/thumbnails
# Enabled by default; any of "0"/"false"/"False"/"" (after strip) disables it.
REFRESH_EXISTING = os.getenv("REFRESH_EXISTING", "1").strip() not in ("0", "false", "False", "")
# Queue TTLs (seconds) for refresh jobs; shorter than the main job TTLs above.
REFRESH_TTL = int(os.getenv("REFRESH_TTL", "21600"))  # 6 hours
REFRESH_FAILURE_TTL = int(os.getenv("REFRESH_FAILURE_TTL", "21600"))
|
||||
|
||||
# Media types to track
# Lowercase file extensions (with leading dot) the scanner treats as media.
MEDIA_EXT = {
    ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
}
|
||||
|
||||
# In-memory seen sets to avoid re-enqueueing during a single run.
# NOTE: the previous version bound _seen twice (first with the builtin
# generic `set[str]`, then with `typing.Set[str]`); the duplicate binding
# is removed and the `typing.Set` spelling kept for consistency with
# `_seen_refresh` and the file's `from typing import Set` import.
_seen: Set[str] = set()          # media keys already enqueued or skipped this run
_seen_refresh: Set[str] = set()  # media keys already queued for sidecar refresh
|
||||
|
||||
|
||||
def already_transcribed(p: Path) -> bool:
|
||||
@@ -35,6 +42,28 @@ def already_transcribed(p: Path) -> bool:
|
||||
return base_json.exists()
|
||||
|
||||
|
||||
# Helper to decide when to refresh sidecars
def needs_refresh(p: Path) -> bool:
    """
    Decide whether to refresh sidecars for a media file.

    Returns True when any of the following is missing next to the media:
      - metadata (``<stem>.info.json``)
      - a subtitle SRT (``<stem>.srt``, ``<stem>.en.srt``, or any
        language-suffixed ``<stem>.<lang>.srt``)
      - a thumbnail (``<stem>.jpg`` or ``<stem>.png``)

    Sidecar names are built by appending to ``p.stem`` rather than via
    ``Path.with_suffix``: for dotted stems (e.g. ``Show.S01E01.mp4``)
    ``with_suffix`` clips everything after the stem's last dot, which made
    the old code look for ``Show.info.json`` instead of
    ``Show.S01E01.info.json`` and report sidecars missing forever.
    """
    # Build sidecar paths by string concatenation so dotted stems resolve
    # to the correct files.
    info_json = p.parent / f"{p.stem}.info.json"
    srt_plain = p.parent / f"{p.stem}.srt"
    srt_en = p.parent / f"{p.stem}.en.srt"
    # Accept any language-suffixed SRT as well (e.g. ".es.srt").
    has_any_srt = (
        srt_plain.exists()
        or srt_en.exists()
        or any(p.parent.glob(p.stem + ".*.srt"))
    )
    thumb_jpg = p.parent / f"{p.stem}.jpg"
    thumb_png = p.parent / f"{p.stem}.png"

    missing_info = not info_json.exists()
    missing_subs = not has_any_srt
    missing_thumb = not (thumb_jpg.exists() or thumb_png.exists())
    return missing_info or missing_subs or missing_thumb
|
||||
|
||||
|
||||
def iter_media_files(root: Path):
|
||||
for path in root.rglob("*"):
|
||||
if not path.is_file():
|
||||
@@ -57,7 +86,23 @@ def enqueue_new_files():
|
||||
continue
|
||||
if already_transcribed(p):
|
||||
_seen.add(key)
|
||||
print(f"[scanner] Skip (already transcribed): {p}", flush=True)
|
||||
if REFRESH_EXISTING and needs_refresh(p):
|
||||
if key not in _seen_refresh:
|
||||
# Ask worker to refresh metadata/subtitles/thumbnails without redownloading media
|
||||
q.enqueue(
|
||||
"worker.refresh_media",
|
||||
key,
|
||||
job_timeout=JOB_TIMEOUT,
|
||||
ttl=REFRESH_TTL,
|
||||
result_ttl=RESULT_TTL,
|
||||
failure_ttl=REFRESH_FAILURE_TTL,
|
||||
)
|
||||
_seen_refresh.add(key)
|
||||
print(f"[scanner] Refresh enqueued: {p}", flush=True)
|
||||
else:
|
||||
print(f"[scanner] Skip (already queued refresh): {p}", flush=True)
|
||||
else:
|
||||
print(f"[scanner] Skip (already transcribed): {p}", flush=True)
|
||||
continue
|
||||
# Enqueue the worker to process this local file (with generous timeouts)
|
||||
q.enqueue(
|
||||
|
106
app/worker.py
106
app/worker.py
@@ -1280,6 +1280,112 @@ def handle_local_file(path_str: str):
|
||||
log({"url": path_str, "status": "error", "error": str(e)})
|
||||
raise
|
||||
|
||||
|
||||
# --- Refresh sidecar metadata and subtitles for an already-downloaded media file ---
def refresh_media(path_str: str):
    """
    Refresh sidecar metadata (info.json, thumbnail) and subtitles for an
    already-downloaded media file.

    Requires a companion .info.json next to the media (to supply the original
    URL). No media re-download: yt-dlp is invoked with --skip-download and
    only writes sidecar files next to the media.

    Args:
        path_str: Filesystem path to the media file to refresh.

    Side effects:
        Runs yt-dlp as a subprocess; may write/overwrite ``*.info.json``,
        thumbnail (converted to jpg), and SRT subtitle files next to the
        media; copies a plain ``.srt`` to ``.en.srt``; rebuilds the NFO and
        local artwork via project helpers; emits ``log(...)`` status records.

    Raises:
        Re-raises yt-dlp failures (CalledProcessError) and any unexpected
        exception after logging, so RQ marks the job failed.
    """
    try:
        p = Path(path_str)
        # Bail out (logged, not raised) when the media file has vanished.
        if not p.exists() or not p.is_file():
            log({"url": path_str, "status": "error", "error": "file_not_found"})
            return

        # Locate existing info.json to get the original URL.
        # Checks "<name>.info.json" (full filename incl. media extension)
        # before "<stem>.info.json" — both layouts occur depending on how
        # the file was originally downloaded.
        info_json = None
        for cand in [p.parent / f"{p.name}.info.json", p.parent / f"{p.stem}.info.json"]:
            if cand.exists():
                info_json = cand
                break

        if not info_json:
            # Without the original URL we cannot refresh; skip quietly.
            log({"path": str(p), "status": "refresh-skip", "reason": "no_info_json"})
            print(f"[refresh] skip: no info.json next to {p}", flush=True)
            return

        # load_info_json is a project helper; presumably returns a dict or
        # None on parse failure — the `or {}` guards the None case.
        info = load_info_json(info_json) or {}
        url = info.get("webpage_url") or info.get("original_url") or info.get("url")
        if not url:
            log({"path": str(p), "status": "refresh-skip", "reason": "no_url_in_info"})
            print(f"[refresh] skip: no URL in {info_json}", flush=True)
            return

        # Prepare yt-dlp command to refresh sidecars only, writing files
        # exactly next to the media (output template reuses the media path
        # with yt-dlp's %(ext)s placeholder as the extension).
        outtmpl = str(p.with_suffix(".%(ext)s"))
        sub_langs = os.getenv("YTDLP_SUBS_LANGS", "en.*,en")

        cmd = [
            "yt-dlp",
            "--skip-download",            # sidecars only; never re-fetch media
            "--write-info-json",
            "--write-thumbnail",
            "--convert-thumbnails", "jpg",
            "--write-subs", "--write-auto-subs",
            "--sub-langs", sub_langs,
            "--convert-subs", "srt",
            "-o", outtmpl,
            url,
        ]

        print(f"[refresh] refreshing sidecars for {p} via yt-dlp", flush=True)
        try:
            # List-form subprocess call (shell=False) — URL is not shell-interpolated.
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            print(f"[refresh] yt-dlp failed: {e}", flush=True)
            raise

        # Ensure language-suffixed SRT exists (Plex-friendly) if any subs were fetched.
        # Best-effort: any failure here is swallowed deliberately.
        try:
            # Pick any .srt just fetched that matches base
            for s in p.parent.glob(f"{p.stem}*.srt"):
                # If it's already lang-suffixed, keep; also copy to .en.srt when only plain .srt exists
                if s.name == f"{p.stem}.srt":
                    shutil.copy2(s, p.with_suffix(".en.srt"))
        except Exception:
            pass

        # Rebuild NFO using fresh info.json (and RSS if available).
        # Non-fatal: NFO/artwork failure is logged but does not fail the job.
        try:
            # Try RSS match to enrich metadata (non-fatal if not present)
            ep = None
            try:
                ep = match_media_to_rss(p)
            except Exception:
                ep = None

            # Fallback metadata derived purely from the filename/directory,
            # used when neither info.json nor RSS supplies a field.
            fallback = {
                "title": p.stem,
                "episode_title": p.stem,
                "show": p.parent.name,
                "description": "",
                "pubdate": _extract_date_from_stem(p.stem),
                "duration_sec": media_duration_seconds(p),
                "image": "",
                "guid": "",
            }
            meta = build_meta_from_sources(p, p.parent.name, fallback, ep)
            # Save local artwork too
            try:
                save_episode_artwork(meta.get("image"), p, meta.get("show"))
            except Exception:
                pass

            # If a transcript already exists, include it in the NFO plot preview.
            # NOTE(review): with_suffix(".txt") clips stems containing dots
            # (e.g. "Show.S01E01" -> "Show.txt") — confirm transcript naming
            # for dotted filenames matches this lookup.
            ttxt_path = (TRN / p.stem).with_suffix(".txt")
            ttxt = ttxt_path.read_text(encoding="utf-8") if ttxt_path.exists() else None
            write_episode_nfo(p, meta, ttxt)
        except Exception as e:
            print(f"[refresh] NFO/artwork update failed: {e}", flush=True)

        log({"path": str(p), "status": "refresh-done"})
        print(f"[refresh] done for {p}", flush=True)

    except Exception as e:
        # Top-level boundary: record the failure, then re-raise so the
        # queue marks the job failed.
        log({"path": path_str, "status": "error", "error": str(e)})
        raise
|
||||
|
||||
def handle_web(url: str):
|
||||
info = {"url": url, "status":"web-downloading", "title":"", "uploader":"", "date":"", "path":""}
|
||||
log(info)
|
||||
|
Reference in New Issue
Block a user