Added the ability to refresh metadata, subtitles, and thumbnails for existing media
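When the scanner finds media that is already transcribed, it now checks whether the sidecar files (.info.json, an SRT subtitle, a JPG/PNG thumbnail) are present; if any are missing and REFRESH_EXISTING is enabled, it enqueues a worker.refresh_media job, de-duplicated per run via _seen_refresh. The worker re-runs yt-dlp with --skip-download against the URL stored in the existing .info.json to re-fetch those sidecars, then rebuilds the NFO and artwork. (The _seen annotation also switches from set[str] to typing.Set[str], presumably for pre-3.9 Python compatibility.)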
@@ -1,6 +1,7 @@
 import os
+from typing import Set
 import time
 import signal
 import sys
@@ -20,13 +21,19 @@ JOB_TTL = int(os.getenv("JOB_TTL", "86400")) # 24 hours
 RESULT_TTL = int(os.getenv("RESULT_TTL", "86400")) # 24 hours
 FAILURE_TTL = int(os.getenv("FAILURE_TTL", "86400")) # 24 hours
 
+# Optional refresh of existing items to fetch metadata/subtitles/thumbnails
+REFRESH_EXISTING = os.getenv("REFRESH_EXISTING", "1").strip() not in ("0", "false", "False", "")
+REFRESH_TTL = int(os.getenv("REFRESH_TTL", "21600")) # 6 hours
+REFRESH_FAILURE_TTL = int(os.getenv("REFRESH_FAILURE_TTL", "21600"))
+
 # Media types to track
 MEDIA_EXT = {
     ".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
 }
 
 # In-memory seen set to avoid re-enqueueing during a single run
-_seen: set[str] = set()
+_seen: Set[str] = set()
+_seen_refresh: Set[str] = set()
 
 
 def already_transcribed(p: Path) -> bool:
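A quick sanity check of the REFRESH_EXISTING parsing rule above (the expression is copied from the diff; the small harness around it is mine). Note that any unrecognized value, such as "no" or "TRUE", also counts as enabled:

    def parse_refresh_existing(raw: str) -> bool:
        # Same expression the scanner uses: anything other than "0", "false",
        # "False", or an empty/whitespace-only value enables refresh.
        return raw.strip() not in ("0", "false", "False", "")

    for raw in ("1", "true", "0", "false", "False", "", "   "):
        print(f"{raw!r} -> {parse_refresh_existing(raw)}")
    # "1"/"true" -> True; "0"/"false"/"False"/""/"   " -> False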
@@ -35,6 +42,28 @@ def already_transcribed(p: Path) -> bool:
     return base_json.exists()
 
 
+# Helper to decide when to refresh sidecars
+def needs_refresh(p: Path) -> bool:
+    """
+    Decide whether to refresh sidecars for a media file:
+    - If metadata (*.info.json) is missing
+    - If no subtitle SRT is present next to the media (either .srt or .en.srt)
+    - If no thumbnail JPG/PNG is present next to the media
+    """
+    stem = p.with_suffix("")
+    info_json = stem.with_suffix(".info.json")
+    # Accept any language-suffixed SRT as well
+    srt_plain = stem.with_suffix(".srt")
+    srt_en = p.with_suffix(".en.srt")
+    has_any_srt = srt_plain.exists() or srt_en.exists() or any(p.parent.glob(p.stem + ".*.srt"))
+    thumb_jpg = stem.with_suffix(".jpg")
+    thumb_png = stem.with_suffix(".png")
+    missing_info = not info_json.exists()
+    missing_subs = not has_any_srt
+    missing_thumb = not (thumb_jpg.exists() or thumb_png.exists())
+    return missing_info or missing_subs or missing_thumb
+
+
 def iter_media_files(root: Path):
     for path in root.rglob("*"):
         if not path.is_file():
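A minimal way to exercise needs_refresh against a scratch directory (a sketch: it assumes the scanner module is importable as scanner, which the diff does not confirm):

    import tempfile
    from pathlib import Path

    from scanner import needs_refresh  # assumed module name

    with tempfile.TemporaryDirectory() as d:
        media = Path(d) / "episode.mp3"
        media.touch()
        print(needs_refresh(media))   # True: no sidecars yet
        (Path(d) / "episode.info.json").touch()
        (Path(d) / "episode.en.srt").touch()
        (Path(d) / "episode.jpg").touch()
        print(needs_refresh(media))   # False: info.json, an SRT, and a thumbnail all present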
@@ -57,7 +86,23 @@ def enqueue_new_files():
             continue
         if already_transcribed(p):
             _seen.add(key)
-            print(f"[scanner] Skip (already transcribed): {p}", flush=True)
+            if REFRESH_EXISTING and needs_refresh(p):
+                if key not in _seen_refresh:
+                    # Ask worker to refresh metadata/subtitles/thumbnails without redownloading media
+                    q.enqueue(
+                        "worker.refresh_media",
+                        key,
+                        job_timeout=JOB_TIMEOUT,
+                        ttl=REFRESH_TTL,
+                        result_ttl=RESULT_TTL,
+                        failure_ttl=REFRESH_FAILURE_TTL,
+                    )
+                    _seen_refresh.add(key)
+                    print(f"[scanner] Refresh enqueued: {p}", flush=True)
+                else:
+                    print(f"[scanner] Skip (already queued refresh): {p}", flush=True)
+            else:
+                print(f"[scanner] Skip (already transcribed): {p}", flush=True)
             continue
         # Enqueue the worker to process this local file (with generous timeouts)
         q.enqueue(
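For context, the q.enqueue call above passes the job function as a dotted string, which RQ resolves on the worker side, so the scanner never has to import worker.py. A sketch of the setup this assumes (the queue name, Redis connection, and literal values stand in for the real config):

    from redis import Redis
    from rq import Queue

    q = Queue("default", connection=Redis())  # queue name is an assumption

    job = q.enqueue(
        "worker.refresh_media",
        "/library/Show/episode.mp3",  # hypothetical media path (the scanner passes `key`)
        job_timeout=3600,             # stands in for JOB_TIMEOUT
        ttl=21600,                    # REFRESH_TTL: discard the job if not started within 6 h
        result_ttl=86400,             # RESULT_TTL
        failure_ttl=21600,            # REFRESH_FAILURE_TTL
    )
    print(job.id)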
app/worker.py (+106 lines)
@@ -1280,6 +1280,112 @@ def handle_local_file(path_str: str):
         log({"url": path_str, "status": "error", "error": str(e)})
         raise
 
+
+# --- Refresh sidecar metadata and subtitles for an already-downloaded media file ---
+def refresh_media(path_str: str):
+    """
+    Refresh sidecar metadata (info.json, thumbnail) and subtitles for an already-downloaded media file.
+    Requires a companion .info.json next to the media (to supply the original URL). No media re-download.
+    """
+    try:
+        p = Path(path_str)
+        if not p.exists() or not p.is_file():
+            log({"url": path_str, "status": "error", "error": "file_not_found"})
+            return
+
+        # Locate existing info.json to get the original URL
+        info_json = None
+        for cand in [p.parent / f"{p.name}.info.json", p.parent / f"{p.stem}.info.json"]:
+            if cand.exists():
+                info_json = cand
+                break
+
+        if not info_json:
+            log({"path": str(p), "status": "refresh-skip", "reason": "no_info_json"})
+            print(f"[refresh] skip: no info.json next to {p}", flush=True)
+            return
+
+        info = load_info_json(info_json) or {}
+        url = info.get("webpage_url") or info.get("original_url") or info.get("url")
+        if not url:
+            log({"path": str(p), "status": "refresh-skip", "reason": "no_url_in_info"})
+            print(f"[refresh] skip: no URL in {info_json}", flush=True)
+            return
+
+        # Prepare yt-dlp command to refresh sidecars only, writing files exactly next to the media
+        outtmpl = str(p.with_suffix(".%(ext)s"))
+        sub_langs = os.getenv("YTDLP_SUBS_LANGS", "en.*,en")
+
+        cmd = [
+            "yt-dlp",
+            "--skip-download",
+            "--write-info-json",
+            "--write-thumbnail",
+            "--convert-thumbnails", "jpg",
+            "--write-subs", "--write-auto-subs",
+            "--sub-langs", sub_langs,
+            "--convert-subs", "srt",
+            "-o", outtmpl,
+            url,
+        ]
+
+        print(f"[refresh] refreshing sidecars for {p} via yt-dlp", flush=True)
+        try:
+            subprocess.check_call(cmd)
+        except subprocess.CalledProcessError as e:
+            print(f"[refresh] yt-dlp failed: {e}", flush=True)
+            raise
+
+        # Ensure a language-suffixed SRT exists (Plex-friendly) if any subs were fetched
+        try:
+            # Pick any .srt just fetched that matches the base name
+            for s in p.parent.glob(f"{p.stem}*.srt"):
+                # If it's already lang-suffixed, keep it; copy a plain .srt to .en.srt
+                if s.name == f"{p.stem}.srt":
+                    shutil.copy2(s, p.with_suffix(".en.srt"))
+        except Exception:
+            pass
+
+        # Rebuild NFO using the fresh info.json (and RSS if available)
+        try:
+            # Try an RSS match to enrich metadata (non-fatal if not present)
+            ep = None
+            try:
+                ep = match_media_to_rss(p)
+            except Exception:
+                ep = None
+
+            fallback = {
+                "title": p.stem,
+                "episode_title": p.stem,
+                "show": p.parent.name,
+                "description": "",
+                "pubdate": _extract_date_from_stem(p.stem),
+                "duration_sec": media_duration_seconds(p),
+                "image": "",
+                "guid": "",
+            }
+            meta = build_meta_from_sources(p, p.parent.name, fallback, ep)
+            # Save local artwork too
+            try:
+                save_episode_artwork(meta.get("image"), p, meta.get("show"))
+            except Exception:
+                pass
+
+            # If a transcript already exists, include it in the NFO plot preview
+            ttxt_path = (TRN / p.stem).with_suffix(".txt")
+            ttxt = ttxt_path.read_text(encoding="utf-8") if ttxt_path.exists() else None
+            write_episode_nfo(p, meta, ttxt)
+        except Exception as e:
+            print(f"[refresh] NFO/artwork update failed: {e}", flush=True)
+
+        log({"path": str(p), "status": "refresh-done"})
+        print(f"[refresh] done for {p}", flush=True)
+
+    except Exception as e:
+        log({"path": path_str, "status": "error", "error": str(e)})
+        raise
+
 def handle_web(url: str):
     info = {"url": url, "status":"web-downloading", "title":"", "uploader":"", "date":"", "path":""}
     log(info)
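refresh_media leans on helpers defined elsewhere in worker.py that this hunk does not show (load_info_json, match_media_to_rss, build_meta_from_sources, write_episode_nfo, save_episode_artwork). For reference, a minimal stand-in for load_info_json consistent with the `load_info_json(info_json) or {}` call above; the real implementation may differ:

    import json
    from pathlib import Path
    from typing import Optional

    def load_info_json(path: Path) -> Optional[dict]:
        # Return the parsed yt-dlp .info.json, or None if it is missing or
        # corrupt, letting callers fall back with `load_info_json(p) or {}`.
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            return None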