Added the ability to refresh metadata
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
|
||||
|
||||
import os
|
||||
from typing import Set
|
||||
import time
|
||||
import signal
|
||||
import sys
|
||||
@@ -20,13 +21,19 @@ JOB_TTL = int(os.getenv("JOB_TTL", "86400")) # 24 hours
|
||||
RESULT_TTL = int(os.getenv("RESULT_TTL", "86400")) # 24 hours
|
||||
FAILURE_TTL = int(os.getenv("FAILURE_TTL", "86400")) # 24 hours
|
||||
|
||||
# Optional refresh of existing items to fetch metadata/subtitles/thumbnails
|
||||
# Feature switch for refreshing sidecars of already-transcribed media.
# Enabled by default; "0", "false" (in any letter case) or an empty value
# disables it. The value is case-folded first so that spellings such as
# "FALSE" are not silently treated as "enabled".
REFRESH_EXISTING = os.getenv("REFRESH_EXISTING", "1").strip().casefold() not in ("0", "false", "")

# Passed as `ttl=` when a refresh job is enqueued (21600 s = 6 hours).
REFRESH_TTL = int(os.getenv("REFRESH_TTL", "21600"))  # 6 hours

# Passed as `failure_ttl=` when a refresh job is enqueued (21600 s = 6 hours).
REFRESH_FAILURE_TTL = int(os.getenv("REFRESH_FAILURE_TTL", "21600"))  # 6 hours
|
||||
|
||||
# Media types to track
|
||||
MEDIA_EXT = {
|
||||
".mp3", ".m4a", ".mp4", ".mkv", ".wav", ".flac", ".webm", ".ogg", ".opus"
|
||||
}
|
||||
|
||||
# In-memory seen set to avoid re-enqueueing during a single run
|
||||
_seen: set[str] = set()
|
||||
_seen: Set[str] = set()
|
||||
_seen_refresh: Set[str] = set()
|
||||
|
||||
|
||||
def already_transcribed(p: Path) -> bool:
|
||||
@@ -35,6 +42,28 @@ def already_transcribed(p: Path) -> bool:
|
||||
return base_json.exists()
|
||||
|
||||
|
||||
# Helper to decide when to refresh sidecars
|
||||
def needs_refresh(p: Path) -> bool:
    """Return True when at least one sidecar file for media file *p* is missing.

    Sidecars are looked up in the same directory as *p* and are named after
    the media file's name with only its final extension removed:

    - metadata:  ``<name>.info.json``
    - subtitles: ``<name>.srt`` or any language-tagged ``<name>.<lang>.srt``
      (for example ``<name>.en.srt``)
    - thumbnail: ``<name>.jpg`` or ``<name>.png``

    Args:
        p: Path of the media file to inspect.

    Returns:
        True if the metadata, every subtitle variant, or every thumbnail
        variant is absent; False when all three kinds are present.
    """
    # Function-scope import keeps this helper self-contained.
    from glob import escape as glob_escape

    folder = p.parent
    base = p.stem  # file name minus the final extension only

    # Build sidecar names by appending to the full base name. Using
    # with_suffix() on the stem would strip a dotted component of the name
    # itself ("my.video" -> "my"), pointing at the wrong file.
    has_info = p.with_name(f"{base}.info.json").exists()

    # Escape the base name so characters such as "[" or "*" in the file name
    # are matched literally rather than interpreted as glob syntax.
    has_subs = p.with_name(f"{base}.srt").exists() or any(
        folder.glob(f"{glob_escape(base)}.*.srt")
    )

    has_thumb = any(
        p.with_name(f"{base}{ext}").exists() for ext in (".jpg", ".png")
    )

    return not (has_info and has_subs and has_thumb)
|
||||
|
||||
|
||||
def iter_media_files(root: Path):
|
||||
for path in root.rglob("*"):
|
||||
if not path.is_file():
|
||||
@@ -57,7 +86,23 @@ def enqueue_new_files():
|
||||
continue
|
||||
if already_transcribed(p):
|
||||
_seen.add(key)
|
||||
print(f"[scanner] Skip (already transcribed): {p}", flush=True)
|
||||
if REFRESH_EXISTING and needs_refresh(p):
|
||||
if key not in _seen_refresh:
|
||||
# Ask worker to refresh metadata/subtitles/thumbnails without redownloading media
|
||||
q.enqueue(
|
||||
"worker.refresh_media",
|
||||
key,
|
||||
job_timeout=JOB_TIMEOUT,
|
||||
ttl=REFRESH_TTL,
|
||||
result_ttl=RESULT_TTL,
|
||||
failure_ttl=REFRESH_FAILURE_TTL,
|
||||
)
|
||||
_seen_refresh.add(key)
|
||||
print(f"[scanner] Refresh enqueued: {p}", flush=True)
|
||||
else:
|
||||
print(f"[scanner] Skip (already queued refresh): {p}", flush=True)
|
||||
else:
|
||||
print(f"[scanner] Skip (already transcribed): {p}", flush=True)
|
||||
continue
|
||||
# Enqueue the worker to process this local file (with generous timeouts)
|
||||
q.enqueue(
|
||||
|
Reference in New Issue
Block a user