diff --git a/app/worker.py b/app/worker.py index 4af503e..b68ccee 100644 --- a/app/worker.py +++ b/app/worker.py @@ -4,6 +4,8 @@ import math import difflib from faster_whisper import WhisperModel +from xml.sax.saxutils import escape as xml_escape + MEILI_URL = os.getenv("MEILI_URL", "http://meili:7700") MEILI_KEY = os.getenv("MEILI_KEY", "") LIB = Path(os.getenv("LIBRARY_ROOT", "/library")) @@ -197,6 +199,25 @@ def use_rss_transcript(media_path: Path, ep: dict) -> Path | None: "transcript_kind": sidecar.suffix.lower().lstrip("."), "transcript_url": _choose_transcript_url(ep)[0] or "", })) + # Write Kodi/Plex-compatible NFO + try: + # Gather metadata for NFO from RSS entry + meta = { + "title": ep.get("title"), + "episode_title": ep.get("title"), + "show": ep.get("podcast_title") or ep.get("feed_title") or ep.get("show"), + "description": ep.get("description") or ep.get("content"), + "pubdate": ep.get("pubdate"), + "pubdate_iso": ep.get("date_iso"), + "duration_sec": ep.get("duration_sec") or ep.get("duration"), + "image": ep.get("image") or ep.get("image_url"), + "guid": ep.get("guid"), + } + txt_path = base.with_suffix(".txt") + transcript_text = txt_path.read_text(encoding="utf-8") if txt_path.exists() else None + write_episode_nfo(media_path, meta, transcript_text) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) return base def find_sidecar_transcript(media_path: Path) -> Path | None: @@ -315,6 +336,24 @@ def reuse_repo_transcript(media_path: Path, repo_json: Path) -> Path | None: except Exception: pass + # Write Kodi/Plex-compatible NFO + try: + meta = { + "title": data.get("title") or media_path.stem, + "episode_title": data.get("title") or media_path.stem, + "show": data.get("show") or media_path.parent.name, + "description": data.get("description") or "", + "pubdate": data.get("pubdate") or data.get("date"), + "duration_sec": media_duration_seconds(media_path), + "image": data.get("image"), + "guid": data.get("guid") or data.get("id"), + } + txtp = new_base.with_suffix(".txt") + ttxt = txtp.read_text(encoding="utf-8") if txtp.exists() else None + write_episode_nfo(media_path, meta, ttxt) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) + return new_base except Exception as e: print(f"[resolver] failed to reuse repo transcript: {e}", flush=True) @@ -364,6 +403,84 @@ def ensure_sidecar_next_to_media(sidecar: Path, media_path: Path, lang: str = "e print(f"[post] sidecar copy/convert failed: {e}", flush=True) + +# ---------- Kodi/Plex NFO writer ---------- +from datetime import datetime + +def _first_nonempty(*vals): + for v in vals: + if v is None: + continue + if isinstance(v, str) and v.strip(): + return v.strip() + if v: + return v + return None + +def _coerce_aired(pubdate: str | None) -> str: + """Convert RSS-style pubdate to YYYY-MM-DD if possible.""" + if not pubdate: + return "" + # already ISO-like + m = re.match(r"^(\d{4})[-/](\d{2})[-/](\d{2})", pubdate) + if m: + return f"{m.group(1)}-{m.group(2)}-{m.group(3)}" + # RFC 2822 example: Tue, 21 Feb 2023 06:00:00 +0000 + try: + dt = datetime.strptime(pubdate[:31], "%a, %d %b %Y %H:%M:%S %z") + return dt.strftime("%Y-%m-%d") + except Exception: + # try without tz + try: + dt = datetime.strptime(pubdate[:25], "%a, %d %b %Y %H:%M:%S") + return dt.strftime("%Y-%m-%d") + except Exception: + return "" + +def write_episode_nfo(media_path: Path, meta: dict, transcript_text: str | None = None) -> Path: + """Write a minimal Kodi/Plex-compatible NFO next to the media file. + `meta` may include: title, show, plot, pubdate, duration_sec, thumb, guid. + """ + try: + title = _first_nonempty(meta.get("episode_title"), meta.get("title"), media_path.stem) or media_path.stem + show = _first_nonempty(meta.get("show"), meta.get("podcast_title"), meta.get("feed_title"), media_path.parent.name) or media_path.parent.name + plot = _first_nonempty(meta.get("description"), meta.get("content"), meta.get("summary"), "") or "" + # Optionally append transcript preview to plot + if transcript_text: + preview = transcript_text.strip() + if preview: + preview = (preview[:1800] + "…") if len(preview) > 1800 else preview + plot = (plot + "\n\n" if plot else "") + preview + aired = _coerce_aired(_first_nonempty(meta.get("pubdate_iso"), meta.get("pubdate"))) + guid = _first_nonempty(meta.get("guid"), meta.get("id"), "") or "" + thumb = _first_nonempty(meta.get("image"), meta.get("image_url"), meta.get("thumbnail"), "") or "" + dur_s = meta.get("duration_sec") or meta.get("duration") or 0 + try: + dur_min = int(round(float(dur_s) / 60.0)) if dur_s else 0 + except Exception: + dur_min = 0 + + # Build XML + xml = [""] + xml.append(f" {xml_escape(title)}") + xml.append(f" {xml_escape(show)}") + if plot: + xml.append(f" {xml_escape(plot)}") + if aired: + xml.append(f" {xml_escape(aired)}") + if guid: + xml.append(f" {xml_escape(guid)}") + if dur_min: + xml.append(f" {dur_min}") + if thumb: + xml.append(f" {xml_escape(thumb)}") + xml.append("\n") + nfo_path = media_path.with_suffix(".nfo") + nfo_path.write_text("\n".join(xml), encoding="utf-8") + return nfo_path + except Exception: + return media_path.with_suffix(".nfo") + def write_plain_transcript(media_path: Path, text: str, language: str = "en") -> Path: """Write minimal transcript artifacts (.txt + .json) from plain text (no timestamps).""" title = media_path.stem @@ -513,6 +630,22 @@ def transcribe(media_path: Path): shutil.copy2(srt_src, srt_dst) except Exception as e: print(f"[post] could not copy srt -> {srt_dst}: {e}", flush=True) + # Write Kodi/Plex-compatible NFO using basic metadata + try: + meta = { + "title": title, + "episode_title": title, + "show": media_path.parent.name, + "description": "", + "pubdate": _extract_date_from_stem(title), + "duration_sec": media_duration_seconds(media_path), + "image": "", + "guid": "", + } + ttxt = (TRN / title).with_suffix(".txt").read_text(encoding="utf-8") + write_episode_nfo(media_path, meta, ttxt) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) # Optional: cleanup temporary WAV try: @@ -695,6 +828,21 @@ def handle_local_file(path_str: str): ensure_sidecar_next_to_media(sidecar, p, lang=lang) index_meili(base.with_suffix(".json")) publish_to_openwebui([base.with_suffix(".txt")]) + try: + meta = { + "title": title, + "episode_title": title, + "show": p.parent.name, + "description": "", + "pubdate": _extract_date_from_stem(title), + "duration_sec": media_duration_seconds(p), + "image": "", + "guid": "", + } + ttxt = base.with_suffix(".txt").read_text(encoding="utf-8") + write_episode_nfo(p, meta, ttxt) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) log({**info, **{"status": "done", "note": "used_existing_transcript"}}) return @@ -705,6 +853,22 @@ def handle_local_file(path_str: str): if base: index_meili(base.with_suffix(".json")) publish_to_openwebui([base.with_suffix(".txt")]) + try: + data = json.loads((base.with_suffix(".json")).read_text(encoding="utf-8")) + meta = { + "title": data.get("title") or title, + "episode_title": data.get("title") or title, + "show": data.get("show") or p.parent.name, + "description": data.get("description") or "", + "pubdate": data.get("pubdate") or _extract_date_from_stem(title), + "duration_sec": media_duration_seconds(p), + "image": data.get("image"), + "guid": data.get("guid") or data.get("id"), + } + ttxt = base.with_suffix(".txt").read_text(encoding="utf-8") + write_episode_nfo(p, meta, ttxt) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) log({**info, **{"status": "done", "note": "reused_repo_transcript"}}) return @@ -772,6 +936,34 @@ def handle_url(url: str): base = transcribe(dest) index_meili(base.with_suffix(".json")) publish_to_openwebui([base.with_suffix(".txt")]) + try: + if 'ep' in locals() and ep: + meta = { + "title": ep.get("title"), + "episode_title": ep.get("title"), + "show": ep.get("podcast_title") or ep.get("feed_title") or ep.get("show") or uploader, + "description": ep.get("description") or ep.get("content"), + "pubdate": ep.get("pubdate"), + "pubdate_iso": ep.get("date_iso"), + "duration_sec": ep.get("duration_sec") or ep.get("duration") or media_duration_seconds(dest), + "image": ep.get("image") or ep.get("image_url"), + "guid": ep.get("guid"), + } + else: + meta = { + "title": dest.stem, + "episode_title": dest.stem, + "show": uploader, + "description": "", + "pubdate": _extract_date_from_stem(dest.stem), + "duration_sec": media_duration_seconds(dest), + "image": "", + "guid": "", + } + ttxt = base.with_suffix(".txt").read_text(encoding="utf-8") + write_episode_nfo(dest, meta, ttxt) + except Exception as e: + print(f"[post] NFO write failed: {e}", flush=True) log({**info, **{"status":"done"}}) except Exception as e: log({"url": url, "status":"error", "error": str(e)})