From 1dc7005d4b56e947262846b96f8069b059405545 Mon Sep 17 00:00:00 2001 From: Tomas Kracmar Date: Wed, 24 Sep 2025 11:58:36 +0200 Subject: [PATCH] Add media normalization --- .env.example | 14 +++ README.md | 4 + app/worker.py | 285 +++++++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 39 +++++++ 4 files changed, 342 insertions(+) diff --git a/.env.example b/.env.example index 8d5e3bc..970c5bc 100644 --- a/.env.example +++ b/.env.example @@ -13,6 +13,20 @@ OPENWEBUI_AUTO_FIX_METADATA=1 # Optional: JSON string to enforce as metadata template when auto-fix runs # OPENWEBUI_METADATA_TEMPLATE_JSON={} +# Media normalisation +MEDIA_NORMALIZE=1 +MEDIA_NORMALIZE_KEEP_ORIGINAL=0 +VIDEO_NORMALIZE_CODEC=hevc +VIDEO_NORMALIZE_EXTENSION=.mp4 +VIDEO_NORMALIZE_CRF=28 +VIDEO_NORMALIZE_PRESET=medium +VIDEO_NORMALIZE_AUDIO_CODEC=aac +VIDEO_NORMALIZE_AUDIO_BITRATE=160k +AUDIO_NORMALIZE_CODEC=libmp3lame +AUDIO_NORMALIZE_EXTENSION=.mp3 +AUDIO_NORMALIZE_BITRATE=192k +AUDIO_NORMALIZE_CHANNELS=2 + # Transcription backend (local Whisper by default) TRANSCRIBE_BACKEND=local OPENAI_API_KEY= diff --git a/README.md b/README.md index ebc3401..f739610 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,10 @@ Note: `.env.example` includes placeholders for both **Meili** and **OpenWebUI** - `OPENWEBUI_KB_ID`: Fixed UUID of the Knowledge Base (avoids duplicate KBs on restart). - `OPENWEBUI_AUTO_FIX_METADATA` (default `1`): When enabled, PodX clears/overrides the Knowledge Base metadata template before uploads to prevent ingestion crashes from invalid templates. - `OPENWEBUI_METADATA_TEMPLATE_JSON`: Optional JSON applied when the auto-fix runs (defaults to `{}`, i.e., no custom metadata template). +- `MEDIA_NORMALIZE` (default `1`): Automatically transcode downloaded media into Plex-friendly formats (HEVC MP4 for video, MP3 for audio by default). +- `MEDIA_NORMALIZE_KEEP_ORIGINAL` (default `0`): Preserve the source file alongside the normalised copy (appends `.orig*`). 
+- `VIDEO_NORMALIZE_*`: Fine-tune video conversion (`VIDEO_NORMALIZE_CODEC`, `VIDEO_NORMALIZE_EXTENSION`, `VIDEO_NORMALIZE_CRF`, `VIDEO_NORMALIZE_PRESET`, `VIDEO_NORMALIZE_AUDIO_CODEC`, `VIDEO_NORMALIZE_AUDIO_BITRATE`). +- `AUDIO_NORMALIZE_*`: Control audio conversion (`AUDIO_NORMALIZE_CODEC`, `AUDIO_NORMALIZE_EXTENSION`, `AUDIO_NORMALIZE_BITRATE`, `AUDIO_NORMALIZE_CHANNELS`). ## RSS Ingestion diff --git a/app/worker.py b/app/worker.py index c16a50b..6775909 100644 --- a/app/worker.py +++ b/app/worker.py @@ -94,6 +94,15 @@ RSS_INDEX_PATH = Path(os.getenv("RSS_INDEX_PATH", "/transcripts/rss_index.json") RSS_DURATION_TOLERANCE = int(os.getenv("RSS_DURATION_TOLERANCE", "150")) # seconds DEFAULT_TRANSCRIPT_LANG = os.getenv("DEFAULT_TRANSCRIPT_LANG", "en").strip() or "en" +def _clean_extension(raw: str, fallback: str) -> str: + raw = (raw or fallback or "").strip() + if not raw: + raw = fallback + if not raw.startswith("."): + raw = f".{raw}" + return raw.lower() + + OWUI_URL = os.getenv("OPENWEBUI_URL", "").rstrip("/") OWUI_KEY = os.getenv("OPENWEBUI_API_KEY", "") OWUI_KB = os.getenv("OPENWEBUI_KB_NAME", "Homelab Library") @@ -102,6 +111,23 @@ OWUI_METADATA_TEMPLATE_JSON = os.getenv("OPENWEBUI_METADATA_TEMPLATE_JSON", ""). 
# Media normalisation options (transcoding for Plex-friendly formats).
# Normalisation is on unless explicitly disabled; keeping the original file is
# off unless explicitly enabled.
MEDIA_NORMALIZE = os.getenv("MEDIA_NORMALIZE", "1").strip().lower() not in ("0", "false", "no")
MEDIA_NORMALIZE_KEEP_ORIGINAL = os.getenv("MEDIA_NORMALIZE_KEEP_ORIGINAL", "0").strip().lower() in ("1", "true", "yes")

# Codec names fall back to their defaults when the variable is set but blank
# (same `or` idiom as DEFAULT_TRANSCRIPT_LANG); otherwise an empty value would
# silently resolve to a different encoder than the documented default.
VIDEO_NORMALIZE_CODEC = os.getenv("VIDEO_NORMALIZE_CODEC", "hevc").strip().lower() or "hevc"
VIDEO_NORMALIZE_EXTENSION = _clean_extension(os.getenv("VIDEO_NORMALIZE_EXTENSION", ".mp4"), ".mp4")
# CRF, preset, tune and bitrate may be blank on purpose: the ffmpeg command
# builder omits the corresponding flag when the value is empty.
VIDEO_NORMALIZE_CRF = os.getenv("VIDEO_NORMALIZE_CRF", "28").strip()
VIDEO_NORMALIZE_PRESET = os.getenv("VIDEO_NORMALIZE_PRESET", "medium").strip()
VIDEO_NORMALIZE_TUNE = os.getenv("VIDEO_NORMALIZE_TUNE", "").strip()
VIDEO_NORMALIZE_AUDIO_CODEC = os.getenv("VIDEO_NORMALIZE_AUDIO_CODEC", "aac").strip().lower() or "aac"
VIDEO_NORMALIZE_AUDIO_BITRATE = os.getenv("VIDEO_NORMALIZE_AUDIO_BITRATE", "160k").strip()

# Lower-cased like the video codec settings: ffmpeg encoder names are
# lower-case, and unknown names are passed to ffmpeg verbatim.
AUDIO_NORMALIZE_CODEC = os.getenv("AUDIO_NORMALIZE_CODEC", "libmp3lame").strip().lower() or "libmp3lame"
AUDIO_NORMALIZE_EXTENSION = _clean_extension(os.getenv("AUDIO_NORMALIZE_EXTENSION", ".mp3"), ".mp3")
AUDIO_NORMALIZE_BITRATE = os.getenv("AUDIO_NORMALIZE_BITRATE", "192k").strip()
AUDIO_NORMALIZE_CHANNELS = os.getenv("AUDIO_NORMALIZE_CHANNELS", "2").strip()
_resolve_video_encoder(codec: str) -> str: + key = (codec or "").lower() + return VIDEO_ENCODER_MAP.get(key, codec or "libx265") + + +def _resolve_audio_encoder(codec: str) -> str: + key = (codec or "").lower() + return AUDIO_ENCODER_MAP.get(key, codec or "libmp3lame") + + +def _ffprobe_streams(path: Path) -> dict[str, str]: + try: + out = subprocess.check_output( + ["ffprobe", "-v", "error", "-show_entries", "stream=codec_type,codec_name", "-of", "json", str(path)], + text=True, + ) + data = json.loads(out) + except Exception: + return {} + info: dict[str, str] = {"video": "", "audio": ""} + for stream in data.get("streams", []) or []: + ctype = (stream.get("codec_type") or "").lower() + cname = (stream.get("codec_name") or "").lower() + if ctype == "video" and not info["video"]: + info["video"] = cname + elif ctype == "audio" and not info["audio"]: + info["audio"] = cname + return info + + +def _unique_backup_path(path: Path) -> Path: + base = path.name + candidate = path.parent / f"{base}.orig" + if not candidate.exists(): + return candidate + counter = 1 + while True: + candidate = path.parent / f"{base}.orig{counter}" + if not candidate.exists(): + return candidate + counter += 1 + + +def _is_sidecar_name(name: str, base_stem: str, base_name: str) -> bool: + exact_suffixes = [".info.json", ".nfo", ".jpg", ".jpeg", ".png", ".webp", ".prov.json"] + for suf in exact_suffixes: + if name == f"{base_name}{suf}" or name == f"{base_stem}{suf}": + return True + text_exts = {".srt", ".vtt", ".txt", ".json", ".md"} + for ext in text_exts: + if name == f"{base_stem}{ext}" or name == f"{base_name}{ext}": + return True + if name.startswith(f"{base_stem}.") and name.endswith(ext): + return True + if name.startswith(f"{base_name}.") and name.endswith(ext): + return True + return False + + +def rename_media_sidecars(src: Path, dst: Path, skip: set[Path] | None = None) -> None: + if src == dst: + return + skip = skip or set() + parent = src.parent + stem_src, stem_dst = 
def _finalize_normalized_output(original: Path, final_path: Path, tmp_path: Path) -> Path:
    """Promote the finished transcode at *tmp_path* to *final_path*.

    Args:
        original: The source media file that was transcoded.
        final_path: Where the normalised file must end up. This is the same
            path as *original* when only the codec (not the extension) changed.
        tmp_path: The completed ffmpeg output.

    Returns:
        *final_path*.

    Raises:
        OSError: If *tmp_path* cannot be moved onto *final_path*.
    """
    if MEDIA_NORMALIZE_KEEP_ORIGINAL:
        # Move the source aside before anything touches final_path: when the
        # extension already matched, original and final_path are the same
        # file, and replacing it first would destroy what we were asked to keep.
        try:
            if original.exists():
                original.rename(_unique_backup_path(original))
        except OSError as e:
            print(f"[normalize] could not preserve original for {original}: {e}", flush=True)

    # os.replace overwrites an existing destination, so final_path does not
    # need to be deleted up front, which would leave a window with no file.
    os.replace(tmp_path, final_path)

    if not MEDIA_NORMALIZE_KEEP_ORIGINAL and original != final_path:
        # Drop the source only once the normalised copy is safely in place.
        # If a keep-original backup failed, the source is left where it is
        # rather than deleted.
        try:
            original.unlink()
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"[normalize] could not remove original {original}: {e}", flush=True)
    return final_path
def _normalize_audio_file(path: Path, info: dict[str, str]) -> Path:
    """Transcode an audio-only file to the configured audio codec/container.

    Args:
        path: Audio file to normalise.
        info: Stream summary from ``_ffprobe_streams``; only ``info["audio"]``
            (the current codec name) is read.

    Returns:
        *path* unchanged when it already has the target codec and extension,
        otherwise the path of the normalised file.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with an error.
    """
    current_codec = (info.get("audio") or "").lower()
    ext_match = path.suffix.lower() == AUDIO_NORMALIZE_EXTENSION
    target_encoder = _resolve_audio_encoder(AUDIO_NORMALIZE_CODEC)
    encoder_key = target_encoder.lower()

    # ffprobe reports codec names ("opus", "mp3"), not encoder names
    # ("libopus", "libmp3lame"). Without these aliases a target configured by
    # encoder name never matches and the file is re-encoded on every pass.
    probe_aliases = {
        "libmp3lame": {"mp3"},
        "aac": {"aac", "mp4a"},
        "libfdk_aac": {"aac", "mp4a"},
        "libopus": {"opus"},
        "libvorbis": {"vorbis"},
    }
    equivalent_codecs = {AUDIO_NORMALIZE_CODEC.lower(), encoder_key}
    equivalent_codecs.update(probe_aliases.get(encoder_key, ()))
    if current_codec in equivalent_codecs and ext_match:
        return path

    final_path = path if ext_match else path.with_suffix(AUDIO_NORMALIZE_EXTENSION)
    tmp_path = final_path.parent / f"{final_path.stem}.tmp{AUDIO_NORMALIZE_EXTENSION}"
    if tmp_path.exists():
        tmp_path.unlink()

    cmd = [
        "ffmpeg", "-nostdin", "-y",
        "-i", str(path),
        "-vn",
        "-c:a", target_encoder,
    ]
    # Blank settings mean "let ffmpeg decide", so the flag is omitted.
    if AUDIO_NORMALIZE_BITRATE:
        cmd.extend(["-b:a", AUDIO_NORMALIZE_BITRATE])
    if AUDIO_NORMALIZE_CHANNELS:
        cmd.extend(["-ac", AUDIO_NORMALIZE_CHANNELS])
    cmd.append(str(tmp_path))

    print(f"[normalize] audio -> {final_path.name} codec={AUDIO_NORMALIZE_CODEC}", flush=True)
    try:
        subprocess.check_call(cmd)
    except (subprocess.CalledProcessError, OSError) as e:
        # Remove a partial output whether ffmpeg failed or could not be started.
        tmp_path.unlink(missing_ok=True)
        raise RuntimeError(f"ffmpeg audio normalize failed: {e}") from e

    # Put the media in place first; if that fails, the sidecars must still
    # match the untouched source file name.
    result = _finalize_normalized_output(path, final_path, tmp_path)
    rename_media_sidecars(path, final_path, skip={tmp_path})
    return result
!= dest: + print(f"[normalize] download media: {dest.name} -> {normalized_dest.name}", flush=True) + dest = normalized_dest info.update({"title": dest.stem, "uploader": uploader, "date": (re.findall(r"\b(\d{8})\b", dest.stem)[0] if re.findall(r"\b(\d{8})\b", dest.stem) else ""), "path": str(dest)}) diff --git a/docker-compose.yml b/docker-compose.yml index 71d894e..fae02b8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,6 +18,19 @@ services: OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600} OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1} OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-} + MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1} + MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0} + VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc} + VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4} + VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28} + VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium} + VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-} + VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac} + VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k} + AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame} + AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3} + AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k} + AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2} OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080} OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY} OPENWEBUI_KB_NAME: ${OPENWEBUI_KB_NAME:-Homelab Library} @@ -63,6 +76,19 @@ services: OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600} OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1} OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-} + MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1} + MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0} + VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc} + 
VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4} + VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28} + VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium} + VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-} + VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac} + VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k} + AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame} + AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3} + AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k} + AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2} WORKER_MODE: all OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080} OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY} @@ -110,6 +136,19 @@ services: OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600} OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1} OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-} + MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1} + MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0} + VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc} + VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4} + VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28} + VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium} + VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-} + VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac} + VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k} + AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame} + AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3} + AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k} + AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2} WORKER_MODE: transcribe OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080} OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY}