Add media normalization

This commit is contained in:
2025-09-24 11:58:36 +02:00
parent 6240e86650
commit 1dc7005d4b
4 changed files with 342 additions and 0 deletions

View File

@@ -13,6 +13,20 @@ OPENWEBUI_AUTO_FIX_METADATA=1
# Optional: JSON string to enforce as metadata template when auto-fix runs
# OPENWEBUI_METADATA_TEMPLATE_JSON={}
# Media normalisation
MEDIA_NORMALIZE=1
MEDIA_NORMALIZE_KEEP_ORIGINAL=0
VIDEO_NORMALIZE_CODEC=hevc
VIDEO_NORMALIZE_EXTENSION=.mp4
VIDEO_NORMALIZE_CRF=28
VIDEO_NORMALIZE_PRESET=medium
VIDEO_NORMALIZE_AUDIO_CODEC=aac
VIDEO_NORMALIZE_AUDIO_BITRATE=160k
AUDIO_NORMALIZE_CODEC=libmp3lame
AUDIO_NORMALIZE_EXTENSION=.mp3
AUDIO_NORMALIZE_BITRATE=192k
AUDIO_NORMALIZE_CHANNELS=2
# Transcription backend (local Whisper by default)
TRANSCRIBE_BACKEND=local
OPENAI_API_KEY=

View File

@@ -43,6 +43,10 @@ Note: `.env.example` includes placeholders for both **Meili** and **OpenWebUI**
- `OPENWEBUI_KB_ID`: Fixed UUID of the Knowledge Base (avoids duplicate KBs on restart).
- `OPENWEBUI_AUTO_FIX_METADATA` (default `1`): When enabled, PodX clears/overrides the Knowledge Base metadata template before uploads to prevent ingestion crashes from invalid templates.
- `OPENWEBUI_METADATA_TEMPLATE_JSON`: Optional JSON applied when the auto-fix runs (defaults to `{}`, i.e., no custom metadata template).
- `MEDIA_NORMALIZE` (default `1`): Automatically transcode downloaded media into Plex-friendly formats (HEVC MP4 for video, MP3 for audio by default).
- `MEDIA_NORMALIZE_KEEP_ORIGINAL` (default `0`): Preserve the source file alongside the normalised copy by renaming it with an `.orig` suffix (`.orig1`, `.orig2`, … when a backup with that name already exists).
- `VIDEO_NORMALIZE_*`: Fine-tune video conversion (`VIDEO_NORMALIZE_CODEC`, `VIDEO_NORMALIZE_EXTENSION`, `VIDEO_NORMALIZE_CRF`, `VIDEO_NORMALIZE_PRESET`, `VIDEO_NORMALIZE_TUNE` (optional, empty by default), `VIDEO_NORMALIZE_AUDIO_CODEC`, `VIDEO_NORMALIZE_AUDIO_BITRATE`).
- `AUDIO_NORMALIZE_*`: Control audio conversion (`AUDIO_NORMALIZE_CODEC`, `AUDIO_NORMALIZE_EXTENSION`, `AUDIO_NORMALIZE_BITRATE`, `AUDIO_NORMALIZE_CHANNELS`).
## RSS Ingestion

View File

@@ -94,6 +94,15 @@ RSS_INDEX_PATH = Path(os.getenv("RSS_INDEX_PATH", "/transcripts/rss_index.json")
RSS_DURATION_TOLERANCE = int(os.getenv("RSS_DURATION_TOLERANCE", "150")) # seconds
DEFAULT_TRANSCRIPT_LANG = os.getenv("DEFAULT_TRANSCRIPT_LANG", "en").strip() or "en"
def _clean_extension(raw: str, fallback: str) -> str:
raw = (raw or fallback or "").strip()
if not raw:
raw = fallback
if not raw.startswith("."):
raw = f".{raw}"
return raw.lower()
OWUI_URL = os.getenv("OPENWEBUI_URL", "").rstrip("/")
OWUI_KEY = os.getenv("OPENWEBUI_API_KEY", "")
OWUI_KB = os.getenv("OPENWEBUI_KB_NAME", "Homelab Library")
@@ -102,6 +111,23 @@ OWUI_METADATA_TEMPLATE_JSON = os.getenv("OPENWEBUI_METADATA_TEMPLATE_JSON", "").
_OWUI_TEMPLATE_PATCHED: set[str] = set()
# Media normalisation options (transcoding for Plex-friendly formats)
# All values are read once from the environment at import time.
# Enabled unless explicitly set to "0", "false" or "no" (case-insensitive).
MEDIA_NORMALIZE = os.getenv("MEDIA_NORMALIZE", "1").strip().lower() not in ("0", "false", "no")
# Disabled unless explicitly set to "1", "true" or "yes" (case-insensitive).
MEDIA_NORMALIZE_KEEP_ORIGINAL = os.getenv("MEDIA_NORMALIZE_KEEP_ORIGINAL", "0").strip().lower() in ("1", "true", "yes")
# Target video codec name, lowercased.
VIDEO_NORMALIZE_CODEC = os.getenv("VIDEO_NORMALIZE_CODEC", "hevc").strip().lower()
# Target video file extension, normalised to lowercase with a leading dot.
VIDEO_NORMALIZE_EXTENSION = _clean_extension(os.getenv("VIDEO_NORMALIZE_EXTENSION", ".mp4"), ".mp4")
# CRF / preset / tune are kept as strings because they are passed straight to
# the ffmpeg command line; an empty string means "do not pass the option".
VIDEO_NORMALIZE_CRF = os.getenv("VIDEO_NORMALIZE_CRF", "28").strip()
VIDEO_NORMALIZE_PRESET = os.getenv("VIDEO_NORMALIZE_PRESET", "medium").strip()
VIDEO_NORMALIZE_TUNE = os.getenv("VIDEO_NORMALIZE_TUNE", "").strip()
# Audio track settings used when transcoding a video file ("copy" keeps the
# source audio stream untouched).
VIDEO_NORMALIZE_AUDIO_CODEC = os.getenv("VIDEO_NORMALIZE_AUDIO_CODEC", "aac").strip().lower()
VIDEO_NORMALIZE_AUDIO_BITRATE = os.getenv("VIDEO_NORMALIZE_AUDIO_BITRATE", "160k").strip()
# Settings for audio-only files. Note: unlike the video codec settings this
# value is not lowercased here; consumers lowercase it where they compare.
AUDIO_NORMALIZE_CODEC = os.getenv("AUDIO_NORMALIZE_CODEC", "libmp3lame").strip()
AUDIO_NORMALIZE_EXTENSION = _clean_extension(os.getenv("AUDIO_NORMALIZE_EXTENSION", ".mp3"), ".mp3")
AUDIO_NORMALIZE_BITRATE = os.getenv("AUDIO_NORMALIZE_BITRATE", "192k").strip()
AUDIO_NORMALIZE_CHANNELS = os.getenv("AUDIO_NORMALIZE_CHANNELS", "2").strip()
# Redis-backed job queue settings and offload toggle
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0").strip()
OFFLOAD_TRANSCRIBE = os.getenv("OFFLOAD_TRANSCRIBE", "1").lower() not in ("0", "false", "no")
@@ -1409,6 +1435,251 @@ def owui_fix_metadata_template(kb_id: str, force: bool = False) -> bool:
return True
return False
# ---------- Media normalisation helpers ----------

# Friendly video codec names (lowercase) -> ffmpeg encoder name.
# Aliases of the same codec are grouped so they share one encoder entry.
VIDEO_ENCODER_MAP = {
    **dict.fromkeys(("hevc", "h265", "h.265"), "libx265"),
    **dict.fromkeys(("h264", "h.264"), "libx264"),
    "av1": "libaom-av1",
}

# Friendly audio codec names (lowercase) -> ffmpeg encoder name.
AUDIO_ENCODER_MAP = dict(
    mp3="libmp3lame",
    libmp3lame="libmp3lame",
    aac="aac",
    libfdk_aac="libfdk_aac",
    opus="libopus",
    flac="flac",
)
def _resolve_video_encoder(codec: str) -> str:
    """Translate a configured video codec name into an ffmpeg encoder name.

    The lookup in ``VIDEO_ENCODER_MAP`` is case-insensitive. A name that is
    not in the map is returned unchanged, and an empty/``None`` codec yields
    ``"libx265"``.
    """
    lookup = codec.lower() if codec else ""
    passthrough = codec if codec else "libx265"
    try:
        return VIDEO_ENCODER_MAP[lookup]
    except KeyError:
        return passthrough
def _resolve_audio_encoder(codec: str) -> str:
    """Translate a configured audio codec name into an ffmpeg encoder name.

    The lookup in ``AUDIO_ENCODER_MAP`` is case-insensitive. A name that is
    not in the map is returned unchanged, and an empty/``None`` codec yields
    ``"libmp3lame"``.
    """
    lookup = codec.lower() if codec else ""
    passthrough = codec if codec else "libmp3lame"
    try:
        return AUDIO_ENCODER_MAP[lookup]
    except KeyError:
        return passthrough
def _ffprobe_streams(path: Path) -> dict[str, str]:
try:
out = subprocess.check_output(
["ffprobe", "-v", "error", "-show_entries", "stream=codec_type,codec_name", "-of", "json", str(path)],
text=True,
)
data = json.loads(out)
except Exception:
return {}
info: dict[str, str] = {"video": "", "audio": ""}
for stream in data.get("streams", []) or []:
ctype = (stream.get("codec_type") or "").lower()
cname = (stream.get("codec_name") or "").lower()
if ctype == "video" and not info["video"]:
info["video"] = cname
elif ctype == "audio" and not info["audio"]:
info["audio"] = cname
return info
def _unique_backup_path(path: Path) -> Path:
base = path.name
candidate = path.parent / f"{base}.orig"
if not candidate.exists():
return candidate
counter = 1
while True:
candidate = path.parent / f"{base}.orig{counter}"
if not candidate.exists():
return candidate
counter += 1
def _is_sidecar_name(name: str, base_stem: str, base_name: str) -> bool:
exact_suffixes = [".info.json", ".nfo", ".jpg", ".jpeg", ".png", ".webp", ".prov.json"]
for suf in exact_suffixes:
if name == f"{base_name}{suf}" or name == f"{base_stem}{suf}":
return True
text_exts = {".srt", ".vtt", ".txt", ".json", ".md"}
for ext in text_exts:
if name == f"{base_stem}{ext}" or name == f"{base_name}{ext}":
return True
if name.startswith(f"{base_stem}.") and name.endswith(ext):
return True
if name.startswith(f"{base_name}.") and name.endswith(ext):
return True
return False
def rename_media_sidecars(src: Path, dst: Path, skip: set[Path] | None = None) -> None:
if src == dst:
return
skip = skip or set()
parent = src.parent
stem_src, stem_dst = src.stem, dst.stem
name_src, name_dst = src.name, dst.name
for f in list(parent.glob("*")):
if not f.exists() or f == src or f == dst or f in skip:
continue
new_name = None
fname = f.name
if not _is_sidecar_name(fname, stem_src, name_src):
continue
if fname.startswith(name_src):
new_name = name_dst + fname[len(name_src):]
elif fname.startswith(stem_src):
new_name = stem_dst + fname[len(stem_src):]
if not new_name:
continue
target = parent / new_name
if target.exists():
continue
try:
f.rename(target)
except Exception:
pass
def _finalize_normalized_output(original: Path, final_path: Path, tmp_path: Path) -> Path:
    """Move a finished transcode into place and dispose of the source file.

    Args:
        original: The source media file that was transcoded.
        final_path: Where the normalised file must end up; may be the same
            path as ``original`` for an in-place re-encode.
        tmp_path: The temporary file ffmpeg wrote.

    Returns:
        ``final_path``.

    Raises:
        OSError: If ``tmp_path`` cannot be moved to ``final_path``.
    """
    in_place = original == final_path

    # Back up the source BEFORE anything is overwritten. Previously the
    # destination was unlinked first, which destroyed the source of an
    # in-place re-encode before it could be preserved.
    backup = None
    if MEDIA_NORMALIZE_KEEP_ORIGINAL and original.exists():
        try:
            candidate = _unique_backup_path(original)
            original.rename(candidate)
            backup = candidate
        except OSError as e:
            print(f"[normalize] could not preserve original for {original}: {e}", flush=True)

    try:
        # os.replace overwrites an existing destination, so no prior unlink
        # of final_path is needed.
        os.replace(tmp_path, final_path)
    except OSError:
        # Put the source back so a failed move does not leave the media
        # path empty.
        if backup is not None and not original.exists():
            try:
                backup.rename(original)
            except OSError as e:
                print(f"[normalize] could not restore {original} from {backup}: {e}", flush=True)
        raise

    # Remove the source only after the new file is safely in place. This is
    # a no-op when the source was moved to a backup above.
    if not in_place:
        try:
            original.unlink(missing_ok=True)
        except OSError as e:
            print(f"[normalize] could not remove original {original}: {e}", flush=True)
    return final_path
def _normalize_video_file(path: Path, info: dict[str, str]) -> Path:
    """Transcode a video file to the configured codec and container.

    Args:
        path: Source video file.
        info: Stream summary with ``"video"`` and ``"audio"`` codec names
            (as produced by ``_ffprobe_streams``).

    Returns:
        ``path`` unchanged when it already has the target codec and
        extension; otherwise the path of the normalised file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    current_codec = (info.get("video") or "").lower()
    ext_match = path.suffix.lower() == VIDEO_NORMALIZE_EXTENSION
    encoder = _resolve_video_encoder(VIDEO_NORMALIZE_CODEC)

    # The probed codec name (e.g. "hevc") can differ from the configured
    # alias (e.g. "h265" or "libx265"). Treat every alias that resolves to
    # the same encoder as equivalent so compliant files are not re-encoded
    # on every run.
    equivalent_codecs = {VIDEO_NORMALIZE_CODEC, encoder.lower()}
    equivalent_codecs.update(alias for alias, enc in VIDEO_ENCODER_MAP.items() if enc == encoder)
    equivalent_codecs.discard("")
    if current_codec in equivalent_codecs and ext_match:
        return path

    final_path = path if ext_match else path.with_suffix(VIDEO_NORMALIZE_EXTENSION)
    tmp_path = final_path.parent / f"{final_path.stem}.tmp{VIDEO_NORMALIZE_EXTENSION}"
    if tmp_path.exists():
        # Leftover from an earlier interrupted run.
        tmp_path.unlink()

    cmd = [
        "ffmpeg", "-nostdin", "-y",
        "-i", str(path),
        "-map", "0",
        "-c:v", encoder,
    ]
    # Empty settings mean "let ffmpeg use its default".
    if VIDEO_NORMALIZE_PRESET:
        cmd.extend(["-preset", VIDEO_NORMALIZE_PRESET])
    if VIDEO_NORMALIZE_TUNE:
        cmd.extend(["-tune", VIDEO_NORMALIZE_TUNE])
    if VIDEO_NORMALIZE_CRF:
        cmd.extend(["-crf", VIDEO_NORMALIZE_CRF])
    if info.get("audio"):
        if VIDEO_NORMALIZE_AUDIO_CODEC == "copy":
            cmd.extend(["-c:a", "copy"])
        else:
            cmd.extend(["-c:a", _resolve_audio_encoder(VIDEO_NORMALIZE_AUDIO_CODEC)])
            if VIDEO_NORMALIZE_AUDIO_BITRATE:
                cmd.extend(["-b:a", VIDEO_NORMALIZE_AUDIO_BITRATE])
    else:
        cmd.append("-an")
    # NOTE(review): stream-copying subtitles may fail when the target
    # container cannot hold the source subtitle codec — confirm against
    # real inputs.
    cmd.extend(["-c:s", "copy", str(tmp_path)])

    print(f"[normalize] video -> {final_path.name} codec={VIDEO_NORMALIZE_CODEC}", flush=True)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        if tmp_path.exists():
            tmp_path.unlink()
        raise RuntimeError(f"ffmpeg video normalize failed: {e}") from e

    rename_media_sidecars(path, final_path, skip={tmp_path})
    return _finalize_normalized_output(path, final_path, tmp_path)
def _normalize_audio_file(path: Path, info: dict[str, str]) -> Path:
    """Transcode an audio file to the configured codec and container.

    Args:
        path: Source audio file.
        info: Stream summary with an ``"audio"`` codec name (as produced by
            ``_ffprobe_streams``).

    Returns:
        ``path`` unchanged when it already has the target codec and
        extension; otherwise the path of the normalised file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    current_codec = (info.get("audio") or "").lower()
    ext_match = path.suffix.lower() == AUDIO_NORMALIZE_EXTENSION
    target_encoder = _resolve_audio_encoder(AUDIO_NORMALIZE_CODEC)
    encoder_key = target_encoder.lower()

    # The probed codec name (e.g. "opus") can differ from the configured
    # encoder name (e.g. "libopus"). Accept every alias that resolves to
    # the target encoder so compliant files are not re-encoded every run.
    equivalent_codecs = {AUDIO_NORMALIZE_CODEC.lower(), encoder_key}
    equivalent_codecs.update(
        alias for alias, enc in AUDIO_ENCODER_MAP.items() if enc.lower() == encoder_key
    )
    if encoder_key == "libmp3lame":
        equivalent_codecs.add("mp3")
    if encoder_key in {"aac", "libfdk_aac"}:
        equivalent_codecs.update({"aac", "mp4a"})
    equivalent_codecs.discard("")
    if current_codec in equivalent_codecs and ext_match:
        return path

    final_path = path if ext_match else path.with_suffix(AUDIO_NORMALIZE_EXTENSION)
    tmp_path = final_path.parent / f"{final_path.stem}.tmp{AUDIO_NORMALIZE_EXTENSION}"
    if tmp_path.exists():
        # Leftover from an earlier interrupted run.
        tmp_path.unlink()

    cmd = [
        "ffmpeg", "-nostdin", "-y",
        "-i", str(path),
        "-vn",  # audio only: drop any video stream
        "-c:a", target_encoder,
    ]
    # Empty settings mean "let ffmpeg use its default".
    if AUDIO_NORMALIZE_BITRATE:
        cmd.extend(["-b:a", AUDIO_NORMALIZE_BITRATE])
    if AUDIO_NORMALIZE_CHANNELS:
        cmd.extend(["-ac", AUDIO_NORMALIZE_CHANNELS])
    cmd.append(str(tmp_path))

    print(f"[normalize] audio -> {final_path.name} codec={AUDIO_NORMALIZE_CODEC}", flush=True)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        if tmp_path.exists():
            tmp_path.unlink()
        raise RuntimeError(f"ffmpeg audio normalize failed: {e}") from e

    rename_media_sidecars(path, final_path, skip={tmp_path})
    return _finalize_normalized_output(path, final_path, tmp_path)
def normalize_media_file(path: Path) -> Path:
    """Normalise a media file and return the path callers should use next.

    Nothing is done when normalisation is disabled or ``path`` is not an
    existing regular file. Files with a video stream go through the video
    normaliser, otherwise files with an audio stream go through the audio
    normaliser. Any failure is logged and the input ``path`` is returned.
    """
    eligible = MEDIA_NORMALIZE and path.exists() and path.is_file()
    if not eligible:
        return path

    try:
        streams = _ffprobe_streams(path)
    except Exception as exc:
        print(f"[normalize] ffprobe failed for {path}: {exc}", flush=True)
        return path

    try:
        # Video is checked first, so a file with both stream types is
        # handled as video.
        for kind, handler in (("video", _normalize_video_file), ("audio", _normalize_audio_file)):
            if streams.get(kind):
                return handler(path, streams)
    except Exception as exc:
        print(f"[normalize] failed for {path}: {exc}", flush=True)
    return path
def owui_get_or_create_kb():
"""Return a KB id for OWUI_KB without creating duplicates.
Honors OPENWEBUI_KB_ID, and tolerates both list and {"data": ...} response shapes.
@@ -1600,6 +1871,11 @@ def handle_local_file(path_str: str):
if not p.exists():
log({"url": path_str, "status": "error", "error": "file_not_found"})
return
normalized = normalize_media_file(p)
if normalized != p:
print(f"[normalize] local media: {p.name} -> {normalized.name}", flush=True)
p = normalized
path_str = str(p)
if WORKER_MODE == "transcribe":
print(f"[mode] transcribe-only worker handling local file: {p}", flush=True)
@@ -1723,6 +1999,11 @@ def refresh_media(path_str: str):
if not p.exists() or not p.is_file():
log({"url": path_str, "status": "error", "error": "file_not_found"})
return
normalized = normalize_media_file(p)
if normalized != p:
print(f"[normalize] refresh media: {p.name} -> {normalized.name}", flush=True)
p = normalized
path_str = str(p)
# Locate existing info.json to get the original URL
info_json = None
@@ -1900,6 +2181,10 @@ def handle_url(url: str):
pass
except Exception:
pass
normalized_dest = normalize_media_file(dest)
if normalized_dest != dest:
print(f"[normalize] download media: {dest.name} -> {normalized_dest.name}", flush=True)
dest = normalized_dest
info.update({"title": dest.stem, "uploader": uploader,
"date": (re.findall(r"\b(\d{8})\b", dest.stem)[0] if re.findall(r"\b(\d{8})\b", dest.stem) else ""),
"path": str(dest)})

View File

@@ -18,6 +18,19 @@ services:
OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600}
OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1}
OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-}
MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1}
MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0}
VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc}
VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4}
VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28}
VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium}
VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-}
VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac}
VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k}
AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame}
AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3}
AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k}
AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2}
OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080}
OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY}
OPENWEBUI_KB_NAME: ${OPENWEBUI_KB_NAME:-Homelab Library}
@@ -63,6 +76,19 @@ services:
OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600}
OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1}
OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-}
MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1}
MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0}
VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc}
VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4}
VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28}
VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium}
VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-}
VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac}
VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k}
AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame}
AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3}
AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k}
AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2}
WORKER_MODE: all
OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080}
OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY}
@@ -110,6 +136,19 @@ services:
OPENAI_TRANSCRIBE_TIMEOUT: ${OPENAI_TRANSCRIBE_TIMEOUT:-600}
OPENWEBUI_AUTO_FIX_METADATA: ${OPENWEBUI_AUTO_FIX_METADATA:-1}
OPENWEBUI_METADATA_TEMPLATE_JSON: ${OPENWEBUI_METADATA_TEMPLATE_JSON:-}
MEDIA_NORMALIZE: ${MEDIA_NORMALIZE:-1}
MEDIA_NORMALIZE_KEEP_ORIGINAL: ${MEDIA_NORMALIZE_KEEP_ORIGINAL:-0}
VIDEO_NORMALIZE_CODEC: ${VIDEO_NORMALIZE_CODEC:-hevc}
VIDEO_NORMALIZE_EXTENSION: ${VIDEO_NORMALIZE_EXTENSION:-.mp4}
VIDEO_NORMALIZE_CRF: ${VIDEO_NORMALIZE_CRF:-28}
VIDEO_NORMALIZE_PRESET: ${VIDEO_NORMALIZE_PRESET:-medium}
VIDEO_NORMALIZE_TUNE: ${VIDEO_NORMALIZE_TUNE:-}
VIDEO_NORMALIZE_AUDIO_CODEC: ${VIDEO_NORMALIZE_AUDIO_CODEC:-aac}
VIDEO_NORMALIZE_AUDIO_BITRATE: ${VIDEO_NORMALIZE_AUDIO_BITRATE:-160k}
AUDIO_NORMALIZE_CODEC: ${AUDIO_NORMALIZE_CODEC:-libmp3lame}
AUDIO_NORMALIZE_EXTENSION: ${AUDIO_NORMALIZE_EXTENSION:-.mp3}
AUDIO_NORMALIZE_BITRATE: ${AUDIO_NORMALIZE_BITRATE:-192k}
AUDIO_NORMALIZE_CHANNELS: ${AUDIO_NORMALIZE_CHANNELS:-2}
WORKER_MODE: transcribe
OPENWEBUI_URL: ${OPENWEBUI_CONTAINER_URL:-http://open-webui:8080}
OPENWEBUI_API_KEY: ${OPENWEBUI_API_KEY}