Bug fixes
@@ -7,11 +7,14 @@ import xml.etree.ElementTree as ET
 # ---- Config ----
 TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
 OUT_INDEX = Path(os.getenv("RSS_INDEX_PATH", str(TRN / "rss_index.json")))
-FEEDS_FILE = Path(os.getenv("RSS_FEEDS_FILE", "/app/config/feeds.txt"))
+FEEDS_FILE = Path(os.getenv("RSS_FEEDS_FILE", "/library/feeds.txt"))
 FEEDS_ENV = os.getenv("RSS_FEEDS", "").strip()
 TIMEOUT = int(os.getenv("RSS_HTTP_TIMEOUT", "30"))
 DOWNLOAD_TRANSCRIPTS = os.getenv("RSS_DOWNLOAD_TRANSCRIPTS", "true").lower() in {"1", "true", "yes", "y"}
 DEFAULT_LANG = os.getenv("DEFAULT_TRANSCRIPT_LANG", "en").strip() or "en"
+RSS_SCAN_MINUTES = int(os.getenv("RSS_SCAN_MINUTES", "15"))
+RSS_ONCE = os.getenv("RSS_ONCE", "0").lower() in {"1", "true", "yes", "y"}
+AUDIO_MAX_MB = int(os.getenv("RSS_AUDIO_MAX_MB", "0"))  # 0 = unlimited
 
 # Where media files live; used to sidecar RSS transcripts next to matching media
 LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
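The three new settings above feed the scan loop and the audio size cap added later in this commit. As a quick standalone sketch of how those values are interpreted (same parsing conventions as the config block; the variable names here are only illustrative):

import os

# Boolean flags use the same truthy set as above: "1", "true", "yes", "y" (case-insensitive).
once = os.getenv("RSS_ONCE", "0").lower() in {"1", "true", "yes", "y"}

# RSS_AUDIO_MAX_MB of 0 means "no cap"; _download_stream translates it to max_bytes = None.
audio_max_mb = int(os.getenv("RSS_AUDIO_MAX_MB", "0"))
max_bytes = audio_max_mb * 1024 * 1024 if audio_max_mb > 0 else None
print(once, max_bytes)  # with nothing set: False None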
@@ -130,6 +133,58 @@ def _guess_ext_from_type(mime: str) -> str:
     return ".txt"
 
 
+def _guess_audio_ext(mime: str, url: str) -> str:
+    # Prefer by MIME; fall back to URL suffix
+    mime = (mime or "").lower()
+    if "mp3" in mime:
+        return ".mp3"
+    if "aac" in mime or "mp4a" in mime:
+        return ".m4a"
+    if "m4a" in mime:
+        return ".m4a"
+    if "ogg" in mime:
+        return ".ogg"
+    if "opus" in mime:
+        return ".opus"
+    if "flac" in mime:
+        return ".flac"
+    if "wav" in mime:
+        return ".wav"
+    # fallback by URL
+    suf = Path(urlparse(url).path).suffix.lower()
+    if suf in {".mp3", ".m4a", ".aac", ".ogg", ".opus", ".flac", ".wav"}:
+        return ".m4a" if suf == ".aac" else suf
+    return ".mp3"
+
+def _download_stream(url: str, dst: Path) -> Path | None:
+    try:
+        dst.parent.mkdir(parents=True, exist_ok=True)
+        with requests.get(url, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"}, stream=True) as r:
+            r.raise_for_status()
+            max_bytes = AUDIO_MAX_MB * 1024 * 1024 if AUDIO_MAX_MB > 0 else None
+            total = 0
+            with open(dst, "wb") as f:
+                for chunk in r.iter_content(chunk_size=1024 * 256):
+                    if not chunk:
+                        continue
+                    f.write(chunk)
+                    total += len(chunk)
+                    if max_bytes and total > max_bytes:
+                        # stop and remove partial
+                        try:
+                            f.close()
+                        except Exception:
+                            pass
+                        try:
+                            dst.unlink(missing_ok=True)
+                        except Exception:
+                            pass
+                        return None
+        return dst
+    except Exception:
+        return None
+
+
 def _norm_text(s: str) -> str:
     s = (s or "").lower()
     s = re.sub(r"\s+", " ", s)
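For orientation, a rough sketch of how the two new helpers are meant to compose when saving an enclosure; the URL, MIME type, and call site below are hypothetical, not taken from elsewhere in this file:

# Hypothetical caller pairing the new helpers (illustrative only).
enclosure_url = "https://example.com/episodes/42/audio.mp3?token=abc"
enclosure_type = "audio/mpeg"

ext = _guess_audio_ext(enclosure_type, enclosure_url)   # ".mp3" via the URL-suffix fallback, since "audio/mpeg" does not contain "mp3"
dst = LIB / "Some Show" / f"episode-42{ext}"            # assumed destination under LIBRARY_ROOT
saved = _download_stream(enclosure_url, dst)            # Path on success; None on error or when RSS_AUDIO_MAX_MB is exceeded
if saved is None:
    print("[rss] enclosure skipped (download failed or exceeded size cap)")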
@@ -224,12 +279,15 @@ def _gather_transcripts(item: ET.Element):
 def parse_feed(feed_url: str):
     items = []
     try:
+        print(f"[rss] fetching {feed_url}", flush=True)
         r = requests.get(feed_url, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
         r.raise_for_status()
         root = ET.fromstring(r.content)
 
         channel = root.find("channel") or root
         show_title = _text(_find_ns(channel, "title")) or _text(_find_ns(root, "title"))
+        if not show_title:
+            show_title = ""
 
         for it in _iter_items(channel):
             title = _text(_find_ns(it, "title"))
@@ -320,12 +378,15 @@ def parse_feed(feed_url: str):
 
             items.append(item_rec)
 
+        print(f"[rss] parsed {len(items)} episode(s) from {show_title or feed_url}", flush=True)
         return {"feed_url": feed_url, "show": show_title, "episodes": items}
     except Exception as e:
+        print(f"[rss] ERROR parsing {feed_url}: {e}", flush=True)
         return {"feed_url": feed_url, "error": str(e), "episodes": []}
 
 
 def load_feeds_list():
+    print(f"[rss] FEEDS_FILE={FEEDS_FILE} FEEDS_ENV={'set' if bool(FEEDS_ENV) else 'unset'}", flush=True)
     feeds = []
     if FEEDS_ENV:
         feeds.extend([u.strip() for u in FEEDS_ENV.split(",") if u.strip()])
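Both return paths of parse_feed now emit a log line; the returned shape is unchanged. A short illustrative check of how a caller can tell them apart (the feed URL is made up):

result = parse_feed("https://example.net/possibly-broken-feed.xml")
if result.get("error"):
    # parse failure: "show" is absent and "episodes" is an empty list
    print(f"skipping {result['feed_url']}: {result['error']}")
else:
    print(f"{result['show']}: {len(result['episodes'])} episode(s)")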
@@ -338,8 +399,12 @@ def load_feeds_list():
                 feeds.append(line)
         except Exception:
             pass
+    else:
+        print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
     # unique, keep order
-    return sorted(list(dict.fromkeys(feeds)))
+    feeds = sorted(list(dict.fromkeys(feeds)))
+    print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
+    return feeds
 
 
 def build_index():
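A small standalone check of the dedupe-then-sort step above: dict.fromkeys drops duplicates while keeping first-seen order, and the final sorted() returns the URLs alphabetically. The sample URLs are made up:

feeds = [
    "https://example.org/feed.xml",
    "https://example.com/rss",
    "https://example.org/feed.xml",  # duplicate, dropped by dict.fromkeys
]
print(sorted(list(dict.fromkeys(feeds))))
# ['https://example.com/rss', 'https://example.org/feed.xml']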
@@ -354,4 +419,11 @@ def build_index():
 
 
 if __name__ == "__main__":
-    build_index()
+    while True:
+        try:
+            build_index()
+        except Exception as e:
+            print(f"[rss] build error: {e}", flush=True)
+        if RSS_ONCE:
+            break
+        time.sleep(max(1, RSS_SCAN_MINUTES) * 60)
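The reworked entry point scans on an interval instead of running once: each pass calls build_index(), errors are logged rather than fatal, RSS_ONCE turns the loop into a single pass, and the sleep never drops below one minute. A tiny sketch of the interval math, with example values:

# Illustrative only: the loop sleeps max(1, RSS_SCAN_MINUTES) * 60 seconds between scans.
for scan_minutes in (0, 15):
    print(scan_minutes, "->", max(1, scan_minutes) * 60, "seconds")
# 0 -> 60 seconds (floor of one minute even if misconfigured)
# 15 -> 900 seconds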