Bug fixes
This commit is contained in:
@@ -7,11 +7,14 @@ import xml.etree.ElementTree as ET
|
||||
# ---- Config ----
# All settings are environment-driven so the container can be reconfigured
# without code changes.
TRN = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
OUT_INDEX = Path(os.getenv("RSS_INDEX_PATH", str(TRN / "rss_index.json")))
# NOTE: a stale duplicate assignment (default "/app/config/feeds.txt") was
# removed; the effective default is the library-rooted path below.
FEEDS_FILE = Path(os.getenv("RSS_FEEDS_FILE", "/library/feeds.txt"))
FEEDS_ENV = os.getenv("RSS_FEEDS", "").strip()
TIMEOUT = int(os.getenv("RSS_HTTP_TIMEOUT", "30"))
DOWNLOAD_TRANSCRIPTS = os.getenv("RSS_DOWNLOAD_TRANSCRIPTS", "true").lower() in {"1", "true", "yes", "y"}
DEFAULT_LANG = os.getenv("DEFAULT_TRANSCRIPT_LANG", "en").strip() or "en"
RSS_SCAN_MINUTES = int(os.getenv("RSS_SCAN_MINUTES", "15"))
RSS_ONCE = os.getenv("RSS_ONCE", "0").lower() in {"1", "true", "yes", "y"}
AUDIO_MAX_MB = int(os.getenv("RSS_AUDIO_MAX_MB", "0"))  # 0 = unlimited

# Where media files live; used to sidecar RSS transcripts next to matching media
LIB = Path(os.getenv("LIBRARY_ROOT", "/library"))
||||
@@ -130,6 +133,58 @@ def _guess_ext_from_type(mime: str) -> str:
|
||||
return ".txt"
|
||||
|
||||
|
||||
def _guess_audio_ext(mime: str, url: str) -> str:
|
||||
# Prefer by MIME; fall back to URL suffix
|
||||
mime = (mime or "").lower()
|
||||
if "mp3" in mime:
|
||||
return ".mp3"
|
||||
if "aac" in mime or "mp4a" in mime:
|
||||
return ".m4a"
|
||||
if "m4a" in mime:
|
||||
return ".m4a"
|
||||
if "ogg" in mime:
|
||||
return ".ogg"
|
||||
if "opus" in mime:
|
||||
return ".opus"
|
||||
if "flac" in mime:
|
||||
return ".flac"
|
||||
if "wav" in mime:
|
||||
return ".wav"
|
||||
# fallback by URL
|
||||
suf = Path(urlparse(url).path).suffix.lower()
|
||||
if suf in {".mp3", ".m4a", ".aac", ".ogg", ".opus", ".flac", ".wav"}:
|
||||
return ".m4a" if suf == ".aac" else suf
|
||||
return ".mp3"
|
||||
|
||||
def _download_stream(url: str, dst: Path) -> Path | None:
    """Stream *url* into *dst*, enforcing the optional RSS_AUDIO_MAX_MB cap.

    Returns *dst* on success, or None on any failure (HTTP error, network
    error mid-stream, or size cap exceeded). A partial file is never left
    behind on failure — the original only cleaned up on the size-cap path,
    leaking truncated files when the connection dropped mid-download.
    """
    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        with requests.get(url, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"}, stream=True) as r:
            r.raise_for_status()
            max_bytes = AUDIO_MAX_MB * 1024 * 1024 if AUDIO_MAX_MB > 0 else None
            total = 0
            with open(dst, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 256):
                    if not chunk:
                        continue
                    f.write(chunk)
                    total += len(chunk)
                    if max_bytes and total > max_bytes:
                        # Abort: raising unwinds both context managers cleanly
                        # (no manual f.close() needed); the except block below
                        # removes the partial file.
                        raise RuntimeError(f"download exceeds {AUDIO_MAX_MB} MB cap")
        return dst
    except Exception:
        # Best-effort cleanup so a truncated download is never mistaken
        # for a complete one on a later scan.
        try:
            dst.unlink(missing_ok=True)
        except Exception:
            pass
        return None
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
s = (s or "").lower()
|
||||
s = re.sub(r"\s+", " ", s)
|
||||
@@ -224,12 +279,15 @@ def _gather_transcripts(item: ET.Element):
|
||||
def parse_feed(feed_url: str):
|
||||
items = []
|
||||
try:
|
||||
print(f"[rss] fetching {feed_url}", flush=True)
|
||||
r = requests.get(feed_url, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
|
||||
r.raise_for_status()
|
||||
root = ET.fromstring(r.content)
|
||||
|
||||
channel = root.find("channel") or root
|
||||
show_title = _text(_find_ns(channel, "title")) or _text(_find_ns(root, "title"))
|
||||
if not show_title:
|
||||
show_title = ""
|
||||
|
||||
for it in _iter_items(channel):
|
||||
title = _text(_find_ns(it, "title"))
|
||||
@@ -320,12 +378,15 @@ def parse_feed(feed_url: str):
|
||||
|
||||
items.append(item_rec)
|
||||
|
||||
print(f"[rss] parsed {len(items)} episode(s) from {show_title or feed_url}", flush=True)
|
||||
return {"feed_url": feed_url, "show": show_title, "episodes": items}
|
||||
except Exception as e:
|
||||
print(f"[rss] ERROR parsing {feed_url}: {e}", flush=True)
|
||||
return {"feed_url": feed_url, "error": str(e), "episodes": []}
|
||||
|
||||
|
||||
def load_feeds_list():
|
||||
print(f"[rss] FEEDS_FILE={FEEDS_FILE} FEEDS_ENV={'set' if bool(FEEDS_ENV) else 'unset'}", flush=True)
|
||||
feeds = []
|
||||
if FEEDS_ENV:
|
||||
feeds.extend([u.strip() for u in FEEDS_ENV.split(",") if u.strip()])
|
||||
@@ -338,8 +399,12 @@ def load_feeds_list():
|
||||
feeds.append(line)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
|
||||
# unique, keep order
|
||||
return sorted(list(dict.fromkeys(feeds)))
|
||||
feeds = sorted(list(dict.fromkeys(feeds)))
|
||||
print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
|
||||
return feeds
|
||||
|
||||
|
||||
def build_index():
|
||||
@@ -354,4 +419,11 @@ def build_index():
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Scan loop: rebuild the RSS index, then either exit (one-shot mode)
    # or sleep until the next scheduled scan. Build failures are logged
    # and do not kill the loop.
    running = True
    while running:
        try:
            build_index()
        except Exception as exc:
            print(f"[rss] build error: {exc}", flush=True)
        running = not RSS_ONCE
        if running:
            time.sleep(max(1, RSS_SCAN_MINUTES) * 60)
|
Reference in New Issue
Block a user