# Reconstructed from a unified diff of app/rss_ingest.py (index bb3e999..decfcfd)
# that had been collapsed onto a single line.
#
# Hunk 1 (@@ -42,6 +42,35 @@) inserts _resolve_feed_url() between the
# module-level directory setup
#     OUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
#     PODCASTS_ROOT.mkdir(parents=True, exist_ok=True)
# and the existing _text() helper.

# --- Helper to resolve Apple Podcasts URLs to direct RSS feeds ---
_APPLE_PODCASTS_HOST = "podcasts.apple.com"

# The show id must be a whole path segment ("/id123456"). Searching for a bare
# "id<digits>" anywhere in the path would pick "101" out of a slug such as
# "android101" before ever reaching the real id segment.
_APPLE_ID_SEGMENT = re.compile(r"/id(\d+)(?=/|$)")


def _resolve_feed_url(u: str) -> str:
    """Resolve an Apple Podcasts page URL to the show's direct RSS feed URL.

    URLs whose host is ``podcasts.apple.com`` (or a subdomain of it) and whose
    path contains an ``id<digits>`` segment are looked up through the public
    iTunes Lookup API; the first non-blank ``feedUrl`` in the response wins.

    Args:
        u: Feed URL as listed in the feeds file.

    Returns:
        The resolved feed URL with surrounding whitespace stripped, or ``u``
        unchanged when it is not an Apple Podcasts page, carries no show id,
        the lookup returns no usable ``feedUrl``, or anything goes wrong.
        This function never raises.
    """
    try:
        parsed = urlparse(u)
        # ``hostname`` drops userinfo and port, so neither
        # "podcasts.apple.com@evil.test" nor "podcasts.apple.com.evil.test"
        # can pass for Apple the way a substring test on ``netloc`` allowed.
        host = (parsed.hostname or "").lower()
        is_apple = (
            host == _APPLE_PODCASTS_HOST
            or host.endswith("." + _APPLE_PODCASTS_HOST)
        )
        if not is_apple:
            return u

        m = _APPLE_ID_SEGMENT.search(parsed.path)
        if m is None:
            return u

        pid = m.group(1)
        lookup = f"https://itunes.apple.com/lookup?id={pid}"
        r = requests.get(lookup, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
        if not r.ok:
            return u

        for res in r.json().get("results") or []:
            feed = res.get("feedUrl")
            # Skip missing, non-string and whitespace-only values instead of
            # returning an empty URL to the caller.
            if isinstance(feed, str) and feed.strip():
                return feed.strip()
        # otherwise return unchanged
        return u
    except Exception as exc:
        # Deliberate best-effort boundary: a failed lookup must not abort feed
        # loading, but it should be visible in the log rather than silent.
        print(f"[rss] could not resolve {u!r}, keeping it unchanged: {exc}", flush=True)
        return u


def _text(el):
    """Return the stripped text of *el*; "" when *el* is None or has no text."""
    return (el.text or "").strip() if el is not None else ""


# Hunk 2 (@@ -403,6 +432,8 @@ def load_feeds_list():) wires the helper into the
# tail of load_feeds_list(); the two lines after the sorted(...) line are the
# additions:
#     print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
#     # unique, keep order
#     feeds = sorted(list(dict.fromkeys(feeds)))
#     feeds = [_resolve_feed_url(u) for u in feeds]
#     print(f"[rss] resolved {len(feeds)} feed URL(s) (after normalization)", flush=True)
#     print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
#     return feeds
# NOTE(review): resolution runs after de-duplication, so two page URLs that
# resolve to the same feed would leave duplicates in the list -- confirm
# whether a second dict.fromkeys() pass is wanted. Also, sorted() does not
# "keep order" as the comment above it says.