Adding support for direct resolution of Apple Podcasts URIs

This commit is contained in:
2025-09-11 14:16:18 +02:00
parent c571969b93
commit 0da1f74944

View File

@@ -42,6 +42,35 @@ OUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
# Create PODCASTS_ROOT, including any missing parent directories, at import time;
# exist_ok=True makes this a no-op when the directory is already present.
PODCASTS_ROOT.mkdir(parents=True, exist_ok=True)
# --- Helper to resolve Apple Podcasts URLs to direct RSS feeds ---
def _resolve_feed_url(u: str) -> str:
"""
Accepts a URL that may be an Apple Podcasts show/episode page and tries to resolve it
into a direct RSS feed URL using the public iTunes Lookup API.
For unknown hosts or failures, returns the original URL.
"""
try:
parsed = urlparse(u)
host = (parsed.netloc or "").lower()
if "podcasts.apple.com" in host:
# Apple Podcasts URLs typically end with .../id<digits>
m = re.search(r"id(\d+)", parsed.path)
if m:
pid = m.group(1)
lookup = f"https://itunes.apple.com/lookup?id={pid}"
r = requests.get(lookup, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
if r.ok:
data = r.json()
for res in data.get("results", []) or []:
feed = res.get("feedUrl")
if feed:
return feed.strip()
# otherwise return unchanged
return u
except Exception:
return u
def _text(el):
return (el.text or "").strip() if el is not None else ""
@@ -403,6 +432,8 @@ def load_feeds_list():
print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
# unique, then sorted (dict.fromkeys de-duplicates; sorted() does not keep the file order)
feeds = sorted(list(dict.fromkeys(feeds)))
feeds = [_resolve_feed_url(u) for u in feeds]
print(f"[rss] resolved {len(feeds)} feed URL(s) (after normalization)", flush=True)
print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
return feeds