Adding support for direct resolution of Apple Podcasts URIs
This commit is contained in:
@@ -42,6 +42,35 @@ OUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
|
||||
PODCASTS_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# --- Helper to resolve Apple Podcasts URLs to direct RSS feeds ---
|
||||
def _resolve_feed_url(u: str) -> str:
|
||||
"""
|
||||
Accepts a URL that may be an Apple Podcasts show/episode page and tries to resolve it
|
||||
into a direct RSS feed URL using the public iTunes Lookup API.
|
||||
For unknown hosts or failures, returns the original URL.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(u)
|
||||
host = (parsed.netloc or "").lower()
|
||||
if "podcasts.apple.com" in host:
|
||||
# Apple Podcasts URLs typically end with .../id<digits>
|
||||
m = re.search(r"id(\d+)", parsed.path)
|
||||
if m:
|
||||
pid = m.group(1)
|
||||
lookup = f"https://itunes.apple.com/lookup?id={pid}"
|
||||
r = requests.get(lookup, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
|
||||
if r.ok:
|
||||
data = r.json()
|
||||
for res in data.get("results", []) or []:
|
||||
feed = res.get("feedUrl")
|
||||
if feed:
|
||||
return feed.strip()
|
||||
# otherwise return unchanged
|
||||
return u
|
||||
except Exception:
|
||||
return u
|
||||
|
||||
|
||||
def _text(el):
|
||||
return (el.text or "").strip() if el is not None else ""
|
||||
|
||||
@@ -403,6 +432,8 @@ def load_feeds_list():
|
||||
print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
|
||||
# unique, keep order
|
||||
feeds = sorted(list(dict.fromkeys(feeds)))
|
||||
feeds = [_resolve_feed_url(u) for u in feeds]
|
||||
print(f"[rss] resolved {len(feeds)} feed URL(s) (after normalization)", flush=True)
|
||||
print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
|
||||
return feeds
|
||||
|
||||
|
Reference in New Issue
Block a user