Adding support for direct resolution of Apple Podcasts URIs
This commit is contained in:
@@ -42,6 +42,35 @@ OUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
|
|||||||
PODCASTS_ROOT.mkdir(parents=True, exist_ok=True)
|
PODCASTS_ROOT.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Helper to resolve Apple Podcasts URLs to direct RSS feeds ---
|
||||||
|
def _resolve_feed_url(u: str) -> str:
    """
    Resolve an Apple Podcasts show/episode page URL to its direct RSS feed URL.

    The numeric show id is read from the ``id<digits>`` path segment and looked
    up through the public iTunes Lookup API; the first non-empty ``feedUrl`` in
    the response is returned with surrounding whitespace removed.

    Any other URL is returned unchanged. The original URL is also returned when
    the lookup fails or yields no feed: resolution is best-effort and this
    function never raises.
    """
    try:
        # .hostname is lower-cased and excludes any port or userinfo part.
        host = urlparse(u).hostname or ""
        # Exact host (or subdomain) match: a plain substring test would also
        # accept look-alike hosts such as "podcasts.apple.com.evil.example".
        if host != "podcasts.apple.com" and not host.endswith(".podcasts.apple.com"):
            return u

        # The show id is a whole path segment (".../id1200361736"). Anchoring on
        # the segment keeps slugs like "covid19-update" from matching as id "19".
        m = re.search(r"/id(\d+)(?=/|$)", urlparse(u).path)
        if not m:
            return u

        lookup = f"https://itunes.apple.com/lookup?id={m.group(1)}"
        r = requests.get(lookup, timeout=TIMEOUT, headers={"User-Agent": "podx/1.0"})
        if not r.ok:
            return u

        for res in r.json().get("results", []) or []:
            # Strip before testing so a whitespace-only feedUrl is skipped
            # instead of being returned as an empty string.
            feed = (res.get("feedUrl") or "").strip()
            if feed:
                return feed
        # No result carried a feed URL: fall back to the input.
        return u
    except Exception as exc:
        # Deliberate best-effort: report the problem, then fall back to the input.
        print(f"[rss] could not resolve feed URL {u!r}: {exc}", flush=True)
        return u
|
||||||
|
|
||||||
|
|
||||||
def _text(el):
    """Return ``el.text`` stripped of surrounding whitespace, or "" if ``el`` or its text is missing."""
    if el is None:
        return ""
    raw = el.text
    return raw.strip() if raw else ""
|
||||||
|
|
||||||
@@ -403,6 +432,8 @@ def load_feeds_list():
|
|||||||
print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
|
print(f"[rss] feeds file not found: {FEEDS_FILE}", flush=True)
|
||||||
# unique, then sorted alphabetically (original order is not kept)
|
# unique, then sorted alphabetically (original order is not kept)
|
||||||
feeds = sorted(list(dict.fromkeys(feeds)))
|
feeds = sorted(list(dict.fromkeys(feeds)))
|
||||||
|
feeds = [_resolve_feed_url(u) for u in feeds]
|
||||||
|
print(f"[rss] resolved {len(feeds)} feed URL(s) (after normalization)", flush=True)
|
||||||
print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
|
print(f"[rss] parsed {len(feeds)} feed URL(s)", flush=True)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user