"""PodX web front end.

Serves a single page, enqueues URLs for a background worker through RQ,
exposes the tail of the worker's feed log, and proxies searches to the
Meilisearch ``library`` index.
"""

import html
import json
import os
import time
from pathlib import Path
from urllib.parse import quote

import requests
from flask import Flask, redirect, request
from redis import Redis
from rq import Queue

MEILI_URL = os.getenv("MEILI_URL", "http://meili:7700")
MEILI_KEY = os.getenv("MEILI_KEY", "")  # from .env
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")

app = Flask(__name__)

# Single source of truth for where the feed log and .vtt files are read from,
# so every route honours the TRANSCRIPT_ROOT override.
TRANSCRIPT_ROOT = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts"))
FEED_LOG = TRANSCRIPT_ROOT / "_feed.log"

q = Queue(connection=Redis.from_url(REDIS_URL))

# Number of trailing bytes of the feed log that are inspected per request.
_FEED_TAIL_BYTES = 65536

# NOTE(review): this template (and the response fragments further down) look
# like they lost their HTML markup in this copy of the file; the text is kept
# exactly as found - confirm against version control before deploying.
PAGE = """ PodX - unified search

PodX

Batch

Unified search (podcasts + PDFs + EPUB + Kiwix + Web)

Recent jobs

Activity

Loading…

"""


def _esc(value) -> str:
    """Return *value* as text that is safe to embed in an HTML response."""
    return html.escape(str(value), quote=True)


def read_feed_tail(max_lines: int = 200) -> list:
    """Return up to *max_lines* of the most recent feed-log events.

    Only the last ~64 KB of ``FEED_LOG`` is read. Each non-blank line is
    parsed as JSON; lines that do not parse, or that are not JSON objects,
    are skipped. Events are returned oldest first.

    Returns an empty list when the log is missing or unreadable, or when
    *max_lines* is not positive.
    """
    if max_lines <= 0:
        # lines[-0:] would be the whole list, so handle this explicitly.
        return []
    try:
        with open(FEED_LOG, "rb") as f:
            try:
                f.seek(-_FEED_TAIL_BYTES, os.SEEK_END)
            except OSError:
                # File is shorter than the tail window: read all of it.
                f.seek(0)
            data = f.read().decode("utf-8", errors="ignore")
    except OSError:
        return []

    lines = [x.strip() for x in data.splitlines() if x.strip()]
    events = []
    for ln in lines[-max_lines:]:
        # A line cut in half by the seek simply fails to parse and is dropped.
        try:
            item = json.loads(ln)
        except ValueError:
            continue
        if isinstance(item, dict):
            events.append(item)
    return events


@app.get("/api/status")
def api_status():
    """Return the recent feed events plus a short summary as JSON.

    ``last_time`` is the server's clock at response time, not a timestamp
    taken from the log.
    """
    events = read_feed_tail(200)
    last = events[-1] if events else {}
    summary = {
        "last_status": last.get("status"),
        "last_title": last.get("title") or last.get("path") or last.get("url"),
        "last_time": int(time.time()),
        "count": len(events),
    }
    return {"ok": True, "summary": summary, "events": events}


def meili_search(qstr, limit=30):
    """Query the Meilisearch ``library`` index and return its list of hits.

    Best effort: a blank query, a non-200 reply, a transport error or a
    malformed body all yield ``[]`` so the UI keeps working.
    """
    if not qstr or not qstr.strip():
        return []
    try:
        r = requests.post(
            f"{MEILI_URL}/indexes/library/search",
            headers={
                "Authorization": f"Bearer {MEILI_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps({"q": qstr, "limit": limit}),
            timeout=5,
        )
        if r.status_code != 200:
            app.logger.warning("Meilisearch returned HTTP %s", r.status_code)
            return []
        payload = r.json()
    except (requests.RequestException, ValueError):
        app.logger.warning("Meilisearch query failed", exc_info=True)
        return []

    hits = payload.get("hits") if isinstance(payload, dict) else None
    return hits if isinstance(hits, list) else []


@app.get("/health")
def health():
    """Liveness probe."""
    return "ok"


@app.get("/")
def index():
    """Serve the main page."""
    return PAGE


@app.post("/enqueue")
def enqueue():
    """Queue one URL for ``worker.handle_url`` and go back to the main page.

    A missing ``url`` form field is rejected by Flask as a bad request; a
    blank one is ignored, matching ``enqueue_batch``.
    """
    url = request.form["url"].strip()
    if url:
        q.enqueue("worker.handle_url", url)
    return redirect("/")


@app.post("/enqueue_batch")
def enqueue_batch():
    """Queue every non-blank line of the ``urls`` form field."""
    urls = [u.strip() for u in request.form["urls"].splitlines() if u.strip()]
    for u in urls:
        q.enqueue("worker.handle_url", u)
    return redirect("/")


@app.get("/recent")
def recent():
    """Render the 40 most recent feed events, newest first."""
    cards = []
    for item in reversed(read_feed_tail(40)):
        # Log fields originate from fetched media metadata: escape them all.
        title = _esc(item.get("title", ""))
        uploader = _esc(item.get("uploader", ""))
        date = _esc(item.get("date", ""))
        status = _esc(item.get("status", ""))
        path = _esc(item.get("path", ""))
        cards.append(
            f"\n\n{title}\n"
            f"{uploader} — {date} — {status}\n"
            f"{path}\n\n"
        )
    return "\n".join(cards)


def _first_segment_start(hit) -> int:
    """Return the start of the hit's first segment in whole seconds, or 0."""
    segs = hit.get("segments") or []
    try:
        return int(segs[0]["start"]) if segs else 0
    except (KeyError, IndexError, TypeError, ValueError):
        return 0


@app.get("/search")
def search():
    """Render the hits for the ``q`` query argument."""
    qstr = request.args.get("q", "")
    out = []
    for h in meili_search(qstr):
        if not isinstance(h, dict):
            continue
        t = h.get("title") or ""
        src = str(h.get("source") or "")
        typ = h.get("type") or ""

        # Prefer the index's formatted snippet, else the first 300 characters.
        ctx = (h.get("_formatted") or {}).get("text")
        if ctx is None:
            ctx = (h.get("text") or "")[:300]

        ts = _first_segment_start(h)
        is_podcast = typ == "podcast"
        if is_podcast:
            open_link = f"/open?file={quote(src)}&t={ts}"
        else:
            open_link = f"/open?file={quote(src)}"
        # NOTE(review): open_link is not referenced by the text below; the
        # markup that used it seems to be missing from this copy - confirm.
        transcript_link = " | Transcript" if is_podcast else ""
        badge = _esc(typ)

        # Indexed content is untrusted, so everything is escaped. If highlight
        # markup is ever requested from the index it will need allow-listing.
        out.append(
            f"\n\n{badge}{_esc(t)}\n{_esc(src)}"
            f"\n\n{_esc(ctx)}\n\n"
            "Open"
            f"{transcript_link}"
            "\n\n"
        )
    return "\n".join(out) or "No results yet."


@app.get("/open")
def open_local():
    """Show the requested file name and start offset.

    A missing or non-integer ``t`` falls back to 0 instead of failing the
    request.
    """
    file = request.args.get("file", "")
    t = request.args.get("t", default=0, type=int)
    return f"\n\n{_esc(file)}\nStart at: {t} sec\n\n"


@app.get("/subtitle")
def subtitle():
    """Return the ``.vtt`` transcript stored for the requested file.

    Only the base name of ``file`` is used, so the lookup cannot leave
    ``TRANSCRIPT_ROOT``.
    """
    file = request.args.get("file", "")
    base = os.path.splitext(os.path.basename(file))[0]
    vtt_path = TRANSCRIPT_ROOT / f"{base}.vtt"
    try:
        content = vtt_path.read_text(encoding="utf-8")
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return "No VTT found."
    # Cue text can contain markup-like tags; escape it for display.
    return f"\n\n{_esc(content)}\n\n"