Web app updates

This commit is contained in:
2025-09-08 18:28:44 +02:00
parent 6284f0ee7b
commit 33e11e45f1

View File

@@ -1,5 +1,5 @@
from flask import Flask, request, redirect from flask import Flask, request, redirect, send_file, abort, Response, make_response
import os, json, time, requests import os, json, time, requests, re
from pathlib import Path from pathlib import Path
from redis import Redis from redis import Redis
from rq import Queue from rq import Queue
@@ -7,6 +7,8 @@ from rq import Queue
MEILI_URL = os.getenv("MEILI_URL", "http://meili:7700") MEILI_URL = os.getenv("MEILI_URL", "http://meili:7700")
MEILI_KEY = os.getenv("MEILI_KEY", "") # from .env MEILI_KEY = os.getenv("MEILI_KEY", "") # from .env
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0") REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
LIBRARY_ROOT = Path(os.getenv("LIBRARY_ROOT", "/library")).resolve()
TRANSCRIPT_ROOT = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts")).resolve()
app = Flask(__name__) app = Flask(__name__)
FEED_LOG = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts")) / "_feed.log" FEED_LOG = Path(os.getenv("TRANSCRIPT_ROOT", "/transcripts")) / "_feed.log"
@@ -184,6 +186,120 @@ def recent():
html.append(f"<div class='card'><b>{item.get('title','')}</b><br><small>{item.get('uploader','')}{item.get('date','')}{item.get('status','')}</small><br><small>{item.get('path','')}</small></div>") html.append(f"<div class='card'><b>{item.get('title','')}</b><br><small>{item.get('uploader','')}{item.get('date','')}{item.get('status','')}</small><br><small>{item.get('path','')}</small></div>")
return "\n".join(html) return "\n".join(html)
def _safe_under(base: Path, rel_path: str) -> Path:
    """Resolve *rel_path* beneath *base* and refuse anything that escapes it.

    Leading slashes are dropped from *rel_path* so that it is always joined
    relative to *base*. The joined path is then resolved (following ``..``
    and symlinks) and checked for containment.

    Args:
        base: Directory that the result must live in.
        rel_path: Caller-supplied path, interpreted relative to *base*.

    Returns:
        The resolved absolute path.

    Raises:
        FileNotFoundError: If the resolved path is not inside *base*.
    """
    root = base.resolve()
    candidate = (root / rel_path.lstrip("/")).resolve()
    # Compare path components, not string prefixes: a plain startswith()
    # check would accept a sibling such as "/library-private" for "/library".
    try:
        candidate.relative_to(root)
    except ValueError:
        raise FileNotFoundError("Path escapes base") from None
    return candidate
def _vtt_header():
    """Return the WebVTT signature line followed by one blank line."""
    return "WEBVTT\n\n"
def _srt_to_vtt_text(srt_text: str) -> str:
    """Convert SubRip (SRT) text to WebVTT with a minimal rewrite.

    Only two things change: the decimal comma in timecodes becomes a dot, and
    a ``WEBVTT`` header is prepended when the text does not already start
    with one.

    Args:
        srt_text: Contents of an SRT file.

    Returns:
        The text in WebVTT form.
    """
    # A UTF-8 byte-order mark is not whitespace, so lstrip() below would not
    # remove it and an existing header would go unnoticed.
    text = srt_text.lstrip("\ufeff")
    timecode = re.compile(r"(\d{2}:\d{2}:\d{2}),(\d{3})")
    # Rewrite only timing lines; caption text that merely looks like a
    # timecode (e.g. "at 10:15:30,123") must be left alone.
    body = "\n".join(
        timecode.sub(r"\1.\2", line) if "-->" in line else line
        for line in text.split("\n")
    )
    if not body.lstrip().upper().startswith("WEBVTT"):
        body = _vtt_header() + body
    return body
def _json_to_vtt_text(json_text: str) -> str:
    """Build WebVTT text from Whisper-style JSON segments.

    Accepts either an object with a ``"segments"`` list or a bare list of
    segments, each shaped like ``{"start": ..., "end": ..., "text": ...}``.
    Segments that are not objects, or whose times cannot be read as finite
    numbers, are skipped. Cues are numbered from 1 in output order.

    Args:
        json_text: JSON document as text.

    Returns:
        WebVTT text. Only the header is returned when the JSON is invalid or
        holds no usable segments.
    """

    def fmt(seconds: float) -> str:
        # Work in whole milliseconds so rounding carries into minutes/hours
        # instead of producing an invalid "60.000" seconds field.
        total_ms = max(0, round(seconds * 1000))
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        secs, millis = divmod(rest, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    try:
        data = json.loads(json_text)
    except (TypeError, ValueError, RecursionError):
        return _vtt_header()

    # A bare list has no .get(); branch on the type to support both shapes.
    if isinstance(data, dict):
        segments = data.get("segments")
    else:
        segments = data
    if not isinstance(segments, list):
        segments = []

    out = [_vtt_header()]
    idx = 1
    for seg in segments:
        if not isinstance(seg, dict):
            continue
        try:
            start = float(seg.get("start", 0))
            end = float(seg.get("end", start + 0.5))
            # fmt() raises on NaN/inf; treat those like any unreadable time.
            timing = f"{fmt(start)} --> {fmt(end)}"
        except (TypeError, ValueError, OverflowError):
            continue
        text = str(seg.get("text", "")).strip()
        out.append(f"{idx}")
        out.append(timing)
        out.append(text)
        out.append("")  # blank line terminates the cue
        idx += 1
    return "\n".join(out).rstrip() + "\n"
def _parse_vtt_to_cues(vtt_text: str):
    """Parse WebVTT text into a list of cues.

    This is a deliberately small parser: it skips the header line, blank
    lines, optional numeric cue counters and any line that is not a timing
    line, then gathers the text lines that follow each timing line up to the
    next blank line. Multi-line cue text is joined with single spaces, and
    cues with no text are dropped.

    Args:
        vtt_text: WebVTT document as text.

    Returns:
        A list of ``{"start": float, "end": float, "text": str}`` dicts with
        times in seconds. A timestamp that cannot be parsed becomes ``0.0``.
    """

    def to_seconds(ts: str) -> float:
        # Accepts "HH:MM:SS.mmm" and "MM:SS.mmm"; a decimal comma is tolerated.
        parts = ts.replace(",", ".").split(":")
        try:
            if len(parts) == 3:
                hours, minutes, seconds = int(parts[0]), int(parts[1]), float(parts[2])
            elif len(parts) == 2:
                hours, minutes, seconds = 0, int(parts[0]), float(parts[1])
            else:
                return 0.0
        except ValueError:
            return 0.0
        return hours * 3600 + minutes * 60 + seconds

    cues = []
    lines = vtt_text.splitlines()
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i].strip()
        i += 1
        if not line or line.upper().startswith("WEBVTT"):
            continue
        # Optional numeric counter line: the timing line follows it.
        if line.isdigit() and i < total:
            line = lines[i].strip()
            i += 1
        if "-->" not in line:
            continue

        left, right = line.split("-->", 1)
        start = to_seconds(left.strip())
        # Cue settings may follow the end time after spaces *or* tabs, so
        # split on any whitespace rather than a single space character.
        end_fields = right.split()
        end = to_seconds(end_fields[0]) if end_fields else 0.0

        texts = []
        while i < total and lines[i].strip():
            texts.append(lines[i].strip())
            i += 1
        # Skip the blank separator line(s) after the cue.
        while i < total and not lines[i].strip():
            i += 1

        cue_text = " ".join(texts).strip()
        if cue_text:
            cues.append({"start": start, "end": end, "text": cue_text})
    return cues
def _load_transcript_variants(basename: str):
    """Load the first transcript file found for *basename*.

    Looks directly under ``TRANSCRIPT_ROOT`` for ``<basename>.vtt``,
    ``.srt``, ``.json`` and ``.txt``, in that order, and returns the first
    one that can be read. Candidates that resolve outside the root are
    ignored.

    Args:
        basename: File name without extension.

    Returns:
        A tuple ``(kind, content_text, path_used)`` where *kind* is one of
        ``'vtt'``, ``'srt'``, ``'json'``, ``'txt'``. When nothing is found
        the result is ``(None, "", "")``.
    """
    root = TRANSCRIPT_ROOT
    for kind in ("vtt", "srt", "json", "txt"):
        try:
            path = (root / f"{basename}.{kind}").resolve()
            # Component-wise containment; a string-prefix test would accept
            # siblings such as "/transcripts-old".
            path.relative_to(root)
            if not path.is_file():
                continue
            # "utf-8-sig" drops a leading BOM so header detection downstream
            # sees "WEBVTT" at the very start of the text.
            text = path.read_text(encoding="utf-8-sig", errors="ignore")
        except (OSError, ValueError, RuntimeError):
            continue
        return (kind, text, str(path))
    return (None, "", "")
@app.get("/search") @app.get("/search")
def search(): def search():
qstr = request.args.get("q","") qstr = request.args.get("q","")
@@ -197,9 +313,9 @@ def search():
segs = h.get("segments",[]) segs = h.get("segments",[])
ts = int(segs[0]["start"]) if segs else 0 ts = int(segs[0]["start"]) if segs else 0
if typ == 'podcast': if typ == 'podcast':
open_link = f"/open?file={requests.utils.quote(src)}&t={ts}" open_link = f"/play?file={requests.utils.quote(src)}&t={ts}"
else: else:
open_link = f"/open?file={requests.utils.quote(src)}" open_link = f"/play?file={requests.utils.quote(src)}"
transcript_link = f" | <a href=\"/subtitle?file={requests.utils.quote(src)}\">Transcript</a>" if typ == 'podcast' else "" transcript_link = f" | <a href=\"/subtitle?file={requests.utils.quote(src)}\">Transcript</a>" if typ == 'podcast' else ""
badge = f"<span class='badge'>{typ}</span>" badge = f"<span class='badge'>{typ}</span>"
out.append( out.append(
@@ -217,12 +333,135 @@ def open_local():
t = int(request.args.get("t","0")) t = int(request.args.get("t","0"))
return f"<pre>{file}\nStart at: {t} sec</pre>" return f"<pre>{file}\nStart at: {t} sec</pre>"
@app.get('/media')
def media():
    """Serve a media file from under ``LIBRARY_ROOT``.

    The ``file`` query parameter is resolved with ``_safe_under``. Anything
    that escapes the library root, cannot be resolved, or is not a regular
    file yields a 404.
    """
    rel = request.args.get('file', '')
    try:
        full = _safe_under(LIBRARY_ROOT, rel)
    except (OSError, ValueError, RuntimeError):
        # FileNotFoundError (an OSError) signals an escape from the root; the
        # others cover paths that cannot be resolved at all.
        abort(404)
    # is_file() rather than exists(): a directory exists but cannot be sent.
    if not full.is_file():
        abort(404)
    # Let Flask guess mimetype
    return send_file(str(full), conditional=True)
@app.get('/play')
def play():
    """Render a minimal player page for a library file.

    Query parameters:
        file: Path of the media file; passed on to ``/media`` and to
            ``/subtitle`` (as a VTT track).
        t: Start offset in whole seconds. Missing, empty or non-numeric
            values fall back to 0.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    rel = request.args.get('file', '')
    # type=int makes a malformed value fall back to the default instead of
    # raising; negative offsets are clamped to the start.
    t = max(0, request.args.get('t', default=0, type=int))
    src = f"/media?file={requests.utils.quote(rel)}"
    track = f"/subtitle?file={requests.utils.quote(rel)}&format=vtt"
    return (
        "<!doctype html><meta charset='utf-8'>"
        "<title>Play</title>"
        "<style>body{font-family:system-ui;margin:1rem}</style>"
        # `rel` comes straight from the query string, so escape it before
        # placing it in markup.
        f"<h3>{escape(rel)}</h3>"
        f"<video id='v' controls style='max-width:100%;width:100%'>"
        f"  <source src='{src}'>"
        f"  <track kind='subtitles' src='{track}' srclang='en' label='Transcript' default>"
        "  Your browser cannot play this media."
        "</video>"
        "<script>const v=document.getElementById('v');"
        f"v.addEventListener('loadedmetadata',()=>{{try{{v.currentTime={t};}}catch(e){{}}}});"
        "</script>"
    )
def _normalize_to_vtt(kind, content):
    """Return *content* as WebVTT text, or None when *kind* has no timing data."""
    if kind == "vtt":
        if content.lstrip().upper().startswith("WEBVTT"):
            return content
        return _vtt_header() + content
    if kind == "srt":
        return _srt_to_vtt_text(content)
    if kind == "json":
        return _json_to_vtt_text(content)
    return None


@app.get("/subtitle")
def subtitle():
    """Serve or preview the transcript belonging to a media file.

    Query parameters:
        file: Media path; only its base name (without extension) is used to
            look up the transcript.
        format: When ``vtt``, respond with ``text/vtt`` for the player's
            ``<track>`` element, or 404 if no timed transcript exists.
        raw: When ``1``, the HTML preview shows the normalised VTT verbatim.

    Without ``format=vtt`` the response is an HTML page: a list of cues with
    clickable timestamps for timed transcripts, paragraphs for plain text,
    or a short "not found" message.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    file = request.args.get("file", "")
    fmt = request.args.get("format", "").lower()  # when 'vtt', serve as text/vtt for player
    base = os.path.splitext(os.path.basename(file))[0]

    kind, content, _used_path = _load_transcript_variants(base)
    vtt_text = _normalize_to_vtt(kind, content)

    if fmt == "vtt":
        if vtt_text is None:
            # No structured timing available
            abort(404)
        resp = make_response(vtt_text)
        resp.headers["Content-Type"] = "text/vtt; charset=utf-8"
        return resp

    # `base` derives from the query string and the transcript text is
    # arbitrary file content, so both are escaped before going into markup.
    title = escape(base)

    if vtt_text is not None:
        # If ?raw=1 is present, show raw VTT for debugging
        if request.args.get("raw") == "1":
            return (
                "<!doctype html><meta charset='utf-8'>"
                "<title>Transcript</title>"
                "<style>body{font-family:system-ui;margin:1rem}</style>"
                f"<h3>Transcript (raw VTT): {title}</h3>"
                f"<pre style='white-space:pre-wrap'>{escape(vtt_text)}</pre>"
            )

        # Otherwise render a readable transcript with clickable timestamps
        items = []
        for cue in _parse_vtt_to_cues(vtt_text):
            start = int(cue["start"])
            hh, rest = divmod(start, 3600)
            mm, ss = divmod(rest, 60)
            ts_label = f"{hh:02d}:{mm:02d}:{ss:02d}" if hh else f"{mm:02d}:{ss:02d}"
            items.append(
                "<div class='cue'>"
                f"<button class='ts' data-t='{start}'>{ts_label}</button>"
                f"<span class='text'>{escape(cue['text'])}</span>"
                "</div>"
            )
        cue_list = "".join(items) or "<small>No cues found.</small>"
        return (
            "<!doctype html><meta charset='utf-8'>"
            "<title>Transcript</title>"
            "<style>"
            ":root{--fg:#111;--muted:#666;--bg:#fff;--ring:#e9ecef;}"
            "body{font-family:system-ui;margin:1rem;line-height:1.5;color:var(--fg);background:var(--bg)}"
            ".wrap{max-width:900px;margin:0 auto}"
            ".meta{color:var(--muted);margin:.25rem 0 1rem}"
            ".cue{display:flex;gap:.75rem;align-items:flex-start;padding:.35rem .25rem;border-bottom:1px solid #f0f0f0}"
            ".cue .text{white-space:pre-wrap}"
            ".ts{font:inherit;border:1px solid #ccc;background:#fafafa;border-radius:6px;padding:.15rem .45rem;cursor:pointer}"
            ".ts:hover{background:#f2f2f2}"
            "</style>"
            f"<div class='wrap'><h3>Transcript: {title}</h3>"
            "<div class='meta'>Click a timestamp to open the player at that point.</div>"
            f"<div id='list'>{cue_list}</div>"
            "<script>\n"
            "const file=new URLSearchParams(location.search).get('file')||'';\n"
            "document.querySelectorAll('.ts').forEach(b=>{b.addEventListener('click',()=>{\n"
            "  const t=b.dataset.t||'0';\n"
            "  const url='/play?file='+encodeURIComponent(file)+'&t='+t;\n"
            "  window.open(url,'_blank');\n"
            "});});\n"
            "</script></div>"
        )

    if kind == "txt":
        safe = content.strip()
        # Simple paragraphization: split on blank lines, wrap each in <p>
        paras = [p.strip() for p in re.split(r"\n{2,}", safe) if p.strip()]
        # Built outside any f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        body = "".join(
            "<p>" + escape(re.sub(r"[\n\r]+", " ", p)) + "</p>" for p in paras[:2000]
        )
        if not body:
            body = '<pre style="white-space:pre-wrap">' + escape(safe) + "</pre>"
        return (
            "<!doctype html><meta charset='utf-8'>"
            "<title>Transcript</title>"
            "<style>body{font-family:system-ui;margin:1rem;line-height:1.6;max-width:900px} p{margin:.4rem 0}</style>"
            f"<h3>Transcript (plain text): {title}</h3>"
            f"{body}"
        )

    return "<small>No transcript found.</small>"