#!/usr/bin/env python3
"""Build a newsletter HTML file directly from the ``sources`` table.

For the newest ``--limit`` sources the script asks an OpenAI-compatible chat
endpoint for a summary, stores the summary (and, best-effort, its embedding)
through the ``db`` helpers, renders the Jinja2 templates from ``templates/``
into ``--out`` and, with ``--publish``, uploads the result to Ghost as a draft.
"""
import argparse
import json
import os
import pathlib
import re
import sys
import time
from dataclasses import dataclass
from datetime import date

import jwt
import requests
import yaml
from dotenv import load_dotenv
from jinja2 import Template

from db import connect as db_connect, insert_summary, upsert_embedding
from emb import embed_text

load_dotenv()

ROOT = pathlib.Path(__file__).parent
REPO = ROOT.parent
TEMPLATES = REPO / "templates"

# Per-provider fallbacks used when neither CLI, environment nor config.yaml
# supply a value.  Unknown providers fall back to the OpenAI entries.
_DEFAULT_API_BASE = {
    "openwebui": "http://localhost:3000",
    "ollama": "http://localhost:11434",
}
_DEFAULT_MODEL = {
    "openwebui": "qwen2.5-7b-instruct",
    "ollama": "llama3.1:8b-instruct",
}
_FALLBACK_API_BASE = "https://api.openai.com"
_FALLBACK_MODEL = "gpt-4o-mini"

# Upper bound (seconds) for the exponential back-off between LLM retries.
_MAX_BACKOFF_SECONDS = 10


def read_file(p):
    """Return the UTF-8 text content of the file at path *p*."""
    return pathlib.Path(p).read_text(encoding="utf-8")


def load_config():
    """Load ``config.yaml`` from the repo root and stamp today's date on it.

    Returns:
        The parsed mapping with an added ``"date"`` key (ISO ``YYYY-MM-DD``).
        An empty YAML file yields a dict containing only ``"date"``.
    """
    # safe_load returns None for an empty document; normalise to a dict so
    # the assignment below cannot fail with a TypeError.
    cfg = yaml.safe_load(read_file(REPO / "config.yaml")) or {}
    cfg["date"] = date.today().isoformat()
    return cfg


@dataclass
class LLMConfig:
    """Resolved settings for one chat-completion endpoint."""

    provider: str
    api_base: str
    model: str
    api_key: str | None
    temperature: float
    top_p: float
    presence_penalty: float
    frequency_penalty: float
    timeout_seconds: int
    # Number of *additional* attempts after the first failed request.
    max_retries: int


def resolve_llm_config(cfg: dict, args) -> LLMConfig:
    """Merge LLM settings from CLI args, environment and config file.

    Precedence for every setting is: CLI argument (when not ``None``), then a
    non-empty environment variable, then the ``llm`` section of *cfg*, then a
    built-in default.  The API key is read from the environment only.

    Args:
        cfg: Parsed configuration mapping; may be empty or ``None``.
        args: Object exposing the optional CLI overrides as attributes
            (missing attributes are treated as "not given").

    Returns:
        A fully populated :class:`LLMConfig`.

    Raises:
        ValueError: If a numeric setting cannot be converted.
    """
    llm_cfg = cfg.get("llm", {}) if cfg else {}

    def pick(cli_val, env_key, cfg_key, default=None):
        if cli_val is not None:
            return cli_val
        env_val = os.getenv(env_key) if env_key else None
        if env_val:
            return env_val
        return llm_cfg.get(cfg_key, default)

    def cli(name):
        return getattr(args, name, None)

    provider = pick(cli("llm_provider"), "LLM_PROVIDER", "provider", "openwebui")
    api_base = pick(
        cli("llm_api_base"),
        "LLM_API_BASE",
        "api_base",
        _DEFAULT_API_BASE.get(provider, _FALLBACK_API_BASE),
    )
    model = pick(
        cli("llm_model"),
        "LLM_MODEL",
        "model",
        _DEFAULT_MODEL.get(provider, _FALLBACK_MODEL),
    )
    api_key = os.getenv("LLM_API_KEY") or (
        os.getenv("OPENAI_API_KEY") if provider == "openai" else None
    )

    return LLMConfig(
        provider=provider,
        api_base=api_base,
        model=model,
        api_key=api_key,
        temperature=float(pick(cli("temperature"), "LLM_TEMPERATURE", "temperature", 0.2)),
        top_p=float(pick(cli("top_p"), "LLM_TOP_P", "top_p", 1.0)),
        presence_penalty=float(
            pick(cli("presence_penalty"), "LLM_PRESENCE_PENALTY", "presence_penalty", 0.0)
        ),
        frequency_penalty=float(
            pick(cli("frequency_penalty"), "LLM_FREQUENCY_PENALTY", "frequency_penalty", 0.0)
        ),
        timeout_seconds=int(
            pick(cli("timeout_seconds"), "LLM_TIMEOUT_SECONDS", "timeout_seconds", 120)
        ),
        max_retries=int(pick(cli("max_retries"), "LLM_MAX_RETRIES", "max_retries", 2)),
    )


def chat_completion_llm(messages, llm: LLMConfig):
    """POST *messages* to the configured chat endpoint and return the reply.

    Connection errors, timeouts and HTTP 429/5xx responses are retried up to
    ``llm.max_retries`` additional times with a capped exponential back-off.
    Other HTTP errors are raised immediately.

    Args:
        messages: List of chat messages (``{"role": ..., "content": ...}``).
        llm: Endpoint settings.

    Returns:
        The ``content`` of the first choice in the JSON response.

    Raises:
        requests.RequestException: When the final attempt fails.
    """
    base = llm.api_base.rstrip("/")
    # Open WebUI serves the OpenAI-style API under /api; every other provider
    # handled here uses the /v1 prefix.
    route = "/api/chat/completions" if llm.provider == "openwebui" else "/v1/chat/completions"
    url = f"{base}{route}"

    headers = {"Content-Type": "application/json"}
    if llm.api_key:
        headers["Authorization"] = f"Bearer {llm.api_key}"

    payload = {
        "model": llm.model,
        "messages": messages,
        "temperature": llm.temperature,
        "top_p": llm.top_p,
        "presence_penalty": llm.presence_penalty,
        "frequency_penalty": llm.frequency_penalty,
        "stream": False,
    }

    attempts = max(0, llm.max_retries) + 1
    for attempt in range(attempts):
        is_last = attempt == attempts - 1
        try:
            r = requests.post(url, headers=headers, json=payload, timeout=llm.timeout_seconds)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            if is_last:
                raise
        else:
            transient = r.status_code == 429 or r.status_code >= 500
            if is_last or not transient:
                r.raise_for_status()
                return r.json()["choices"][0]["message"]["content"]
        time.sleep(min(2 ** attempt, _MAX_BACKOFF_SECONDS))


def _fill_placeholders(template: str, values: dict[str, str]) -> str:
    """Replace every ``{key}`` in *template* with ``values[key]`` in one pass.

    A single pass means inserted text is never re-scanned, so an article body
    that happens to contain e.g. ``{source_name}`` is left untouched.
    """
    if not values:
        return template
    tokens = {"{" + key + "}": val for key, val in values.items()}
    pattern = re.compile("|".join(re.escape(tok) for tok in tokens))
    # A callable replacement avoids backslash-escape processing of the values.
    return pattern.sub(lambda m: tokens[m.group(0)], template)


def _ghost_jwt(key: str) -> str:
    """Create a short-lived (5 min) Ghost Admin API token from ``id:secret``."""
    key_id, secret = key.split(":")
    iat = int(time.time())
    header = {"alg": "HS256", "kid": key_id, "typ": "JWT"}
    payload = {"iat": iat, "exp": iat + 5 * 60, "aud": "/admin/"}
    token = jwt.encode(payload, bytes.fromhex(secret), algorithm="HS256", headers=header)
    # PyJWT 1.x returns bytes, 2.x returns str; normalise for the HTTP header.
    return token.decode("ascii") if isinstance(token, bytes) else token


def _publish_draft(ghost_url: str, ghost_key: str, title: str, html: str, tags) -> str:
    """Create a Ghost draft post and return the URL reported by the API."""
    payload = {
        "posts": [
            {
                "title": title,
                "html": html,
                "status": "draft",
                "tags": [{"name": t} for t in tags],
            }
        ]
    }
    r = requests.post(
        f"{ghost_url.rstrip('/')}/posts/",
        headers={
            "Authorization": f"Ghost {_ghost_jwt(ghost_key)}",
            "Content-Type": "application/json",
        },
        data=json.dumps(payload),
        timeout=60,
    )
    r.raise_for_status()
    return r.json()["posts"][0]["url"]


def main():
    """CLI entry point: summarise the newest sources and write the newsletter."""
    ap = argparse.ArgumentParser(description="Build directly from DB (Top-N sources)")
    ap.add_argument("--db", default="data/newsletter.db")
    ap.add_argument("--limit", type=int, default=10)
    ap.add_argument("--out", required=True)
    ap.add_argument("--publish", action="store_true")
    # LLM overrides
    ap.add_argument("--llm-provider")
    ap.add_argument("--llm-api-base")
    ap.add_argument("--llm-model")
    ap.add_argument("--temperature", type=float)
    ap.add_argument("--top-p", type=float)
    ap.add_argument("--presence-penalty", type=float)
    ap.add_argument("--frequency-penalty", type=float)
    ap.add_argument("--timeout-seconds", type=int)
    ap.add_argument("--max-retries", type=int)
    args = ap.parse_args()

    cfg = load_config()
    llm = resolve_llm_config(cfg, args)

    # NOTE(review): the connection is never closed or committed here;
    # presumably the db helpers commit on their own - confirm.
    con = db_connect(args.db)
    # Fetch the content together with the metadata instead of one extra
    # query per row.
    rows = con.execute(
        "SELECT id, url, title, publisher, content FROM sources ORDER BY id DESC LIMIT ?",
        (args.limit,),
    ).fetchall()

    style_examples = read_file(TEMPLATES / "style_bank.md").strip()
    prompt_template = read_file(TEMPLATES / "prompt.txt").replace(
        "{style_examples}", style_examples
    )
    # NOTE(review): jinja2.Template does not autoescape by default, so titles
    # and summaries are inserted into the HTML verbatim.
    item_template = Template(read_file(TEMPLATES / "item.html.j2"))
    news_template = Template(read_file(TEMPLATES / "newsletter.html.j2"))

    blocks = []
    for sid, url, title, publisher, content in rows:
        heading = title or url
        prompt = _fill_placeholders(
            prompt_template,
            {
                "title": heading,
                "body": content or "",
                "source_name": publisher or "Zdroj neuveden",
                "related_hint": "—",
            },
        )
        summary = chat_completion_llm([{"role": "user", "content": prompt}], llm)
        sum_id = insert_summary(
            con, sid, heading, summary, newsletter_date=cfg["date"], tone_version="v1"
        )

        # Embeddings are best-effort: a failure must not abort the build,
        # but it should be visible to the operator.
        try:
            emb_model = os.getenv("EMB_MODEL", cfg["db"]["embed_model"])
            vec = embed_text(
                summary,
                os.getenv("EMB_API_BASE", cfg["llm"]["api_base"]),
                os.getenv("EMB_API_KEY", os.getenv("LLM_API_KEY")),
                emb_model,
            )
            upsert_embedding(con, "summaries", sum_id, emb_model, vec)
        except Exception as exc:
            print(f"Warning: embedding for summary {sum_id} skipped: {exc!r}", file=sys.stderr)

        blocks.append(item_template.render(title=heading, summary=summary))

    newsletter_title = Template(cfg["newsletter_title"]).render(date=cfg["date"])
    newsletter_subtitle = cfg.get("newsletter_subtitle", "")
    html_out = news_template.render(
        newsletter_title=newsletter_title,
        newsletter_subtitle=newsletter_subtitle,
        blocks=blocks,
    )

    outp = pathlib.Path(args.out)
    outp.parent.mkdir(parents=True, exist_ok=True)
    outp.write_text(html_out, encoding="utf-8")
    print(f"Saved: {outp}")

    if args.publish:
        ghost_url = os.getenv("GHOST_ADMIN_API_URL")
        ghost_key = os.getenv("GHOST_ADMIN_API_KEY")
        if ghost_url and ghost_key:
            draft_url = _publish_draft(
                ghost_url, ghost_key, newsletter_title, html_out, cfg.get("default_tags", [])
            )
            print("Draft:", draft_url)
        else:
            print("Missing Ghost creds; skipped publish.")


if __name__ == "__main__":
    main()