#!/usr/bin/env python3
"""Offline-first newsletter generator.

Reads Markdown entries (with optional YAML front matter) from a directory,
summarizes each with an OpenAI-compatible LLM, renders the results into an
HTML newsletter via Jinja2 templates, and optionally writes the HTML to disk
and/or uploads it as a Ghost draft post.
"""
import argparse
import glob
import html
import json
import os
import pathlib
import re
import sys
import textwrap
import time
from dataclasses import dataclass
from datetime import date

import jwt
import requests
import yaml
from dotenv import load_dotenv
from jinja2 import Template

load_dotenv()

ROOT = pathlib.Path(__file__).parent
TEMPLATES = ROOT / "templates"


def read_file(p):
    """Read the file at *p* as UTF-8 text and return its contents."""
    return pathlib.Path(p).read_text(encoding="utf-8")


def load_config():
    """Load ``config.yaml`` next to this script and stamp today's ISO date.

    Returns the parsed config dict with ``cfg["date"]`` set to
    ``date.today().isoformat()``.
    """
    cfg = yaml.safe_load(read_file(ROOT / "config.yaml"))
    cfg["date"] = date.today().isoformat()
    return cfg


def parse_front_matter(text):
    """Split a Markdown document into ``(front_matter_dict, body)``.

    When no ``---`` front-matter fence is present, returns ``({}, text.strip())``.
    """
    m = re.match(r"^---\n(.*?)\n---\n(.*)$", text, flags=re.S | re.M)
    if not m:
        return {}, text.strip()
    # safe_load yields None for an empty front-matter block; coerce to {}.
    fm = yaml.safe_load(m.group(1)) or {}
    body = m.group(2).strip()
    return fm, body


# ---- LLM config / client ----

@dataclass
class LLMConfig:
    """Resolved LLM connection settings (CLI > env > config.yaml > default)."""
    provider: str              # "openwebui" | "ollama" | anything OpenAI-compatible
    api_base: str              # base URL of the chat-completions server
    model: str
    api_key: str | None
    temperature: float
    top_p: float
    presence_penalty: float
    frequency_penalty: float
    timeout_seconds: int       # per-request timeout
    max_retries: int           # retries after the first failed attempt


def resolve_llm_config(cfg: dict, args) -> LLMConfig:
    """Build an :class:`LLMConfig`, layering CLI args over env vars over config.

    Precedence for each setting: CLI flag, then environment variable, then the
    ``llm`` section of ``config.yaml``, then a provider-specific default.
    """
    llm_cfg = cfg.get("llm", {}) if cfg else {}

    def pick(cli_val, env_key, cfg_key, default=None):
        # CLI wins; an unset flag is None, so falsy-but-set values (0, 0.0) pass through.
        if cli_val is not None:
            return cli_val
        if env_key and os.getenv(env_key):
            return os.getenv(env_key)
        return llm_cfg.get(cfg_key, default)

    provider = pick(getattr(args, "llm_provider", None), "LLM_PROVIDER", "provider", "openwebui")
    api_base = pick(
        getattr(args, "llm_api_base", None), "LLM_API_BASE", "api_base",
        "http://localhost:3000" if provider == "openwebui"
        else "http://localhost:11434" if provider == "ollama"
        else "https://api.openai.com",
    )
    model = pick(
        getattr(args, "llm_model", None), "LLM_MODEL", "model",
        "qwen2.5-7b-instruct" if provider == "openwebui"
        else "llama3.1:8b-instruct" if provider == "ollama"
        else "gpt-4o-mini",
    )
    # API key comes only from the environment; OPENAI_API_KEY is honored for
    # the "openai" provider as a fallback.
    api_key = os.getenv("LLM_API_KEY") or (os.getenv("OPENAI_API_KEY") if provider == "openai" else None)
    temperature = float(pick(getattr(args, "temperature", None), "LLM_TEMPERATURE", "temperature", 0.2))
    top_p = float(pick(getattr(args, "top_p", None), "LLM_TOP_P", "top_p", 1.0))
    presence_penalty = float(pick(getattr(args, "presence_penalty", None), "LLM_PRESENCE_PENALTY", "presence_penalty", 0.0))
    frequency_penalty = float(pick(getattr(args, "frequency_penalty", None), "LLM_FREQUENCY_PENALTY", "frequency_penalty", 0.0))
    timeout_seconds = int(pick(getattr(args, "timeout_seconds", None), "LLM_TIMEOUT_SECONDS", "timeout_seconds", 120))
    max_retries = int(pick(getattr(args, "max_retries", None), "LLM_MAX_RETRIES", "max_retries", 2))
    return LLMConfig(
        provider=provider,
        api_base=api_base,
        model=model,
        api_key=api_key,
        temperature=temperature,
        top_p=top_p,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        timeout_seconds=timeout_seconds,
        max_retries=max_retries,
    )


def chat_completion_llm(messages, llm: LLMConfig):
    """POST a chat-completion request and return the assistant message text.

    Retries up to ``llm.max_retries`` additional times with capped exponential
    backoff. Raises :class:`RuntimeError` when every attempt fails.
    """
    # Open WebUI serves the OpenAI-compatible endpoint under /api; Ollama,
    # OpenAI, and other OpenAI-compatible servers use the /v1 prefix.
    prefix = "/api" if llm.provider == "openwebui" else "/v1"
    url = f"{llm.api_base.rstrip('/')}{prefix}/chat/completions"
    headers = {"Content-Type": "application/json"}
    if llm.api_key:
        headers["Authorization"] = f"Bearer {llm.api_key}"
    payload = {
        "model": llm.model,
        "messages": messages,
        "temperature": llm.temperature,
        "top_p": llm.top_p,
        "presence_penalty": llm.presence_penalty,
        "frequency_penalty": llm.frequency_penalty,
        "stream": False,
    }
    attempt = 0
    last_err = None
    while attempt <= llm.max_retries:
        try:
            r = requests.post(url, headers=headers, json=payload, timeout=llm.timeout_seconds)
            r.raise_for_status()
            data = r.json()
            return data["choices"][0]["message"]["content"]
        except Exception as e:  # network errors, HTTP errors, malformed responses
            last_err = e
            attempt += 1
            if attempt > llm.max_retries:
                break
            time.sleep(min(2 ** attempt, 8))  # capped exponential backoff
    raise RuntimeError(f"LLM request failed after {llm.max_retries} retries: {last_err}")


def call_llm_via_messages(prompt: str, llm: LLMConfig) -> str:
    """Send *prompt* as a single user message and return the completion text."""
    return chat_completion_llm([{"role": "user", "content": prompt}], llm)


# ---- Ghost ----

def ghost_jwt(key: str) -> str:
    """Create a short-lived (5 min) Ghost Admin API JWT from an ``id:secret`` key."""
    key_id, secret = key.split(':')
    iat = int(time.time())
    header = {"alg": "HS256", "kid": key_id, "typ": "JWT"}
    # Ghost requires aud="/admin/" and the secret decoded from hex.
    payload = {"iat": iat, "exp": iat + 5 * 60, "aud": "/admin/"}
    return jwt.encode(payload, bytes.fromhex(secret), algorithm='HS256', headers=header)


def create_ghost_draft(ghost_url, ghost_key, html_content, title, tags):
    """Create a draft post in Ghost and return its URL.

    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    token = ghost_jwt(ghost_key)
    payload = {
        "posts": [{
            "title": title,
            "html": html_content,
            "status": "draft",
            "tags": [{"name": t} for t in tags],
        }]
    }
    r = requests.post(
        f"{ghost_url}/posts/",
        headers={"Authorization": f"Ghost {token}", "Content-Type": "application/json"},
        json=payload,
        timeout=60,
    )
    r.raise_for_status()
    return r.json()["posts"][0]["url"]


# ---- Memory/embeddings ----
from db import connect as db_connect, topk_similar
from emb import embed_text


def build_related_hint_auto(title, body, llm_cfg, cfg_db):
    """Return a prompt hint listing previously covered similar topics.

    Embeds ``title + body`` and looks up the top-k similar past summaries in
    the local DB. Returns the placeholder "—" when embedding fails or nothing
    similar is found (best-effort: the newsletter still builds without it).
    """
    api_base = os.getenv("EMB_API_BASE", llm_cfg.api_base)
    api_key = os.getenv("EMB_API_KEY", llm_cfg.api_key)
    model = os.getenv("EMB_MODEL", cfg_db.get("embed_model", "text-embedding-3-small"))
    qtext = (title + "\n\n" + body)[:5000]  # cap query size for the embedder
    try:
        vec = embed_text(qtext, api_base, api_key, model)
    except Exception:
        # Deliberate best-effort: a missing hint must not block generation.
        return "—"
    # NOTE(review): connection is never closed here — confirm whether the
    # project `db.connect` needs explicit cleanup.
    con = db_connect(cfg_db["path"])
    hits = topk_similar(
        con,
        model=model,
        query_vec=vec,
        ref_table="summaries",
        k=cfg_db.get("related_top_k", 3),
        min_sim=cfg_db.get("min_similarity", 0.78),
    )
    if not hits:
        return "—"
    # Each hit appears to be (summary_id, title, summary, news_date) — only
    # title and date are used here.
    lines = []
    for sid, t, s, nd in hits:
        lines.append(f"- {nd or 'dříve'}: {t}")
    return "O podobném tématu jsme psali:\n" + "\n".join(lines) + "\nZmiň jednou větou souvislost."


def main():
    """CLI entry point: parse args, summarize entries, render, save/publish."""
    ap = argparse.ArgumentParser(description="Offline-first generator + Ghost draft")
    ap.add_argument("entries_dir", help="entries/YYYY-MM-DD directory")
    ap.add_argument("--out", help="Output HTML path, e.g. dist/2025-09-19.html")
    # NOTE(review): --dry-run is accepted but never consulted below — either
    # wire it up or drop it.
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--publish", action="store_true")
    # LLM overrides
    ap.add_argument("--llm-provider")
    ap.add_argument("--llm-api-base")
    ap.add_argument("--llm-model")
    ap.add_argument("--temperature", type=float)
    ap.add_argument("--top-p", type=float)
    ap.add_argument("--presence-penalty", type=float)
    ap.add_argument("--frequency-penalty", type=float)
    ap.add_argument("--timeout-seconds", type=int)
    ap.add_argument("--max-retries", type=int)
    args = ap.parse_args()

    cfg = load_config()
    llm = resolve_llm_config(cfg, args)

    item_tpl = read_file(TEMPLATES / "item.html.j2")
    news_tpl = read_file(TEMPLATES / "newsletter.html.j2")
    prompt_template = read_file(TEMPLATES / "prompt.txt")
    style_examples = read_file(TEMPLATES / "style_bank.md").strip()
    # The prompt uses literal {placeholders}, filled by str.replace (not Jinja),
    # so arbitrary braces in entry bodies are safe.
    prompt_template = prompt_template.replace("{style_examples}", style_examples)

    paths = sorted(glob.glob(os.path.join(args.entries_dir, "*.md")))
    blocks = []
    for p in paths:
        fm_text = pathlib.Path(p).read_text(encoding="utf-8")
        fm, body = parse_front_matter(fm_text)
        if fm.get("status", "todo") == "skip":
            continue
        # Fall back to a title derived from the filename.
        title = fm.get("title") or pathlib.Path(p).stem.replace("-", " ").title()
        source_name = fm.get("source_name", "Zdroj neuveden")
        related_hint = build_related_hint_auto(title, body, llm, cfg.get("db", {}))
        prompt = (prompt_template
                  .replace("{title}", title)
                  .replace("{body}", body)
                  .replace("{source_name}", source_name)
                  .replace("{related_hint}", related_hint))
        summary = call_llm_via_messages(prompt, llm)
        block_html = Template(item_tpl).render(title=title, summary=summary)
        blocks.append(block_html)

    newsletter_title = Template(cfg["newsletter_title"]).render(date=cfg["date"])
    newsletter_subtitle = cfg.get("newsletter_subtitle", "")
    html_out = Template(news_tpl).render(
        newsletter_title=newsletter_title,
        newsletter_subtitle=newsletter_subtitle,
        blocks=blocks,
    )

    if args.out:
        outp = pathlib.Path(args.out)
        outp.parent.mkdir(parents=True, exist_ok=True)
        outp.write_text(html_out, encoding="utf-8")
        print(f"Saved: {outp}")
    if args.publish:
        ghost_url = os.getenv("GHOST_ADMIN_API_URL")
        ghost_key = os.getenv("GHOST_ADMIN_API_KEY")
        if not (ghost_url and ghost_key):
            print("Missing GHOST_ADMIN_API_URL or GHOST_ADMIN_API_KEY in .env", file=sys.stderr)
            sys.exit(2)
        url = create_ghost_draft(ghost_url, ghost_key, html_out, newsletter_title, cfg.get("default_tags", []))
        print("Draft:", url)
    if not (args.out or args.publish):
        # Neither --out nor --publish: dump the HTML to stdout.
        print(html_out)


if __name__ == "__main__":
    main()