First commit
build.py (new file, 246 lines)
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
import os, sys, glob, time, json, html, argparse, pathlib, textwrap, re
from datetime import date
import yaml, requests, jwt
from jinja2 import Template
from dotenv import load_dotenv

load_dotenv()

ROOT = pathlib.Path(__file__).parent
TEMPLATES = ROOT / "templates"

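# Assumed shape of config.yaml (illustrative values; the keys are the ones this
# script reads, and newsletter_title is rendered as a Jinja2 template with {{ date }}):
#   newsletter_title: "Newsletter {{ date }}"
#   newsletter_subtitle: ""
#   default_tags: ["newsletter"]
#   llm:
#     provider: openwebui
#     model: qwen2.5-7b-instruct
#     temperature: 0.2
#   db:
#     path: memory.sqlite          # hypothetical path
#     embed_model: text-embedding-3-small
#     related_top_k: 3
#     min_similarity: 0.78
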
def read_file(p): return pathlib.Path(p).read_text(encoding="utf-8")

def load_config():
    cfg = yaml.safe_load(read_file(ROOT / "config.yaml"))
    cfg["date"] = date.today().isoformat()
    return cfg

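# Entry files are Markdown with optional YAML front matter, e.g. (illustrative,
# field names taken from main() below):
#   ---
#   title: Some article
#   source_name: example.com
#   status: todo        # "skip" drops the entry from the newsletter
#   ---
#   Notes about the article go here and become {body} in the prompt.
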
def parse_front_matter(text):
    m = re.match(r"^---\n(.*?)\n---\n(.*)$", text, flags=re.S|re.M)
    if not m:
        return {}, text.strip()
    fm = yaml.safe_load(m.group(1)) or {}
    body = m.group(2).strip()
    return fm, body

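# Keys read from the environment / .env:
#   LLM_PROVIDER, LLM_API_BASE, LLM_MODEL, LLM_API_KEY, OPENAI_API_KEY,
#   LLM_TEMPERATURE, LLM_TOP_P, LLM_PRESENCE_PENALTY, LLM_FREQUENCY_PENALTY,
#   LLM_TIMEOUT_SECONDS, LLM_MAX_RETRIES,
#   EMB_API_BASE, EMB_API_KEY, EMB_MODEL,
#   GHOST_ADMIN_API_URL, GHOST_ADMIN_API_KEY
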
# ---- LLM config / client ----
from dataclasses import dataclass

@dataclass
class LLMConfig:
    provider: str
    api_base: str
    model: str
    api_key: str | None
    temperature: float
    top_p: float
    presence_penalty: float
    frequency_penalty: float
    timeout_seconds: int
    max_retries: int

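# resolve_llm_config() resolves every setting with the precedence:
# CLI flag > environment variable > "llm" section of config.yaml > built-in default.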
def resolve_llm_config(cfg: dict, args) -> LLMConfig:
    llm_cfg = cfg.get("llm", {}) if cfg else {}

    def pick(cli_val, env_key, cfg_key, default=None):
        if cli_val is not None:
            return cli_val
        if env_key and os.getenv(env_key):
            return os.getenv(env_key)
        return llm_cfg.get(cfg_key, default)

    provider = pick(getattr(args, "llm_provider", None), "LLM_PROVIDER", "provider", "openwebui")
    api_base = pick(getattr(args, "llm_api_base", None), "LLM_API_BASE", "api_base",
                    "http://localhost:3000" if provider=="openwebui" else
                    "http://localhost:11434" if provider=="ollama" else
                    "https://api.openai.com")
    model = pick(getattr(args, "llm_model", None), "LLM_MODEL", "model",
                 "qwen2.5-7b-instruct" if provider=="openwebui" else
                 "llama3.1:8b-instruct" if provider=="ollama" else
                 "gpt-4o-mini")
    api_key = os.getenv("LLM_API_KEY") or (os.getenv("OPENAI_API_KEY") if provider=="openai" else None)

    temperature = float(pick(getattr(args, "temperature", None), "LLM_TEMPERATURE", "temperature", 0.2))
    top_p = float(pick(getattr(args, "top_p", None), "LLM_TOP_P", "top_p", 1.0))
    presence_penalty = float(pick(getattr(args, "presence_penalty", None), "LLM_PRESENCE_PENALTY", "presence_penalty", 0.0))
    frequency_penalty = float(pick(getattr(args, "frequency_penalty", None), "LLM_FREQUENCY_PENALTY", "frequency_penalty", 0.0))
    timeout_seconds = int(pick(getattr(args, "timeout_seconds", None), "LLM_TIMEOUT_SECONDS", "timeout_seconds", 120))
    max_retries = int(pick(getattr(args, "max_retries", None), "LLM_MAX_RETRIES", "max_retries", 2))

    return LLMConfig(
        provider=provider, api_base=api_base, model=model, api_key=api_key,
        temperature=temperature, top_p=top_p,
        presence_penalty=presence_penalty, frequency_penalty=frequency_penalty,
        timeout_seconds=timeout_seconds, max_retries=max_retries
    )

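# chat_completion_llm() sends an OpenAI-style chat payload. Open WebUI is addressed
# at /api/chat/completions, Ollama and OpenAI-compatible servers at /v1/chat/completions.
# Failed requests are retried up to max_retries with capped exponential backoff.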
def chat_completion_llm(messages, llm: LLMConfig):
    if llm.provider == "openwebui":
        url = f"{llm.api_base.rstrip('/')}/api/chat/completions"
        headers = {"Content-Type":"application/json"}
        if llm.api_key:
            headers["Authorization"] = f"Bearer {llm.api_key}"
    elif llm.provider == "ollama":
        url = f"{llm.api_base.rstrip('/')}/v1/chat/completions"
        headers = {"Content-Type":"application/json"}
        if llm.api_key:
            headers["Authorization"] = f"Bearer {llm.api_key}"
    else:
        url = f"{llm.api_base.rstrip('/')}/v1/chat/completions"
        headers = {"Content-Type":"application/json"}
        if llm.api_key:
            headers["Authorization"] = f"Bearer {llm.api_key}"

    payload = {
        "model": llm.model,
        "messages": messages,
        "temperature": llm.temperature,
        "top_p": llm.top_p,
        "presence_penalty": llm.presence_penalty,
        "frequency_penalty": llm.frequency_penalty,
        "stream": False
    }

    attempt = 0
    last_err = None
    while attempt <= llm.max_retries:
        try:
            r = requests.post(url, headers=headers, json=payload, timeout=llm.timeout_seconds)
            r.raise_for_status()
            data = r.json()
            return data["choices"][0]["message"]["content"]
        except Exception as e:
            last_err = e
            attempt += 1
            if attempt > llm.max_retries:
                break
            time.sleep(min(2**attempt, 8))
    raise RuntimeError(f"LLM request failed after {llm.max_retries} retries: {last_err}")

def call_llm_via_messages(prompt: str, llm: LLMConfig) -> str:
    return chat_completion_llm([{"role":"user","content": prompt}], llm)

# ---- Ghost ----
def ghost_jwt(key: str) -> str:
    key_id, secret = key.split(':')
    iat = int(time.time())
    header = {"alg": "HS256", "kid": key_id, "typ": "JWT"}
    payload = {"iat": iat, "exp": iat + 5 * 60, "aud": "/admin/"}
    return jwt.encode(payload, bytes.fromhex(secret), algorithm='HS256', headers=header)

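# ghost_jwt() above produces the short-lived Ghost Admin API token (key id + hex
# secret, 5-minute expiry). create_ghost_draft() appends only "/posts/..." to
# GHOST_ADMIN_API_URL, so that variable is expected to hold the full Admin API base
# (typically something like https://<site>/ghost/api/admin).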
def create_ghost_draft(ghost_url, ghost_key, html_content, title, tags):
    token = ghost_jwt(ghost_key)
    payload = { "posts": [{
        "title": title, "html": html_content, "status": "draft",
        "tags": [{"name": t} for t in tags]
    }]}
    r = requests.post(
        # ?source=html tells the Ghost Admin API to build the post body from the "html" field
        f"{ghost_url}/posts/?source=html",
        headers={"Authorization": f"Ghost {token}", "Content-Type": "application/json"},
        data=json.dumps(payload), timeout=60
    )
    r.raise_for_status()
    return r.json()["posts"][0]["url"]

# ---- Memory/embeddings ----
from db import connect as db_connect, topk_similar
from emb import embed_text

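# db and emb are local modules in this repo (not part of this file). As used below,
# db_connect(path) is assumed to open the summaries database, topk_similar(...) to
# return rows like (id, title, summary, date), and embed_text(text, api_base,
# api_key, model) to return one embedding vector.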
def build_related_hint_auto(title, body, llm_cfg, cfg_db):
    api_base = os.getenv("EMB_API_BASE", llm_cfg.api_base)
    api_key = os.getenv("EMB_API_KEY", llm_cfg.api_key)
    model = os.getenv("EMB_MODEL", cfg_db.get("embed_model", "text-embedding-3-small"))
    qtext = (title + "\n\n" + body)[:5000]
    try:
        vec = embed_text(qtext, api_base, api_key, model)
    except Exception:
        return "—"
    con = db_connect(cfg_db["path"])
    hits = topk_similar(con, model=model, query_vec=vec,
                        ref_table="summaries",
                        k=cfg_db.get("related_top_k",3),
                        min_sim=cfg_db.get("min_similarity",0.78))
    if not hits:
        return "—"
    lines = []
    for sid, t, s, nd in hits:
        lines.append(f"- {nd or 'dříve'}: {t}")  # 'dříve' = Czech for "earlier"
    # Czech prompt hint: "We wrote about a similar topic:" ... "Mention the connection in one sentence."
    return "O podobném tématu jsme psali:\n" + "\n".join(lines) + "\nZmiň jednou větou souvislost."

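# main(): read entries/<dir>/*.md, summarize each item with the LLM, render the
# Jinja2 item/newsletter templates, then write --out HTML and/or --publish a Ghost draft.
# Example invocation (illustrative, matching the argparse help strings below):
#   python build.py entries/2025-09-19 --out dist/2025-09-19.html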
def main():
    ap = argparse.ArgumentParser(description="Offline-first generator + Ghost draft")
    ap.add_argument("entries_dir", help="entries/YYYY-MM-DD directory")
    ap.add_argument("--out", help="Output HTML path, e.g. dist/2025-09-19.html")
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--publish", action="store_true")
    # LLM overrides
    ap.add_argument("--llm-provider")
    ap.add_argument("--llm-api-base")
    ap.add_argument("--llm-model")
    ap.add_argument("--temperature", type=float)
    ap.add_argument("--top-p", type=float)
    ap.add_argument("--presence-penalty", type=float)
    ap.add_argument("--frequency-penalty", type=float)
    ap.add_argument("--timeout-seconds", type=int)
    ap.add_argument("--max-retries", type=int)
    args = ap.parse_args()

    cfg = load_config()
    llm = resolve_llm_config(cfg, args)

    item_tpl = read_file(TEMPLATES / "item.html.j2")
    news_tpl = read_file(TEMPLATES / "newsletter.html.j2")
    prompt_template = read_file(TEMPLATES / "prompt.txt")
    style_examples = read_file(TEMPLATES / "style_bank.md").strip()
    prompt_template = prompt_template.replace("{style_examples}", style_examples)

    paths = sorted(glob.glob(os.path.join(args.entries_dir, "*.md")))
    blocks = []
    for p in paths:
        fm_text = pathlib.Path(p).read_text(encoding="utf-8")
        fm, body = parse_front_matter(fm_text)
        if fm.get("status","todo") == "skip":
            continue
        title = fm.get("title") or pathlib.Path(p).stem.replace("-"," ").title()
        source_name = fm.get("source_name","Zdroj neuveden")  # Czech: "source not given"
        related_hint = build_related_hint_auto(title, body, llm, cfg.get("db",{}))
        prompt = (prompt_template
                  .replace("{title}", title)
                  .replace("{body}", body)
                  .replace("{source_name}", source_name)
                  .replace("{related_hint}", related_hint))
        summary = call_llm_via_messages(prompt, llm)
        block_html = Template(item_tpl).render(title=title, summary=summary)
        blocks.append(block_html)

    newsletter_title = Template(cfg["newsletter_title"]).render(date=cfg["date"])
    newsletter_subtitle = cfg.get("newsletter_subtitle","")
    html_out = Template(news_tpl).render(
        newsletter_title=newsletter_title,
        newsletter_subtitle=newsletter_subtitle,
        blocks=blocks
    )

    if args.out:
        outp = pathlib.Path(args.out)
        outp.parent.mkdir(parents=True, exist_ok=True)
        outp.write_text(html_out, encoding="utf-8")
        print(f"Saved: {outp}")

    if args.publish:
        ghost_url = os.getenv("GHOST_ADMIN_API_URL")
        ghost_key = os.getenv("GHOST_ADMIN_API_KEY")
        if not (ghost_url and ghost_key):
            print("Missing GHOST_ADMIN_API_URL or GHOST_ADMIN_API_KEY in .env", file=sys.stderr)
            sys.exit(2)
        url = create_ghost_draft(ghost_url, ghost_key, html_out, newsletter_title, cfg.get("default_tags",[]))
        print("Draft:", url)

    if not (args.out or args.publish):
        print(html_out)

if __name__ == "__main__":
    main()