First commit
This commit is contained in:
38
scripts/sync_entries_to_db.py
Normal file
38
scripts/sync_entries_to_db.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse, re
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
from db import connect, upsert_source
|
||||
|
||||
_FRONT_MATTER_RE = re.compile(
    r"\A---[ \t]*\r?\n"        # opening delimiter must be the very first line
    r"(?:(.*?)\r?\n)??"        # YAML payload; may be missing entirely
    r"---[ \t]*(?:\r?\n|\Z)"   # closing delimiter, possibly the last line
    r"(.*)\Z",                 # everything after it is the body
    flags=re.S,
)


def parse_front_matter(text: str):
    """Split a document into its YAML front matter and its body.

    Front matter is a block delimited by ``---`` lines at the very start of
    *text*. Both LF and CRLF line endings are accepted, the block may be
    empty, and the closing delimiter may be the final line of the text.

    Args:
        text: Full contents of the document.

    Returns:
        A ``(front_matter, body)`` tuple. ``front_matter`` is the parsed
        mapping, or ``{}`` when there is no front matter or it is empty;
        ``body`` is the remaining text with surrounding whitespace stripped.

    Raises:
        ValueError: If the front matter parses to something other than a
            mapping (for example a list or a bare scalar).
        yaml.YAMLError: If the front matter is not valid YAML.
    """
    # A UTF-8 byte-order mark would otherwise hide the opening delimiter.
    if text.startswith("\ufeff"):
        text = text[1:]

    m = _FRONT_MATTER_RE.match(text)
    if not m:
        return {}, text.strip()

    raw = m.group(1) or ""
    # A blank block carries no metadata, so the YAML parser is not needed;
    # falsy parse results keep mapping to an empty dict as before.
    fm = (yaml.safe_load(raw) or {}) if raw.strip() else {}
    if not isinstance(fm, dict):
        # Callers rely on dict access; fail here with a clear message
        # instead of an AttributeError further down the line.
        raise ValueError(
            f"front matter must be a YAML mapping, got {type(fm).__name__}"
        )
    return fm, m.group(2).strip()
|
||||
|
||||
def main():
    """Sync every ``*.md`` file in a directory into the database.

    Command-line options:
        --db   Path passed to ``connect`` (default ``data/newsletter.db``).
        --dir  Directory containing the Markdown entries (required).

    For each entry the front matter supplies ``title`` (falling back to the
    file stem), ``url`` and ``source_name`` (stored as the publisher); the
    remaining text is stored as the content. Exits with a usage error when
    ``--dir`` does not name an existing directory.
    """
    ap = argparse.ArgumentParser(description="Sync entries/*.md into SQLite sources")
    ap.add_argument("--db", default="data/newsletter.db")
    ap.add_argument("--dir", required=True, help="entries/YYYY-MM-DD directory")
    args = ap.parse_args()

    # Globbing a missing directory yields nothing, which would look like a
    # successful run; reject it before the database is touched.
    entries_dir = Path(args.dir)
    if not entries_dir.is_dir():
        ap.error(f"--dir is not a directory: {entries_dir}")

    # NOTE(review): the connection is never explicitly committed or closed
    # here; presumably upsert_source commits on its own -- confirm in db.py.
    con = connect(args.db)

    # glob() order is unspecified; sort so runs are reproducible and, if two
    # entries collide on the same source, the winner is deterministic.
    for p in sorted(entries_dir.glob("*.md")):
        text = p.read_text(encoding="utf-8")
        fm, body = parse_front_matter(text)
        title = fm.get("title") or p.stem
        url = fm.get("url")
        publisher = fm.get("source_name")
        upsert_source(
            con,
            url=url,
            title=title,
            publisher=publisher,
            date_published=None,
            content=body,
        )
        print(f"Synced: {p.name}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user