feat: Redis caching + async queue for LLM scaling (v1.6.0)
- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue job, return job_id, poll /api/jobs/{id}
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
This commit is contained in:
113
backend/jobs.py
Normal file
113
backend/jobs.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""arq job functions for async LLM processing."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
import structlog
|
||||
from arq.connections import RedisSettings
|
||||
from config import REDIS_URL
|
||||
|
||||
logger = structlog.get_logger("aoc.jobs")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CACHE_TTL_ASK = 3600 # 1 hour
|
||||
CACHE_TTL_EXPLAIN = 86400 # 24 hours
|
||||
|
||||
|
||||
def _ask_cache_key(question: str, filters: dict, events: list) -> str:
|
||||
payload = json.dumps({"q": question, "f": filters, "e": [e.get("id") for e in events]}, sort_keys=True)
|
||||
return f"aoc:cache:ask:{hashlib.md5(payload.encode()).hexdigest()}"
|
||||
|
||||
|
||||
def _explain_cache_key(event_id: str) -> str:
|
||||
return f"aoc:cache:explain:{event_id}"
|
||||
|
||||
|
||||
async def get_cached_ask(redis, question: str, filters: dict, events: list) -> dict | None:
|
||||
key = _ask_cache_key(question, filters, events)
|
||||
raw = await redis.get(key)
|
||||
if raw:
|
||||
return json.loads(raw)
|
||||
return None
|
||||
|
||||
|
||||
async def set_cached_ask(redis, question: str, filters: dict, events: list, result: dict):
|
||||
key = _ask_cache_key(question, filters, events)
|
||||
await redis.setex(key, CACHE_TTL_ASK, json.dumps(result, default=str))
|
||||
|
||||
|
||||
async def get_cached_explain(redis, event_id: str) -> dict | None:
|
||||
key = _explain_cache_key(event_id)
|
||||
raw = await redis.get(key)
|
||||
if raw:
|
||||
return json.loads(raw)
|
||||
return None
|
||||
|
||||
|
||||
async def set_cached_explain(redis, event_id: str, result: dict):
|
||||
key = _explain_cache_key(event_id)
|
||||
await redis.setex(key, CACHE_TTL_EXPLAIN, json.dumps(result, default=str))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# arq job functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def process_ask_question(ctx, question: str, filters: dict, events: list, total: int, excluded_services: list | None):
    """Background job: answer an /api/ask question via the LLM.

    Returns a result dict (status / answer / llm_used / llm_error) and also
    writes it to the ask cache so the polling endpoint can serve it.
    """
    # Deferred import — presumably avoids an import cycle with routes.ask at
    # worker startup (routes.ask uses this module's cache helpers); confirm.
    from routes.ask import _call_llm

    redis = ctx["redis"]
    try:
        answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
    except Exception as exc:
        # Job boundary: record the failure in the result instead of crashing
        # the worker; the caller sees status == "failed" with the error text.
        logger.warning("Async ask LLM failed", error=str(exc))
        result = {"status": "failed", "answer": "", "llm_used": False, "llm_error": str(exc)}
    else:
        result = {"status": "completed", "answer": answer, "llm_used": True, "llm_error": None}

    await set_cached_ask(redis, question, filters, events, result)
    return result
|
||||
|
||||
|
||||
async def process_explain_event(ctx, event_id: str, event: dict, related: list):
    """Background job: generate an LLM explanation for a single event.

    Returns a result dict (status / explanation / llm_used / llm_error) and
    also writes it to the explain cache keyed by *event_id*.
    """
    # Deferred import — presumably avoids an import cycle with routes.ask at
    # worker startup (routes.ask uses this module's cache helpers); confirm.
    from routes.ask import _explain_event

    redis = ctx["redis"]
    try:
        explanation = await _explain_event(event, related)
    except Exception as exc:
        # Job boundary: record the failure in the result instead of crashing
        # the worker; the caller sees status == "failed" with the error text.
        logger.warning("Async explain LLM failed", error=str(exc))
        result = {"status": "failed", "explanation": "", "llm_used": False, "llm_error": str(exc)}
    else:
        result = {"status": "completed", "explanation": explanation, "llm_used": True, "llm_error": None}

    await set_cached_explain(redis, event_id, result)
    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# arq worker configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def startup(ctx):
    """arq worker startup hook: open the Redis client shared by all jobs."""
    from redis.asyncio import Redis

    # decode_responses=True so cached values come back as str, matching the
    # json.loads calls in the cache helpers.
    client = Redis.from_url(REDIS_URL, decode_responses=True)
    ctx["redis"] = client
|
||||
|
||||
|
||||
async def shutdown(ctx):
    """arq worker shutdown hook: close the Redis client opened in startup."""
    # NOTE(review): redis-py >= 5 deprecates close() in favor of aclose();
    # keeping close() here — verify against the pinned redis version.
    redis = ctx["redis"]
    await redis.close()
|
||||
|
||||
|
||||
class WorkerSettings:
    """Settings class consumed by the arq worker (run via `arq jobs.WorkerSettings`)."""

    # Job functions this worker can execute.
    functions = [process_ask_question, process_explain_event]
    # Connection settings derived from the configured Redis/Valkey DSN.
    redis_settings = RedisSettings.from_dsn(REDIS_URL)
    on_startup = startup
    on_shutdown = shutdown
    # presumably: max concurrent jobs per worker and per-job timeout in
    # seconds — confirm against arq's WorkerSettings documentation.
    max_jobs = 10
    job_timeout = 120
    # Retain job results for 1 hour (matches CACHE_TTL_ASK), not forever.
    keep_result = 3600
    keep_result_forever = False
|
||||
Reference in New Issue
Block a user