feat: Redis caching + async queue for LLM scaling (v1.6.0)

- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue job, return job_id, poll /api/jobs/{id}
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
2026-04-22 09:55:05 +02:00
parent 47e0dfc2ca
commit f75f165911
16 changed files with 498 additions and 14 deletions
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -18,7 +18,9 @@ from config import (
 )
 from database import events_collection
 from fastapi import APIRouter, Depends, HTTPException
+from jobs import get_cached_ask, get_cached_explain, set_cached_ask, set_cached_explain
 from models.api import AskRequest, AskResponse
+from redis_client import get_arq_pool

 router = APIRouter(dependencies=[Depends(require_auth)])
 logger = structlog.get_logger("aoc.ask")
@@ -640,14 +642,23 @@ async def explain_event(event_id: str, user: dict = Depends(require_auth)):
            "llm_error": "LLM_API_KEY not configured",
        }

+    # Check cache first
+    redis = await get_arq_pool()
+    cached = await get_cached_explain(redis, event_id)
+    if cached:
+        cached["related_count"] = len(related)
+        return cached
+
    try:
        explanation = await _explain_event(event, related)
-        return {
+        result = {
            "explanation": explanation,
            "llm_used": True,
            "llm_error": None,
            "related_count": len(related),
        }
+        await set_cached_explain(redis, event_id, result)
+        return result
    except Exception as exc:
        logger.warning("Event explanation failed", error=str(exc))
        return {
@@ -746,19 +757,70 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
            llm_error="LLM not used — no events found." if not LLM_API_KEY else None,
        )

-    # Try LLM summarisation
+    # Try LLM summarisation (with caching + optional async)
    answer = ""
    llm_used = False
    llm_error = None
-    if not LLM_API_KEY:
-        llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
+    job_id = None
+
+    filters_snapshot = {
+        "services": body.services,
+        "actor": body.actor,
+        "operation": body.operation,
+        "result": body.result,
+        "start": body.start,
+        "end": body.end,
+        "include_tags": body.include_tags,
+        "exclude_tags": body.exclude_tags,
+    }
+
+    if LLM_API_KEY:
+        redis = await get_arq_pool()
+        cached = await get_cached_ask(redis, question, filters_snapshot, events)
+        if cached:
+            answer = cached.get("answer", "")
+            llm_used = cached.get("llm_used", False)
+            llm_error = cached.get("llm_error")
+        elif body.async_mode:
+            pool = await get_arq_pool()
+            job = await pool.enqueue_job(
+                "process_ask_question",
+                question,
+                filters_snapshot,
+                events,
+                total,
+                excluded_services,
+            )
+            job_id = job.job_id if job else None
+            return AskResponse(
+                answer="Your question is being processed. Poll /api/jobs/{job_id} for the result.",
+                events=[_to_event_ref(e) for e in events],
+                query_info={
+                    "entity": entity,
+                    "start": start,
+                    "end": end,
+                    "event_count": len(events),
+                    "total_matched": total,
+                    "services_queried": query_services,
+                    "excluded_services": excluded_services,
+                    "mongo_query": json.dumps(query, default=str),
+                },
+                llm_used=False,
+                llm_error=None,
+                job_id=job_id,
+            )
+        else:
+            try:
+                answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
+                llm_used = True
+                await set_cached_ask(redis, question, filters_snapshot, events, {
+                    "answer": answer, "llm_used": True, "llm_error": None,
+                })
+            except Exception as exc:
+                llm_error = f"LLM call failed: {exc}"
+                logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
    else:
-        try:
-            answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
-            llm_used = True
-        except Exception as exc:
-            llm_error = f"LLM call failed: {exc}"
-            logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
+        llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."

    # Fallback: structured summary if LLM unavailable or failed
    if not answer:
@@ -797,4 +859,5 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
        },
        llm_used=llm_used,
        llm_error=llm_error,
+        job_id=job_id,
    )