feat: Redis caching + async queue for LLM scaling (v1.6.0)
Some checks failed
Release / build-and-push (push) Successful in 1m24s
CI / lint-and-test (push) Failing after 29s

- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue job, return job_id, poll /api/jobs/{id}
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
This commit is contained in:
2026-04-22 09:55:05 +02:00
parent 47e0dfc2ca
commit f75f165911
16 changed files with 498 additions and 14 deletions

View File

@@ -18,7 +18,9 @@ from config import (
)
from database import events_collection
from fastapi import APIRouter, Depends, HTTPException
from jobs import get_cached_ask, get_cached_explain, set_cached_ask, set_cached_explain
from models.api import AskRequest, AskResponse
from redis_client import get_arq_pool
router = APIRouter(dependencies=[Depends(require_auth)])
logger = structlog.get_logger("aoc.ask")
@@ -640,14 +642,23 @@ async def explain_event(event_id: str, user: dict = Depends(require_auth)):
"llm_error": "LLM_API_KEY not configured",
}
# Check cache first
redis = await get_arq_pool()
cached = await get_cached_explain(redis, event_id)
if cached:
cached["related_count"] = len(related)
return cached
try:
explanation = await _explain_event(event, related)
return {
result = {
"explanation": explanation,
"llm_used": True,
"llm_error": None,
"related_count": len(related),
}
await set_cached_explain(redis, event_id, result)
return result
except Exception as exc:
logger.warning("Event explanation failed", error=str(exc))
return {
@@ -746,19 +757,70 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
llm_error="LLM not used — no events found." if not LLM_API_KEY else None,
)
# Try LLM summarisation
# Try LLM summarisation (with caching + optional async)
answer = ""
llm_used = False
llm_error = None
if not LLM_API_KEY:
llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
job_id = None
filters_snapshot = {
"services": body.services,
"actor": body.actor,
"operation": body.operation,
"result": body.result,
"start": body.start,
"end": body.end,
"include_tags": body.include_tags,
"exclude_tags": body.exclude_tags,
}
if LLM_API_KEY:
redis = await get_arq_pool()
cached = await get_cached_ask(redis, question, filters_snapshot, events)
if cached:
answer = cached.get("answer", "")
llm_used = cached.get("llm_used", False)
llm_error = cached.get("llm_error")
elif body.async_mode:
pool = await get_arq_pool()
job = await pool.enqueue_job(
"process_ask_question",
question,
filters_snapshot,
events,
total,
excluded_services,
)
job_id = job.job_id if job else None
return AskResponse(
answer="Your question is being processed. Poll /api/jobs/{job_id} for the result.",
events=[_to_event_ref(e) for e in events],
query_info={
"entity": entity,
"start": start,
"end": end,
"event_count": len(events),
"total_matched": total,
"services_queried": query_services,
"excluded_services": excluded_services,
"mongo_query": json.dumps(query, default=str),
},
llm_used=False,
llm_error=None,
job_id=job_id,
)
else:
try:
answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
llm_used = True
await set_cached_ask(redis, question, filters_snapshot, events, {
"answer": answer, "llm_used": True, "llm_error": None,
})
except Exception as exc:
llm_error = f"LLM call failed: {exc}"
logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
else:
try:
answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
llm_used = True
except Exception as exc:
llm_error = f"LLM call failed: {exc}"
logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
# Fallback: structured summary if LLM unavailable or failed
if not answer:
@@ -797,4 +859,5 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
},
llm_used=llm_used,
llm_error=llm_error,
job_id=job_id,
)