feat: Redis caching + async queue for LLM scaling (v1.6.0)
- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue a job, return its job_id, then poll /api/jobs/{job_id} for the result
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD-licensed Redis fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
This commit is contained in:
@@ -18,7 +18,9 @@ from config import (
|
||||
)
|
||||
from database import events_collection
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from jobs import get_cached_ask, get_cached_explain, set_cached_ask, set_cached_explain
|
||||
from models.api import AskRequest, AskResponse
|
||||
from redis_client import get_arq_pool
|
||||
|
||||
router = APIRouter(dependencies=[Depends(require_auth)])
|
||||
logger = structlog.get_logger("aoc.ask")
|
||||
@@ -640,14 +642,23 @@ async def explain_event(event_id: str, user: dict = Depends(require_auth)):
|
||||
"llm_error": "LLM_API_KEY not configured",
|
||||
}
|
||||
|
||||
# Check cache first
|
||||
redis = await get_arq_pool()
|
||||
cached = await get_cached_explain(redis, event_id)
|
||||
if cached:
|
||||
cached["related_count"] = len(related)
|
||||
return cached
|
||||
|
||||
try:
|
||||
explanation = await _explain_event(event, related)
|
||||
return {
|
||||
result = {
|
||||
"explanation": explanation,
|
||||
"llm_used": True,
|
||||
"llm_error": None,
|
||||
"related_count": len(related),
|
||||
}
|
||||
await set_cached_explain(redis, event_id, result)
|
||||
return result
|
||||
except Exception as exc:
|
||||
logger.warning("Event explanation failed", error=str(exc))
|
||||
return {
|
||||
@@ -746,19 +757,70 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
|
||||
llm_error="LLM not used — no events found." if not LLM_API_KEY else None,
|
||||
)
|
||||
|
||||
# Try LLM summarisation
|
||||
# Try LLM summarisation (with caching + optional async)
|
||||
answer = ""
|
||||
llm_used = False
|
||||
llm_error = None
|
||||
if not LLM_API_KEY:
|
||||
llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
|
||||
job_id = None
|
||||
|
||||
filters_snapshot = {
|
||||
"services": body.services,
|
||||
"actor": body.actor,
|
||||
"operation": body.operation,
|
||||
"result": body.result,
|
||||
"start": body.start,
|
||||
"end": body.end,
|
||||
"include_tags": body.include_tags,
|
||||
"exclude_tags": body.exclude_tags,
|
||||
}
|
||||
|
||||
if LLM_API_KEY:
|
||||
redis = await get_arq_pool()
|
||||
cached = await get_cached_ask(redis, question, filters_snapshot, events)
|
||||
if cached:
|
||||
answer = cached.get("answer", "")
|
||||
llm_used = cached.get("llm_used", False)
|
||||
llm_error = cached.get("llm_error")
|
||||
elif body.async_mode:
|
||||
pool = await get_arq_pool()
|
||||
job = await pool.enqueue_job(
|
||||
"process_ask_question",
|
||||
question,
|
||||
filters_snapshot,
|
||||
events,
|
||||
total,
|
||||
excluded_services,
|
||||
)
|
||||
job_id = job.job_id if job else None
|
||||
return AskResponse(
|
||||
answer="Your question is being processed. Poll /api/jobs/{job_id} for the result.",
|
||||
events=[_to_event_ref(e) for e in events],
|
||||
query_info={
|
||||
"entity": entity,
|
||||
"start": start,
|
||||
"end": end,
|
||||
"event_count": len(events),
|
||||
"total_matched": total,
|
||||
"services_queried": query_services,
|
||||
"excluded_services": excluded_services,
|
||||
"mongo_query": json.dumps(query, default=str),
|
||||
},
|
||||
llm_used=False,
|
||||
llm_error=None,
|
||||
job_id=job_id,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
|
||||
llm_used = True
|
||||
await set_cached_ask(redis, question, filters_snapshot, events, {
|
||||
"answer": answer, "llm_used": True, "llm_error": None,
|
||||
})
|
||||
except Exception as exc:
|
||||
llm_error = f"LLM call failed: {exc}"
|
||||
logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
|
||||
else:
|
||||
try:
|
||||
answer = await _call_llm(question, events, total=total, excluded_services=excluded_services)
|
||||
llm_used = True
|
||||
except Exception as exc:
|
||||
llm_error = f"LLM call failed: {exc}"
|
||||
logger.warning("LLM call failed, falling back to structured summary", error=str(exc))
|
||||
llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
|
||||
|
||||
# Fallback: structured summary if LLM unavailable or failed
|
||||
if not answer:
|
||||
@@ -797,4 +859,5 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
|
||||
},
|
||||
llm_used=llm_used,
|
||||
llm_error=llm_error,
|
||||
job_id=job_id,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user