feat: Redis caching + async queue for LLM scaling (v1.6.0)
Some checks failed
Release / build-and-push (push) Successful in 1m24s
CI / lint-and-test (push) Failing after 29s

- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue job, return job_id, poll /api/jobs/{id}
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
This commit is contained in:
2026-04-22 09:55:05 +02:00
parent 47e0dfc2ca
commit f75f165911
16 changed files with 498 additions and 14 deletions

View File

@@ -350,3 +350,124 @@ class TestAskEndpoint:
data = response.json()
assert data["query_info"]["event_count"] == 1
assert data["events"][0]["id"] == "evt-bob"
class TestAskCaching:
    """Tests for the cached and queued (async) paths of ``POST /api/ask``."""

    @staticmethod
    def _directory_event(event_id):
        """Return the "Add user" event document the tests store, with ``event_id`` as its id."""
        return {
            "id": event_id,
            "timestamp": datetime.now(UTC).isoformat(),
            "service": "Directory",
            "operation": "Add user",
            "result": "success",
            "actor_display": "Alice",
            "target_displays": ["USER-001"],
            "display_summary": "summary",
            "raw_text": "raw",
        }

    def test_ask_cache_hit_returns_cached_answer(self, client, mock_events_collection, monkeypatch):
        """A pre-seeded cache entry is returned and the LLM stub is never invoked."""
        user_question = "What happened to USER-001?"
        mock_events_collection.insert_one(self._directory_event("evt-cache"))

        # Every invocation of the stub is logged; the list must stay empty.
        llm_calls = []

        async def fake_llm(question, events, total=None, excluded_services=None):
            llm_calls.append(question)
            return "This should NOT appear."

        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")
        monkeypatch.setattr("routes.ask._call_llm", fake_llm)

        class CachingFakeRedis:
            """Dict-backed stand-in that implements only ``get`` and ``setex``."""

            def __init__(self):
                self.store = {}

            async def get(self, key):
                return self.store.get(key)

            async def setex(self, key, ttl, value):
                # The TTL is accepted but not enforced by this fake.
                self.store[key] = value

        fake_redis = CachingFakeRedis()

        import asyncio

        from jobs import set_cached_ask

        # All filters unset -- presumably the snapshot the endpoint builds for
        # a request that carries only a question (confirm against routes.ask).
        unset_filters = dict.fromkeys(
            (
                "services",
                "actor",
                "operation",
                "result",
                "start",
                "end",
                "include_tags",
                "exclude_tags",
            )
        )
        cached_payload = {"answer": "Cached answer!", "llm_used": True, "llm_error": None}
        asyncio.run(
            set_cached_ask(
                fake_redis,
                user_question,
                unset_filters,
                [{"id": "evt-cache"}],
                cached_payload,
            )
        )

        async def fake_get_arq_pool():
            return fake_redis

        monkeypatch.setattr("routes.ask.get_arq_pool", fake_get_arq_pool)

        response = client.post("/api/ask", json={"question": user_question})

        assert response.status_code == 200
        body = response.json()
        assert body["answer"] == "Cached answer!"
        assert body["llm_used"] is True
        assert llm_calls == []

    def test_ask_async_mode_returns_job_id(self, client, mock_events_collection, monkeypatch):
        """With ``async_mode`` set, the response carries the enqueued job's id."""
        from unittest.mock import MagicMock

        mock_events_collection.insert_one(self._directory_event("evt-async"))
        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")

        class FakeArqPool:
            """Pool double: always a cache miss, and records each ``enqueue_job`` call."""

            def __init__(self):
                self.enqueued = []

            async def get(self, key):
                return None

            async def setex(self, key, ttl, value):
                return None

            async def enqueue_job(self, func, *args, **kwargs):
                self.enqueued.append((func, args, kwargs))
                return MagicMock(job_id="job-12345")

        pool = FakeArqPool()

        async def fake_get_arq_pool():
            return pool

        monkeypatch.setattr("routes.ask.get_arq_pool", fake_get_arq_pool)

        request_body = {"question": "What happened to USER-001?", "async_mode": True}
        response = client.post("/api/ask", json=request_body)

        assert response.status_code == 200
        body = response.json()
        assert body["job_id"] == "job-12345"
        assert body["llm_used"] is False
        assert "being processed" in body["answer"]
        assert len(pool.enqueued) == 1
        assert pool.enqueued[0][0] == "process_ask_question"