feat: Redis caching + async queue for LLM scaling (v1.6.0)
- Add async Redis client singleton (redis_client.py) for caching and arq pool
- Add arq job functions (jobs.py) for background LLM processing
- Cache ask/explain LLM responses with TTL (1h ask, 24h explain)
- Add async mode to /api/ask: enqueue job, return job_id, poll /api/jobs/{id}
- Add GET /api/jobs/{job_id} endpoint for job status polling
- Add arq worker service to docker-compose (dev + prod)
- Switch from Redis to Valkey (BSD fork) in Docker Compose
- Add REDIS_URL config setting
- Add tests for cache hit, async mode, and job status
This commit is contained in:
@@ -350,3 +350,124 @@ class TestAskEndpoint:
|
||||
data = response.json()
|
||||
assert data["query_info"]["event_count"] == 1
|
||||
assert data["events"][0]["id"] == "evt-bob"
|
||||
|
||||
|
||||
class TestAskCaching:
    """Exercise the cache-hit and async-queue paths of ``POST /api/ask``."""

    @staticmethod
    def _directory_event(event_id):
        """Build the single "Add user" event document that each test inserts.

        Only ``id`` varies between the tests; the timestamp is taken at call
        time in UTC.
        """
        return {
            "id": event_id,
            "timestamp": datetime.now(UTC).isoformat(),
            "service": "Directory",
            "operation": "Add user",
            "result": "success",
            "actor_display": "Alice",
            "target_displays": ["USER-001"],
            "display_summary": "summary",
            "raw_text": "raw",
        }

    def test_ask_cache_hit_returns_cached_answer(
        self, client, mock_events_collection, monkeypatch
    ):
        """A pre-seeded cache entry must be served without invoking the LLM."""
        mock_events_collection.insert_one(self._directory_event("evt-cache"))

        # Every invocation of the stubbed LLM is recorded here; the list must
        # still be empty once the request has been answered.
        llm_invocations = []

        async def fake_llm(question, events, total=None, excluded_services=None):
            llm_invocations.append(question)
            return "This should NOT appear."

        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")
        monkeypatch.setattr("routes.ask._call_llm", fake_llm)

        class CachingFakeRedis:
            """Dict-backed stand-in exposing only ``get`` and ``setex``."""

            def __init__(self):
                self.store = {}

            async def get(self, key):
                return self.store.get(key)

            async def setex(self, key, ttl, value):
                # The TTL is accepted but ignored: entries never expire here.
                self.store[key] = value

        fake_redis = CachingFakeRedis()

        import asyncio

        from jobs import set_cached_ask

        asked = "What happened to USER-001?"
        # Every filter is left unset; this is meant to mirror the filters the
        # endpoint generates for a request that carries only a question.
        unset_filters = dict.fromkeys(
            (
                "services",
                "actor",
                "operation",
                "result",
                "start",
                "end",
                "include_tags",
                "exclude_tags",
            )
        )
        seeded_result = {
            "answer": "Cached answer!",
            "llm_used": True,
            "llm_error": None,
        }
        # Pre-populate the fake cache through the production helper before
        # the request is issued.
        asyncio.run(
            set_cached_ask(
                fake_redis,
                asked,
                unset_filters,
                [{"id": "evt-cache"}],
                seeded_result,
            )
        )

        async def fake_get_arq_pool():
            return fake_redis

        monkeypatch.setattr("routes.ask.get_arq_pool", fake_get_arq_pool)

        response = client.post("/api/ask", json={"question": asked})
        assert response.status_code == 200

        payload = response.json()
        assert payload["answer"] == "Cached answer!"
        assert payload["llm_used"] is True
        assert llm_invocations == []

    def test_ask_async_mode_returns_job_id(
        self, client, mock_events_collection, monkeypatch
    ):
        """With ``async_mode`` set, the response carries the queued job's id."""
        mock_events_collection.insert_one(self._directory_event("evt-async"))

        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")

        class FakeArqPool:
            """Pool double: always a cache miss, and records enqueued jobs."""

            def __init__(self):
                self.enqueued = []

            async def get(self, key):
                # Always report a cache miss.
                return None

            async def setex(self, key, ttl, value):
                # Cache writes are discarded.
                pass

            async def enqueue_job(self, func, *args, **kwargs):
                from unittest.mock import MagicMock

                self.enqueued.append((func, args, kwargs))
                return MagicMock(job_id="job-12345")

        fake_pool = FakeArqPool()

        async def fake_get_arq_pool():
            return fake_pool

        monkeypatch.setattr("routes.ask.get_arq_pool", fake_get_arq_pool)

        request_body = {
            "question": "What happened to USER-001?",
            "async_mode": True,
        }
        response = client.post("/api/ask", json=request_body)
        assert response.status_code == 200

        payload = response.json()
        assert payload["job_id"] == "job-12345"
        assert payload["llm_used"] is False
        assert "being processed" in payload["answer"]

        # Exactly one job must have been queued, addressed by function name.
        assert len(fake_pool.enqueued) == 1
        queued_function = fake_pool.enqueued[0][0]
        assert queued_function == "process_ask_question"
|
||||
|
||||
Reference in New Issue
Block a user