feat: raise LLM event limit to 200 and make the model aware of the total match count

- Bump LLM_MAX_EVENTS default from 50 to 200
- Add total_matched count to the /api/ask response
- Include a 'Showing X of Y total' header in the LLM prompt so the model knows when its view is a subset and avoids false certainty
- Update the system prompt to instruct the model to acknowledge scale when the event list is truncated
- Update test mocks to accept the new total parameter
2026-04-20 16:13:52 +02:00
5 changed files with 14 additions and 9 deletions
--- a/.env.example
+++ b/.env.example
@@ -42,6 +42,6 @@ ALERTS_ENABLED=false
 LLM_API_KEY=
 LLM_BASE_URL=https://api.openai.com/v1
 LLM_MODEL=gpt-4o-mini
-LLM_MAX_EVENTS=50
+LLM_MAX_EVENTS=200
 LLM_TIMEOUT_SECONDS=30
 LLM_API_VERSION=
--- a/2
+++ b/2
@@ -1 +1 @@
-1.2.0
+1.2.1
--- a/backend/config.py
+++ b/backend/config.py
@@ -46,7 +46,7 @@ class Settings(BaseSettings):
    LLM_API_KEY: str = ""
    LLM_BASE_URL: str = "https://api.openai.com/v1"
    LLM_MODEL: str = "gpt-4o-mini"
-    LLM_MAX_EVENTS: int = 50
+    LLM_MAX_EVENTS: int = 200
    LLM_TIMEOUT_SECONDS: int = 30
    LLM_API_VERSION: str = ""  # e.g. 2025-01-01-preview for Azure OpenAI
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -175,14 +175,17 @@ Rules:
 - Group related events together and tell a coherent story.
 - Highlight anything unusual, failed actions, or privilege escalations.
 - Reference specific event numbers (e.g., "Event #3") when making claims so the user can verify.
+- If the event list is a subset of a larger result set, acknowledge the scale (e.g., "At least 200 events occurred...").
 - If there are no events, say so clearly.
 - Keep the answer under 300 words.
 - Do not invent events that are not in the list.
 """
-def _format_events_for_llm(events: list[dict]) -> str:
+def _format_events_for_llm(events: list[dict], total: int | None = None) -> str:
    lines = []
+    if total is not None and total > len(events):
+        lines.append(f"Showing {len(events)} of {total} total matching events (most recent first):\n")
    for i, e in enumerate(events, 1):
        ts = e.get("timestamp") or "unknown time"
        op = e.get("operation") or "unknown action"
@@ -213,11 +216,11 @@ def _build_chat_url(base_url: str, api_version: str) -> str:
    return url
-async def _call_llm(question: str, events: list[dict]) -> str:
+async def _call_llm(question: str, events: list[dict], total: int | None = None) -> str:
    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY not configured")
-    context = _format_events_for_llm(events)
+    context = _format_events_for_llm(events, total=total)
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {
@@ -298,6 +301,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
    )
    try:
+        total = events_collection.count_documents(query)
        cursor = events_collection.find(query).sort([("timestamp", -1)]).limit(LLM_MAX_EVENTS)
        events = list(cursor)
    except Exception as exc:
@@ -325,7 +329,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
        llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
    else:
        try:
-            answer = await _call_llm(question, events)
+            answer = await _call_llm(question, events, total=total)
            llm_used = True
        except Exception as exc:
            llm_error = f"LLM call failed: {exc}"
@@ -359,6 +363,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
            "start": start,
            "end": end,
            "event_count": len(events),
+            "total_matched": total,
            "mongo_query": json.dumps(query, default=str),
        },
        llm_used=llm_used,
--- a/backend/tests/test_ask.py
+++ b/backend/tests/test_ask.py
@@ -236,7 +236,7 @@ class TestAskEndpoint:
            }
        )
-        async def fake_llm(question, events):
+        async def fake_llm(question, events, total=None):
            return "The device had a failed wipe attempt."
        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")
@@ -265,7 +265,7 @@ class TestAskEndpoint:
            }
        )
-        async def failing_llm(question, events):
+        async def failing_llm(question, events, total=None):
            raise RuntimeError("LLM service down")
        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")