feat: aggregate large event sets before sending to LLM

When a query matches >50 events, the LLM now receives:

- Aggregated counts by service, operation, result, and actor
- A list of failures (up to 10)
- The 50 most recent raw events as samples

This scales to thousands of events without blowing the token budget or
losing signal. The LLM gets a bird's-eye view plus concrete examples.

Also updates the system prompt to handle both individual event lists and
aggregated overviews correctly.
feat: raise LLM event limit to 200 and surface the total match count
2026-04-20 16:23:55 +02:00 · 2026-04-20 16:13:52 +02:00
5 changed files with 69 additions and 13 deletions
--- a/.env.example
+++ b/.env.example
@@ -42,6 +42,6 @@ ALERTS_ENABLED=false
 LLM_API_KEY=
 LLM_BASE_URL=https://api.openai.com/v1
 LLM_MODEL=gpt-4o-mini
-LLM_MAX_EVENTS=50
+LLM_MAX_EVENTS=200
 LLM_TIMEOUT_SECONDS=30
 LLM_API_VERSION=
--- a/2
+++ b/2
@@ -1 +1 @@
-1.2.0
+1.2.2
--- a/backend/config.py
+++ b/backend/config.py
@@ -46,7 +46,7 @@ class Settings(BaseSettings):
    LLM_API_KEY: str = ""
    LLM_BASE_URL: str = "https://api.openai.com/v1"
    LLM_MODEL: str = "gpt-4o-mini"
-    LLM_MAX_EVENTS: int = 50
+    LLM_MAX_EVENTS: int = 200
    LLM_TIMEOUT_SECONDS: int = 30
    LLM_API_VERSION: str = ""  # e.g. 2025-01-01-preview for Azure OpenAI

--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -168,22 +168,76 @@ def _build_event_query(
 # ---------------------------------------------------------------------------

 _SYSTEM_PROMPT = """You are an IT operations assistant. An administrator has asked a question about audit logs.
-Your job is to read the list of audit events below and write a concise, plain-language answer.
+Your job is to read the data below and write a concise, plain-language answer.
+
+The input may be either:
+- A small list of individual audit events (numbered Event #1, #2, etc.), or
+- An aggregated overview with counts by service, action, result, and actor, plus sample events.

 Rules:
 - Assume the reader is a non-expert admin.
- Group related events together and tell a coherent story.
+- For aggregated overviews: summarise the scale, top patterns, and highlight anomalies or failures.
+- For small event lists: group related events together and tell a coherent story.
 - Highlight anything unusual, failed actions, or privilege escalations.
 - Reference specific event numbers (e.g., "Event #3") when making claims so the user can verify.
+- If the data is an aggregated subset of a larger result set, acknowledge the scale (e.g., "847 events occurred — the top pattern was...").
 - If there are no events, say so clearly.
 - Keep the answer under 300 words.
- Do not invent events that are not in the list.
+- Do not invent events or patterns that are not supported by the data.
 """


-def _format_events_for_llm(events: list[dict]) -> str:
+def _aggregate_counts(events: list[dict]) -> dict:
+    """Build lightweight aggregation tables for large result sets."""
+    from collections import Counter
+
+    svc_counts = Counter(e.get("service") or "Unknown" for e in events)
+    op_counts = Counter(e.get("operation") or "Unknown" for e in events)
+    result_counts = Counter(e.get("result") or "Unknown" for e in events)
+    actor_counts = Counter(e.get("actor_display") or "Unknown" for e in events)
+    return {
+        "services": svc_counts.most_common(10),
+        "operations": op_counts.most_common(10),
+        "results": result_counts.most_common(5),
+        "actors": actor_counts.most_common(10),
+    }
+
+
+def _format_events_for_llm(events: list[dict], total: int | None = None) -> str:
    lines = []
-    for i, e in enumerate(events, 1):
+
+    # If we have a large result set, send aggregation + samples instead of raw dump
+    if total is not None and total > len(events) and len(events) >= 50:
+        lines.append(f"Result set overview: {total} total events (showing the {len(events)} most recent).\n")
+        agg = _aggregate_counts(events)
+        lines.append("Breakdown by service:")
+        for svc, cnt in agg["services"]:
+            lines.append(f"  {svc}: {cnt}")
+        lines.append("\nBreakdown by action:")
+        for op, cnt in agg["operations"]:
+            lines.append(f"  {op}: {cnt}")
+        lines.append("\nBreakdown by result:")
+        for res, cnt in agg["results"]:
+            lines.append(f"  {res}: {cnt}")
+        lines.append("\nTop actors:")
+        for actor, cnt in agg["actors"]:
+            lines.append(f"  {actor}: {cnt}")
+        # Include failures and a few recent samples
+        failures = [e for e in events if str(e.get("result") or "").lower() in ("failure", "failed")]
+        if failures:
+            lines.append(f"\nFailures ({len(failures)}):")
+            for e in failures[:10]:
+                ts = e.get("timestamp", "?")[:16].replace("T", " ")
+                op = e.get("operation", "unknown")
+                actor = e.get("actor_display", "unknown")
+                lines.append(f"  {ts} — {op} by {actor}")
+        lines.append("\nMost recent sample events:")
+    else:
+        if total is not None and total > len(events):
+            lines.append(f"Showing {len(events)} of {total} total matching events (most recent first):\n")
+
+    # Always include the first N raw events as detail (up to 50)
+    for i, e in enumerate(events[:50], 1):
        ts = e.get("timestamp") or "unknown time"
        op = e.get("operation") or "unknown action"
        actor = e.get("actor_display") or "unknown actor"
@@ -213,11 +267,11 @@ def _build_chat_url(base_url: str, api_version: str) -> str:
    return url


-async def _call_llm(question: str, events: list[dict]) -> str:
+async def _call_llm(question: str, events: list[dict], total: int | None = None) -> str:
    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY not configured")

-    context = _format_events_for_llm(events)
+    context = _format_events_for_llm(events, total=total)
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {
@@ -298,6 +352,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
    )

    try:
+        total = events_collection.count_documents(query)
        cursor = events_collection.find(query).sort([("timestamp", -1)]).limit(LLM_MAX_EVENTS)
        events = list(cursor)
    except Exception as exc:
@@ -325,7 +380,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
        llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
    else:
        try:
-            answer = await _call_llm(question, events)
+            answer = await _call_llm(question, events, total=total)
            llm_used = True
        except Exception as exc:
            llm_error = f"LLM call failed: {exc}"
@@ -359,6 +414,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
            "start": start,
            "end": end,
            "event_count": len(events),
+            "total_matched": total,
            "mongo_query": json.dumps(query, default=str),
        },
        llm_used=llm_used,
--- a/backend/tests/test_ask.py
+++ b/backend/tests/test_ask.py
@@ -236,7 +236,7 @@ class TestAskEndpoint:
            }
        )

-        async def fake_llm(question, events):
+        async def fake_llm(question, events, total=None):
            return "The device had a failed wipe attempt."

        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")
@@ -265,7 +265,7 @@ class TestAskEndpoint:
            }
        )

-        async def failing_llm(question, events):
+        async def failing_llm(question, events, total=None):
            raise RuntimeError("LLM service down")

        monkeypatch.setattr("routes.ask.LLM_API_KEY", "fake-key")
@@ -1 +1 @@
 .2.0
 .2.2