feat: aggregate large event sets before sending to LLM
All checks were successful
CI / lint-and-test (push) Successful in 18s
Release / build-and-push (push) Successful in 29s

When a query matches >50 events, the LLM now receives:
- Aggregated counts by service, operation, result, and actor
- A list of failures (up to 10)
- The 50 most recent raw events as samples

This scales to thousands of events without blowing the token budget
or losing signal. The LLM gets a bird's-eye view plus concrete examples.

Also updates the system prompt to handle both individual event lists
and aggregated overviews correctly.
This commit is contained in:
2026-04-20 16:23:55 +02:00
parent cfe9397cc5
commit a255be93fe
2 changed files with 59 additions and 8 deletions

View File

@@ -1 +1 @@
1.2.1 1.2.2

View File

@@ -168,25 +168,76 @@ def _build_event_query(
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_SYSTEM_PROMPT = """You are an IT operations assistant. An administrator has asked a question about audit logs. _SYSTEM_PROMPT = """You are an IT operations assistant. An administrator has asked a question about audit logs.
Your job is to read the list of audit events below and write a concise, plain-language answer. Your job is to read the data below and write a concise, plain-language answer.
The input may be either:
- A small list of individual audit events (numbered Event #1, #2, etc.), or
- An aggregated overview with counts by service, action, result, and actor, plus sample events.
Rules: Rules:
- Assume the reader is a non-expert admin. - Assume the reader is a non-expert admin.
- Group related events together and tell a coherent story. - For aggregated overviews: summarise the scale, top patterns, and highlight anomalies or failures.
- For small event lists: group related events together and tell a coherent story.
- Highlight anything unusual, failed actions, or privilege escalations. - Highlight anything unusual, failed actions, or privilege escalations.
- Reference specific event numbers (e.g., "Event #3") when making claims so the user can verify. - Reference specific event numbers (e.g., "Event #3") when making claims so the user can verify.
- If the event list is a subset of a larger result set, acknowledge the scale (e.g., "At least 200 events occurred..."). - If the data is an aggregated subset of a larger result set, acknowledge the scale (e.g., "847 events occurred — the top pattern was...").
- If there are no events, say so clearly. - If there are no events, say so clearly.
- Keep the answer under 300 words. - Keep the answer under 300 words.
- Do not invent events that are not in the list. - Do not invent events or patterns that are not supported by the data.
""" """
def _aggregate_counts(events: list[dict]) -> dict:
"""Build lightweight aggregation tables for large result sets."""
from collections import Counter
svc_counts = Counter(e.get("service") or "Unknown" for e in events)
op_counts = Counter(e.get("operation") or "Unknown" for e in events)
result_counts = Counter(e.get("result") or "Unknown" for e in events)
actor_counts = Counter(e.get("actor_display") or "Unknown" for e in events)
return {
"services": svc_counts.most_common(10),
"operations": op_counts.most_common(10),
"results": result_counts.most_common(5),
"actors": actor_counts.most_common(10),
}
def _format_events_for_llm(events: list[dict], total: int | None = None) -> str: def _format_events_for_llm(events: list[dict], total: int | None = None) -> str:
lines = [] lines = []
if total is not None and total > len(events):
lines.append(f"Showing {len(events)} of {total} total matching events (most recent first):\n") # If we have a large result set, send aggregation + samples instead of raw dump
for i, e in enumerate(events, 1): if total is not None and total > len(events) and len(events) >= 50:
lines.append(f"Result set overview: {total} total events (showing the {len(events)} most recent).\n")
agg = _aggregate_counts(events)
lines.append("Breakdown by service:")
for svc, cnt in agg["services"]:
lines.append(f" {svc}: {cnt}")
lines.append("\nBreakdown by action:")
for op, cnt in agg["operations"]:
lines.append(f" {op}: {cnt}")
lines.append("\nBreakdown by result:")
for res, cnt in agg["results"]:
lines.append(f" {res}: {cnt}")
lines.append("\nTop actors:")
for actor, cnt in agg["actors"]:
lines.append(f" {actor}: {cnt}")
# Include failures and a few recent samples
failures = [e for e in events if str(e.get("result") or "").lower() in ("failure", "failed")]
if failures:
lines.append(f"\nFailures ({len(failures)}):")
for e in failures[:10]:
ts = e.get("timestamp", "?")[:16].replace("T", " ")
op = e.get("operation", "unknown")
actor = e.get("actor_display", "unknown")
lines.append(f" {ts}{op} by {actor}")
lines.append("\nMost recent sample events:")
else:
if total is not None and total > len(events):
lines.append(f"Showing {len(events)} of {total} total matching events (most recent first):\n")
# Always include the first N raw events as detail (up to 50)
for i, e in enumerate(events[:50], 1):
ts = e.get("timestamp") or "unknown time" ts = e.get("timestamp") or "unknown time"
op = e.get("operation") or "unknown action" op = e.get("operation") or "unknown action"
actor = e.get("actor_display") or "unknown actor" actor = e.get("actor_display") or "unknown actor"