|
|
|
|
@@ -168,22 +168,76 @@ def _build_event_query(
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
_SYSTEM_PROMPT = """You are an IT operations assistant. An administrator has asked a question about audit logs.
|
|
|
|
|
Your job is to read the list of audit events below and write a concise, plain-language answer.
|
|
|
|
|
Your job is to read the data below and write a concise, plain-language answer.
|
|
|
|
|
|
|
|
|
|
The input may be either:
|
|
|
|
|
- A small list of individual audit events (numbered Event #1, #2, etc.), or
|
|
|
|
|
- An aggregated overview with counts by service, action, result, and actor, plus sample events.
|
|
|
|
|
|
|
|
|
|
Rules:
|
|
|
|
|
- Assume the reader is a non-expert admin.
|
|
|
|
|
- Group related events together and tell a coherent story.
|
|
|
|
|
- For aggregated overviews: summarise the scale, top patterns, and highlight anomalies or failures.
|
|
|
|
|
- For small event lists: group related events together and tell a coherent story.
|
|
|
|
|
- Highlight anything unusual, failed actions, or privilege escalations.
|
|
|
|
|
- Reference specific event numbers (e.g., "Event #3") when making claims so the user can verify.
|
|
|
|
|
- If the data is an aggregated subset of a larger result set, acknowledge the scale (e.g., "847 events occurred — the top pattern was...").
|
|
|
|
|
- If there are no events, say so clearly.
|
|
|
|
|
- Keep the answer under 300 words.
|
|
|
|
|
- Do not invent events that are not in the list.
|
|
|
|
|
- Do not invent events or patterns that are not supported by the data.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _format_events_for_llm(events: list[dict]) -> str:
|
|
|
|
|
def _aggregate_counts(events: list[dict]) -> dict:
|
|
|
|
|
"""Build lightweight aggregation tables for large result sets."""
|
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
svc_counts = Counter(e.get("service") or "Unknown" for e in events)
|
|
|
|
|
op_counts = Counter(e.get("operation") or "Unknown" for e in events)
|
|
|
|
|
result_counts = Counter(e.get("result") or "Unknown" for e in events)
|
|
|
|
|
actor_counts = Counter(e.get("actor_display") or "Unknown" for e in events)
|
|
|
|
|
return {
|
|
|
|
|
"services": svc_counts.most_common(10),
|
|
|
|
|
"operations": op_counts.most_common(10),
|
|
|
|
|
"results": result_counts.most_common(5),
|
|
|
|
|
"actors": actor_counts.most_common(10),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _format_events_for_llm(events: list[dict], total: int | None = None) -> str:
|
|
|
|
|
lines = []
|
|
|
|
|
for i, e in enumerate(events, 1):
|
|
|
|
|
|
|
|
|
|
# If we have a large result set, send aggregation + samples instead of raw dump
|
|
|
|
|
if total is not None and total > len(events) and len(events) >= 50:
|
|
|
|
|
lines.append(f"Result set overview: {total} total events (showing the {len(events)} most recent).\n")
|
|
|
|
|
agg = _aggregate_counts(events)
|
|
|
|
|
lines.append("Breakdown by service:")
|
|
|
|
|
for svc, cnt in agg["services"]:
|
|
|
|
|
lines.append(f" {svc}: {cnt}")
|
|
|
|
|
lines.append("\nBreakdown by action:")
|
|
|
|
|
for op, cnt in agg["operations"]:
|
|
|
|
|
lines.append(f" {op}: {cnt}")
|
|
|
|
|
lines.append("\nBreakdown by result:")
|
|
|
|
|
for res, cnt in agg["results"]:
|
|
|
|
|
lines.append(f" {res}: {cnt}")
|
|
|
|
|
lines.append("\nTop actors:")
|
|
|
|
|
for actor, cnt in agg["actors"]:
|
|
|
|
|
lines.append(f" {actor}: {cnt}")
|
|
|
|
|
# Include failures and a few recent samples
|
|
|
|
|
failures = [e for e in events if str(e.get("result") or "").lower() in ("failure", "failed")]
|
|
|
|
|
if failures:
|
|
|
|
|
lines.append(f"\nFailures ({len(failures)}):")
|
|
|
|
|
for e in failures[:10]:
|
|
|
|
|
ts = e.get("timestamp", "?")[:16].replace("T", " ")
|
|
|
|
|
op = e.get("operation", "unknown")
|
|
|
|
|
actor = e.get("actor_display", "unknown")
|
|
|
|
|
lines.append(f" {ts} — {op} by {actor}")
|
|
|
|
|
lines.append("\nMost recent sample events:")
|
|
|
|
|
else:
|
|
|
|
|
if total is not None and total > len(events):
|
|
|
|
|
lines.append(f"Showing {len(events)} of {total} total matching events (most recent first):\n")
|
|
|
|
|
|
|
|
|
|
# Always include the first N raw events as detail (up to 50)
|
|
|
|
|
for i, e in enumerate(events[:50], 1):
|
|
|
|
|
ts = e.get("timestamp") or "unknown time"
|
|
|
|
|
op = e.get("operation") or "unknown action"
|
|
|
|
|
actor = e.get("actor_display") or "unknown actor"
|
|
|
|
|
@@ -213,11 +267,11 @@ def _build_chat_url(base_url: str, api_version: str) -> str:
|
|
|
|
|
return url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _call_llm(question: str, events: list[dict]) -> str:
|
|
|
|
|
async def _call_llm(question: str, events: list[dict], total: int | None = None) -> str:
|
|
|
|
|
if not LLM_API_KEY:
|
|
|
|
|
raise RuntimeError("LLM_API_KEY not configured")
|
|
|
|
|
|
|
|
|
|
context = _format_events_for_llm(events)
|
|
|
|
|
context = _format_events_for_llm(events, total=total)
|
|
|
|
|
messages = [
|
|
|
|
|
{"role": "system", "content": _SYSTEM_PROMPT},
|
|
|
|
|
{
|
|
|
|
|
@@ -298,6 +352,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
total = events_collection.count_documents(query)
|
|
|
|
|
cursor = events_collection.find(query).sort([("timestamp", -1)]).limit(LLM_MAX_EVENTS)
|
|
|
|
|
events = list(cursor)
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
@@ -325,7 +380,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
|
|
|
|
|
llm_error = "LLM_API_KEY is not configured. Set it in your .env to enable AI narrative summarisation."
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
answer = await _call_llm(question, events)
|
|
|
|
|
answer = await _call_llm(question, events, total=total)
|
|
|
|
|
llm_used = True
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
llm_error = f"LLM call failed: {exc}"
|
|
|
|
|
@@ -359,6 +414,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)):
|
|
|
|
|
"start": start,
|
|
|
|
|
"end": end,
|
|
|
|
|
"event_count": len(events),
|
|
|
|
|
"total_matched": total,
|
|
|
|
|
"mongo_query": json.dumps(query, default=str),
|
|
|
|
|
},
|
|
|
|
|
llm_used=llm_used,
|
|
|
|
|
|