From 90f0e14f6e4b51585575f547b3089a3a7deba6c5 Mon Sep 17 00:00:00 2001
From: Tomas Kracmar
Date: Fri, 28 Nov 2025 21:43:44 +0100
Subject: [PATCH] First version

---
 .env.example                     |   4 +
 .gitignore                       |  12 ++
 README.md                        | 105 +++++++++++
 backend/Dockerfile               |   6 +
 backend/config.py                |  10 ++
 backend/database.py              |   6 +
 backend/frontend/index.html      | 293 +++++++++++++++++++++++++++++++
 backend/frontend/style.css       | 271 ++++++++++++++++++++++++++++
 backend/graph/audit_logs.py      |  72 ++++++++
 backend/graph/auth.py            |  22 +++
 backend/graph/resolve.py         |  96 ++++++++++
 backend/main.py                  |  17 ++
 backend/maintenance.py           | 107 +++++++++++
 backend/mapping_loader.py        |  59 +++++++
 backend/mappings.yml             |  31 ++++
 backend/models/event_model.py    | 206 ++++++++++++++++++++++
 backend/requirements.txt         |   6 +
 backend/routes/events.py         | 104 +++++++++++
 backend/routes/fetch.py          |  40 +++++
 backend/sources/intune_audit.py  |  73 ++++++++
 backend/sources/unified_audit.py | 106 +++++++++++
 docker-compose.yml               |  28 +++
 22 files changed, 1674 insertions(+)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 backend/Dockerfile
 create mode 100644 backend/config.py
 create mode 100644 backend/database.py
 create mode 100644 backend/frontend/index.html
 create mode 100644 backend/frontend/style.css
 create mode 100644 backend/graph/audit_logs.py
 create mode 100644 backend/graph/auth.py
 create mode 100644 backend/graph/resolve.py
 create mode 100644 backend/main.py
 create mode 100644 backend/maintenance.py
 create mode 100644 backend/mapping_loader.py
 create mode 100644 backend/mappings.yml
 create mode 100644 backend/models/event_model.py
 create mode 100644 backend/requirements.txt
 create mode 100644 backend/routes/events.py
 create mode 100644 backend/routes/fetch.py
 create mode 100644 backend/sources/intune_audit.py
 create mode 100644 backend/sources/unified_audit.py
 create mode 100644 docker-compose.yml

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..edfe7c7
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,4 @@
+TENANT_ID=your-tenant-id
+CLIENT_ID=your-client-id
+CLIENT_SECRET=your-client-secret
+MONGO_URI=mongodb://root:example@mongo:27017/
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..21b4c57
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+.env
+.DS_Store
+__pycache__/
+*.py[cod]
+.venv/
+venv/
+.pytest_cache/
+.mypy_cache/
+.coverage*
+coverage.xml
+.vscode/
+.idea/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..40e3c93
--- /dev/null
+++ b/README.md
@@ -0,0 +1,105 @@
+# Admin Operations Center (AOC)
+
+FastAPI microservice that ingests Microsoft Entra (Azure AD) and other admin audit logs into MongoDB, dedupes them, and exposes a UI/API to fetch, search, and review events.
+
+## Components
+- FastAPI app under `backend/` with routes to fetch audit logs and list stored events.
+- MongoDB for persistence (provisioned via Docker Compose).
+- Microsoft Graph client (client credentials) for retrieving directory audit events and Intune audit events.
+- Office 365 Management Activity API client for Exchange/SharePoint/Teams admin audit logs.
+- Frontend served from the backend for filtering/searching events and viewing raw entries.
+
+## Prerequisites (macOS)
+- Python 3.11
+- Docker Desktop (for the quickest start) or a local MongoDB instance
+- An Entra app registration with **Application** permission `AuditLog.Read.All` and admin consent granted
+  - Also required for the other sources:
+    - Office 365 Management Activity API: `ActivityFeed.Read` (and `ActivityFeed.ReadDlp` for DLP content), granted under "Office 365 Management APIs" in the app registration; tokens are requested with the `https://manage.office.com/.default` scope
+    - Intune audit: `DeviceManagementConfiguration.Read.All` (or broader) for `/deviceManagement/auditEvents`
+    - Actor/target name resolution: Microsoft Graph `Directory.Read.All` (used by the enrichment step)
+
+## Configuration
+Create a `.env` file at the repo root (copy `.env.example`) and fill in your Microsoft Graph app credentials. The provided `MONGO_URI` works with the bundled MongoDB container; change it if you use a different Mongo instance.
+
+```bash
+cp .env.example .env
+# edit .env to add TENANT_ID, CLIENT_ID, CLIENT_SECRET (and MONGO_URI if needed)
+```
+
+## Run with Docker Compose (recommended)
+```bash
+docker compose up --build
+```
+- API: http://localhost:8000
+- Frontend: http://localhost:8000
+- Mongo: localhost:27017 (root/example)
+
+## Run locally without Docker
+1) Start MongoDB (e.g. with Docker):
+   `docker run --rm -p 27017:27017 -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=example mongo:7`
+
+2) Prepare the backend environment:
+```bash
+cd backend
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+export $(cat ../.env | xargs)  # or set env vars manually
+uvicorn main:app --reload --host 0.0.0.0 --port 8000
+```
+
+## API
+- `GET /api/fetch-audit-logs` — pulls the last 7 days by default (override with `?hours=N`, capped to 30 days) of:
+  - Entra directory audit logs (`/auditLogs/directoryAudits`)
+  - Exchange/SharePoint/Teams admin audits (via Office 365 Management Activity API)
+  - Intune audit logs (`/deviceManagement/auditEvents`)
+  Dedupes on a stable key (source id, or timestamp/category/operation/target). Returns the stored-event count and per-source warnings.
+- `GET /api/events` — list stored events with filters:
+  - `service`, `actor`, `operation`, `result`, `start`, `end`, `search` (free text over raw/summary/actor/targets)
+  - Pagination: `page`, `page_size` (defaults 1, 50; max 500)
+- `GET /api/filter-options` — best-effort distinct values for services, operations, results, actors (used by UI dropdowns).
+
+Stored document shape (collection `micro_soc.events`):
+```json
+{
+  "id": "...",                // original source id
+  "timestamp": "...",         // activityDateTime
+  "service": "...",           // category
+  "operation": "...",         // activityDisplayName
+  "result": "...",
+  "actor_display": "...",     // resolved user/app name
+  "target_displays": [ ... ],
+  "display_summary": "...",
+  "dedupe_key": "...",        // used for upserts
+  "actor": { ... },           // initiatedBy
+  "targets": [ ... ],         // targetResources
+  "raw": { ... },             // full source event
+  "raw_text": "..."           // raw as string for text search
+}
+```
+
+## Quick smoke tests
+With the server running:
+```bash
+curl http://localhost:8000/api/events
+curl http://localhost:8000/api/fetch-audit-logs
+```
+- Visit the UI at http://localhost:8000 to filter by user/service/action/result/time, search raw text, paginate, and view raw events.
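+
+For scripted access, here is a minimal Python sketch (it assumes the server is running locally and the `requests` package is installed; the `service` value is just an example category):
+
+```python
+import requests
+
+BASE = "http://localhost:8000/api"
+
+# Trigger ingestion for the last 24 hours, then page through stored events.
+requests.get(f"{BASE}/fetch-audit-logs", params={"hours": 24}, timeout=300).raise_for_status()
+
+page = 1
+while True:
+    body = requests.get(
+        f"{BASE}/events",
+        params={"service": "UserManagement", "page": page, "page_size": 100},
+        timeout=30,
+    ).json()
+    for event in body["items"]:
+        print(event["timestamp"], event["display_summary"])
+    if page * body["page_size"] >= body["total"]:
+        break
+    page += 1
+```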
+
+## Maintenance (Dockerized)
+Use the backend image so you don't need a local venv:
+```bash
+# ensure Mongo + backend network are up
+docker compose up -d mongo
+# re-run enrichment/normalization on stored events (uses .env for Graph/Mongo)
+docker compose run --rm backend python maintenance.py renormalize --limit 500
+# deduplicate existing events (optional)
+docker compose run --rm backend python maintenance.py dedupe
+```
+Omit `--limit` to process all events. You can also run commands inside a running backend container with `docker compose exec backend ...`.
+
+## Notes / Troubleshooting
+- Ensure `TENANT_ID`, `CLIENT_ID`, and `CLIENT_SECRET` match an app registration with the **Application** permission `AuditLog.Read.All` and admin consent granted.
+- Additional permissions: Office 365 Management Activity (`ActivityFeed.Read`), Intune audit (`DeviceManagementConfiguration.Read.All`), and `Directory.Read.All` for actor/target name resolution.
+- Backfill limits: the Management Activity API typically exposes ~7 days of history (longer if your tenant has extended/Advanced Audit retention). Directory/Intune audit retention follows your tenant policy (commonly 30–90 days, longer with Advanced Audit).
+- If you change Mongo credentials/ports, update `MONGO_URI` in `.env` (Docker Compose passes it through to the backend).
+- The service uses the `micro_soc` database and `events` collection by default; adjust in `backend/config.py` if needed.
diff --git a/backend/Dockerfile b/backend/Dockerfile
new file mode 100644
index 0000000..f8fda52
--- /dev/null
+++ b/backend/Dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/backend/config.py b/backend/config.py
new file mode 100644
index 0000000..f0ace81
--- /dev/null
+++ b/backend/config.py
@@ -0,0 +1,10 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+TENANT_ID = os.getenv("TENANT_ID")
+CLIENT_ID = os.getenv("CLIENT_ID")
+CLIENT_SECRET = os.getenv("CLIENT_SECRET")
+MONGO_URI = os.getenv("MONGO_URI")
+DB_NAME = "micro_soc"
diff --git a/backend/database.py b/backend/database.py
new file mode 100644
index 0000000..76475e4
--- /dev/null
+++ b/backend/database.py
@@ -0,0 +1,6 @@
+from pymongo import MongoClient
+from config import MONGO_URI, DB_NAME
+
+client = MongoClient(MONGO_URI)
+db = client[DB_NAME]
+events_collection = db["events"]
diff --git a/backend/frontend/index.html b/backend/frontend/index.html
new file mode 100644
index 0000000..f5cb997
--- /dev/null
+++ b/backend/frontend/index.html
@@ -0,0 +1,293 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>AOC Events</title>
+  <link rel="stylesheet" href="style.css" />
+</head>
+<body>
+  <!--
+    Minimal page wiring for the API. This markup is a best-effort reconstruction:
+    the headings and class names come from style.css and the surviving copy,
+    so treat it as a sketch rather than the final UI.
+  -->
+  <div class="page">
+    <header class="hero">
+      <div>
+        <p class="eyebrow">Admin Operations Center</p>
+        <h1>Directory Audit Explorer</h1>
+        <p class="lede">Filter Microsoft Entra audit events by user, app, time, action, and action type.</p>
+      </div>
+      <div class="cta">
+        <button id="fetch">Fetch latest</button>
+      </div>
+    </header>
+
+    <section class="panel">
+      <form id="filters" class="filters">
+        <label>Service
+          <select id="service"><option value="">All</option></select>
+        </label>
+        <label>User
+          <input id="actor" type="text" placeholder="Display name or UPN" />
+        </label>
+        <label>Action
+          <select id="operation"><option value="">All</option></select>
+        </label>
+        <label>Result
+          <select id="result"><option value="">All</option></select>
+        </label>
+        <label>From
+          <input id="start" type="datetime-local" />
+        </label>
+        <label>To
+          <input id="end" type="datetime-local" />
+        </label>
+        <label>Search raw text
+          <input id="search" type="text" placeholder="Free text" />
+        </label>
+      </form>
+      <div class="actions">
+        <button id="apply">Apply filters</button>
+        <button id="reset" class="ghost">Reset</button>
+      </div>
+    </section>
+
+    <section class="panel">
+      <div class="panel-header">
+        <h2>Events</h2>
+        <span id="count"></span>
+      </div>
+      <div id="status" class="status"></div>
+      <div id="events" class="events"></div>
+      <div class="pagination">
+        <button id="prev">Prev</button>
+        <span id="page-info"></span>
+        <button id="next">Next</button>
+      </div>
+    </section>
+  </div>
+
+  <div id="modal" class="modal hidden">
+    <div class="modal__content">
+      <div class="modal__header">
+        <h3>Raw event</h3>
+        <button id="close-modal" class="ghost">Close</button>
+      </div>
+      <pre id="raw"></pre>
+    </div>
+  </div>
+
+  <script>
+    const state = { page: 1, pageSize: 50, total: 0 };
+    const $ = (id) => document.getElementById(id);
+
+    // Build the /api/events query string from the filter controls.
+    function params() {
+      const p = new URLSearchParams({ page: state.page, page_size: state.pageSize });
+      for (const key of ["service", "actor", "operation", "result", "search"]) {
+        const v = $(key).value.trim();
+        if (v) p.set(key, v);
+      }
+      if ($("start").value) p.set("start", new Date($("start").value).toISOString());
+      if ($("end").value) p.set("end", new Date($("end").value).toISOString());
+      return p;
+    }
+
+    // Render one stored event as a card; the Raw button opens the modal.
+    function card(e) {
+      const el = document.createElement("article");
+      el.className = "event";
+      const ok = (e.result || "").toLowerCase().includes("success");
+      el.innerHTML = `
+        <div class="event__meta">
+          <span class="pill">${e.display_category || e.service || "Other"}</span>
+          <span class="pill ${ok ? "pill--ok" : "pill--warn"}">${e.result || "unknown"}</span>
+        </div>
+        <h3>${e.operation || "Event"}</h3>
+        <p class="event__detail">${e.display_summary || ""}</p>
+        <p class="event__detail">${e.timestamp || ""}</p>
+        <p class="event__detail">${e.display_actor_label || "User"}: ${e.display_actor_value || e.actor_display || ""}</p>
+        <div class="event__actions"><button class="ghost">Raw</button></div>`;
+      el.querySelector("button").onclick = () => {
+        $("raw").textContent = JSON.stringify(e.raw, null, 2);
+        $("modal").classList.remove("hidden");
+      };
+      return el;
+    }
+
+    async function loadEvents() {
+      $("status").textContent = "Loading…";
+      const res = await fetch(`/api/events?${params()}`);
+      const body = await res.json();
+      state.total = body.total;
+      $("events").replaceChildren(...body.items.map(card));
+      $("count").textContent = `${body.total} events`;
+      $("page-info").textContent = `Page ${body.page} of ${Math.max(1, Math.ceil(body.total / body.page_size))}`;
+      $("status").textContent = "";
+    }
+
+    // Populate the dropdowns from /api/filter-options.
+    async function loadFilterOptions() {
+      const res = await fetch("/api/filter-options");
+      const opts = await res.json();
+      const fill = (id, values) => {
+        for (const v of values || []) {
+          const o = document.createElement("option");
+          o.value = o.textContent = v;
+          $(id).appendChild(o);
+        }
+      };
+      fill("service", opts.services);
+      fill("operation", opts.operations);
+      fill("result", opts.results);
+    }
+
+    $("filters").onsubmit = (e) => { e.preventDefault(); state.page = 1; loadEvents(); };
+    $("apply").onclick = () => { state.page = 1; loadEvents(); };
+    $("reset").onclick = () => { $("filters").reset(); state.page = 1; loadEvents(); };
+    $("prev").onclick = () => { if (state.page > 1) { state.page--; loadEvents(); } };
+    $("next").onclick = () => {
+      if (state.page * state.pageSize < state.total) { state.page++; loadEvents(); }
+    };
+    $("fetch").onclick = async () => {
+      $("status").textContent = "Fetching from Microsoft APIs…";
+      const res = await fetch("/api/fetch-audit-logs");
+      const body = await res.json();
+      $("status").textContent = `Stored ${body.stored_events} events` +
+        (body.errors && body.errors.length ? ` (warnings: ${body.errors.join("; ")})` : "");
+      loadEvents();
+    };
+    $("close-modal").onclick = () => $("modal").classList.add("hidden");
+
+    loadFilterOptions().then(loadEvents);
+  </script>
+</body>
+</html>
diff --git a/backend/frontend/style.css b/backend/frontend/style.css
new file mode 100644
index 0000000..a2b71ad
--- /dev/null
+++ b/backend/frontend/style.css
@@ -0,0 +1,271 @@
+:root {
+  --bg: #0d1117;
+  --panel: rgba(255, 255, 255, 0.04);
+  --border: rgba(255, 255, 255, 0.08);
+  --text: #e6edf3;
+  --muted: #94a3b8;
+  --accent: #7dd3fc;
+  --accent-strong: #38bdf8;
+  --warn: #f97316;
+  --ok: #22c55e;
+  --shadow: 0 20px 40px rgba(0, 0, 0, 0.25);
+  font-family: "SF Pro Display", "Helvetica Neue", "Segoe UI", sans-serif;
+}
+
+* {
+  box-sizing: border-box;
+}
+
+body {
+  margin: 0;
+  background: radial-gradient(circle at 20% 20%, rgba(56, 189, 248, 0.08), transparent 30%),
+    radial-gradient(circle at 80% 0%, rgba(125, 211, 252, 0.08), transparent 25%),
+    var(--bg);
+  color: var(--text);
+  min-height: 100vh;
+}
+
+.page {
+  max-width: 1100px;
+  margin: 0 auto;
+  padding: 32px 20px 60px;
+}
+
+.hero {
+  display: flex;
+  align-items: flex-start;
+  justify-content: space-between;
+  gap: 16px;
+  margin-bottom: 20px;
+}
+
+.eyebrow {
+  letter-spacing: 0.1em;
+  text-transform: uppercase;
+  color: var(--accent-strong);
+  font-size: 12px;
+  margin: 0 0 6px;
+}
+
+h1 {
+  margin: 0 0 6px;
+  font-weight: 700;
+}
+
+.lede {
+  margin: 0;
+  color: var(--muted);
+  max-width: 640px;
+}
+
+.cta button,
+button,
+input[type="submit"] {
+  cursor: pointer;
+}
+
+button {
+  background: linear-gradient(135deg, var(--accent), var(--accent-strong));
+  color: #0b1220;
+  border: none;
+  padding: 12px 16px;
+  border-radius: 10px;
+  font-weight: 600;
+  box-shadow: var(--shadow);
+}
+
+button.ghost {
+  background: transparent;
+  color: var(--text);
+  border: 1px solid var(--border);
+  box-shadow: none;
+}
+
+.panel {
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 16px;
+  padding: 18px;
+  margin-bottom: 18px;
+  box-shadow: var(--shadow);
+}
+
+.filters {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+  gap: 14px;
+}
+
+.filters label {
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+  color: var(--muted);
+  font-size: 14px;
+}
+
+input,
+select {
+  padding: 10px 12px;
+  border-radius: 10px;
+  border: 1px solid var(--border);
+  background: rgba(255, 255, 255, 0.02);
+  color: var(--text);
+}
+
+.actions {
+  display: flex;
+  gap: 10px;
+  align-items: center;
+}
+
+.panel-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+  margin-bottom: 8px;
+}
+
+#count {
+  color: var(--muted);
+  font-size: 14px;
+}
+
+.status {
+  min-height: 22px;
+  color: var(--muted);
+  margin-bottom: 12px;
+}
+
+.pagination {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  margin-top: 12px;
+}
+
+.pagination button {
+  padding: 8px 12px;
+  border-radius: 8px;
+  border: 1px solid var(--border);
+  background: rgba(255, 255, 255, 0.04);
+  color: var(--text);
+  box-shadow: none;
+}
+
+.pagination span {
+  color: var(--muted);
+}
+
+.events {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
+  gap: 12px;
+}
+
+.event {
+  border: 1px solid var(--border);
+  border-radius: 14px;
+  padding: 14px;
+  background: rgba(255, 255, 255, 0.02);
+}
+
+.event__meta {
+  display: flex;
+  gap: 8px;
+  align-items: center;
+  margin-bottom: 8px;
+}
+
+.pill {
+  display: inline-block;
+  padding: 6px 10px;
+  border-radius: 999px;
+  background: rgba(125, 211, 252, 0.12);
+  border: 1px solid rgba(125, 211, 252, 0.4);
+  color: var(--text);
+  font-size: 12px;
+}
+
+.pill--ok {
+  background: rgba(34, 197, 94, 0.15);
+  border-color: rgba(34, 197, 94, 0.5);
+}
+
+.pill--warn {
+  background: rgba(249, 115, 22, 0.15);
+  border-color: rgba(249, 115, 22, 0.5);
+}
+
+.event h3 {
+  margin: 0 0 6px;
+}
+
+.event__detail {
+  margin: 4px 0;
+  color: var(--muted);
+  font-size: 14px;
+}
+
+.event__actions {
+  margin-top: 10px;
+}
+
+.modal {
+  position: fixed;
+  inset: 0;
+  background: rgba(0, 0, 0, 0.6);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 20px;
+  z-index: 10;
+}
+
+.modal.hidden {
+  display: none;
+}
+
+.modal__content {
+  width: min(900px, 95vw);
+  max-height: 85vh;
+  background: #0b0f19;
+  border: 1px solid var(--border);
+  border-radius: 14px;
+  padding: 16px;
+  box-shadow: var(--shadow);
+  display: flex;
+  flex-direction: column;
+}
+
+.modal__header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: 12px;
+  margin-bottom: 10px;
+}
+
+.modal pre {
+  background: rgba(255, 255, 255, 0.02);
+  color: var(--text);
+  border: 1px solid var(--border);
+  border-radius: 10px;
+  padding: 12px;
+  overflow: auto;
+  flex: 1;
+  font-size: 12px;
+  line-height: 1.5;
+  margin: 0;
+}
+
+@media (max-width: 640px) {
+  .hero {
+    flex-direction: column;
+  }
+
+  .actions {
+    flex-direction: column;
+    align-items: stretch;
+  }
+}
diff --git a/backend/graph/audit_logs.py b/backend/graph/audit_logs.py
new file mode 100644
index 0000000..50a06f4
--- /dev/null
+++ b/backend/graph/audit_logs.py
@@ -0,0 +1,72 @@
+import requests
+from datetime import datetime, timedelta
+from graph.auth import get_access_token
+from graph.resolve import resolve_directory_object, resolve_service_principal_owners
+
+
+def fetch_audit_logs(hours=24, max_pages=50):
+    """Fetch paginated directory audit logs from Microsoft Graph and enrich with resolved names."""
+    token = get_access_token()
+    start_time = (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z"
+    next_url = (
+        "https://graph.microsoft.com/v1.0/"
+        f"auditLogs/directoryAudits?$filter=activityDateTime ge {start_time}"
+    )
+    headers = {"Authorization": f"Bearer {token}"}
+
+    events = []
+    pages_fetched = 0
+
+    while next_url:
+        if pages_fetched >= max_pages:
+            raise RuntimeError(f"Aborting pagination after {max_pages} pages to avoid runaway fetch.")
+
+        try:
+            res = requests.get(next_url, headers=headers, timeout=20)
+            res.raise_for_status()
+            body = res.json()
+        except requests.RequestException as exc:
+            raise RuntimeError(f"Failed to fetch audit logs page: {exc}") from exc
+        except ValueError as exc:
+            raise RuntimeError(f"Invalid JSON response from Graph: {exc}") from exc
+
+        events.extend(body.get("value", []))
+        next_url = body.get("@odata.nextLink")
+        pages_fetched += 1
+
+    return _enrich_events(events, token)
+
+
+def _enrich_events(events, token):
+    """
+    Resolve actor/target IDs to readable names using Graph (requires Directory.Read.All).
+    Adds _resolvedActor, _resolvedActorOwners, and per-target _resolved fields.
+ """ + cache = {} + owner_cache = {} + + for event in events: + actor = event.get("initiatedBy", {}) or {} + user = actor.get("user", {}) or {} + sp = actor.get("servicePrincipal", {}) or {} + app = actor.get("app", {}) or {} + app_sp_id = app.get("servicePrincipalId") or app.get("servicePrincipalName") + + actor_id = user.get("id") or sp.get("id") or app_sp_id + + resolved_actor = resolve_directory_object(actor_id, token, cache) if actor_id else None + actor_owners = [] + if resolved_actor and resolved_actor.get("type") == "servicePrincipal": + actor_owners = resolve_service_principal_owners(resolved_actor.get("id"), token, owner_cache) + + event["_resolvedActor"] = resolved_actor + event["_resolvedActorOwners"] = actor_owners + + for target in event.get("targetResources", []) or []: + tid = target.get("id") + if tid: + resolved_target = resolve_directory_object(tid, token, cache) + if resolved_target: + target["_resolved"] = resolved_target + + return events diff --git a/backend/graph/auth.py b/backend/graph/auth.py new file mode 100644 index 0000000..d5776e0 --- /dev/null +++ b/backend/graph/auth.py @@ -0,0 +1,22 @@ +import requests +from config import TENANT_ID, CLIENT_ID, CLIENT_SECRET + + +def get_access_token(scope: str = "https://graph.microsoft.com/.default"): + """Request an application token from Microsoft identity platform.""" + url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token" + data = { + "grant_type": "client_credentials", + "client_id": CLIENT_ID, + "client_secret": CLIENT_SECRET, + "scope": scope, + } + try: + res = requests.post(url, data=data, timeout=15) + res.raise_for_status() + token = res.json().get("access_token") + if not token: + raise RuntimeError("Token endpoint returned no access_token") + return token + except requests.RequestException as exc: + raise RuntimeError(f"Failed to obtain access token: {exc}") from exc diff --git a/backend/graph/resolve.py b/backend/graph/resolve.py new file mode 100644 index 0000000..b7b4ea8 --- /dev/null +++ b/backend/graph/resolve.py @@ -0,0 +1,96 @@ +from typing import Dict, List, Optional + +import requests + + +def _name_from_payload(payload: dict, kind: str) -> str: + """Pick a readable name for a directory object payload.""" + if kind == "user": + upn = payload.get("userPrincipalName") or payload.get("mail") + display = payload.get("displayName") + if display and upn and display != upn: + return f"{display} ({upn})" + return display or upn or payload.get("id") or "Unknown user" + if kind == "servicePrincipal": + return ( + payload.get("displayName") + or payload.get("appDisplayName") + or payload.get("appId") + or payload.get("id") + or "Unknown app" + ) + if kind == "group": + return payload.get("displayName") or payload.get("mail") or payload.get("id") or "Unknown group" + if kind == "device": + return payload.get("displayName") or payload.get("id") or "Unknown device" + return payload.get("displayName") or payload.get("id") or "Unknown" + + +def _request_json(url: str, token: str) -> Optional[dict]: + try: + res = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=10) + if res.status_code == 404: + return None + res.raise_for_status() + return res.json() + except requests.RequestException: + return None + + +def resolve_directory_object(object_id: str, token: str, cache: Dict[str, dict]) -> Optional[dict]: + """ + Resolve a directory object (user, servicePrincipal, group, device) to a readable name. 
+    Uses a simple multi-endpoint probe with caching to avoid extra Graph traffic.
+    """
+    if not object_id:
+        return None
+    if object_id in cache:
+        return cache[object_id]
+
+    probes = [
+        ("user", f"https://graph.microsoft.com/v1.0/users/{object_id}?$select=id,displayName,userPrincipalName,mail"),
+        ("servicePrincipal", f"https://graph.microsoft.com/v1.0/servicePrincipals/{object_id}?$select=id,displayName,appId,appDisplayName"),
+        ("group", f"https://graph.microsoft.com/v1.0/groups/{object_id}?$select=id,displayName,mail"),
+        ("device", f"https://graph.microsoft.com/v1.0/devices/{object_id}?$select=id,displayName"),
+    ]
+
+    for kind, url in probes:
+        payload = _request_json(url, token)
+        if payload:
+            resolved = {
+                "id": payload.get("id", object_id),
+                "type": kind,
+                "name": _name_from_payload(payload, kind),
+            }
+            cache[object_id] = resolved
+            return resolved
+
+    cache[object_id] = None
+    return None
+
+
+def resolve_service_principal_owners(sp_id: str, token: str, cache: Dict[str, List[str]]) -> List[str]:
+    """Return a list of owner display names for a service principal."""
+    if not sp_id:
+        return []
+    if sp_id in cache:
+        return cache[sp_id]
+
+    owners = []
+    url = (
+        f"https://graph.microsoft.com/v1.0/servicePrincipals/{sp_id}"
+        "/owners?$select=id,displayName,userPrincipalName,mail"
+    )
+    payload = _request_json(url, token)
+    for owner in (payload or {}).get("value", []):
+        name = (
+            owner.get("displayName")
+            or owner.get("userPrincipalName")
+            or owner.get("mail")
+            or owner.get("id")
+        )
+        if name:
+            owners.append(name)
+
+    cache[sp_id] = owners
+    return owners
diff --git a/backend/main.py b/backend/main.py
new file mode 100644
index 0000000..4fef48b
--- /dev/null
+++ b/backend/main.py
@@ -0,0 +1,17 @@
+from pathlib import Path
+
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+
+from routes.fetch import router as fetch_router
+from routes.events import router as events_router
+
+app = FastAPI()
+
+app.include_router(fetch_router, prefix="/api")
+app.include_router(events_router, prefix="/api")
+
+# Serve a minimal frontend for browsing events. Use an absolute path so it
+# works regardless of the working directory used to start uvicorn.
+frontend_dir = Path(__file__).parent / "frontend"
+app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")
diff --git a/backend/maintenance.py b/backend/maintenance.py
new file mode 100644
index 0000000..b01468f
--- /dev/null
+++ b/backend/maintenance.py
@@ -0,0 +1,107 @@
+"""
+Maintenance utilities for existing audit events.
+
+Run re-normalization (including Graph enrichment) over stored events to populate
+new display fields. Example:
+
+    python maintenance.py renormalize --limit 500
+"""
+import argparse
+from typing import List, Optional, Set
+
+from pymongo import UpdateOne
+
+from database import events_collection
+from graph.auth import get_access_token
+from graph.audit_logs import _enrich_events
+from models.event_model import normalize_event, _make_dedupe_key
+
+
+def renormalize(limit: Optional[int] = None, batch_size: int = 200) -> int:
+    """
+    Re-run enrichment + normalization on stored events using the latest mapping.
+    Returns the number of documents updated.
+    """
+    token = get_access_token()
+
+    cursor = events_collection.find({}, projection={"raw": 1})
+    if limit:
+        cursor = cursor.limit(int(limit))
+
+    updated = 0
+    batch: List[UpdateOne] = []
+
+    for doc in cursor:
+        raw = doc.get("raw") or {}
+        enriched = _enrich_events([raw], token)[0]
+        normalized = normalize_event(enriched)
+        # Preserve the original _id
+        normalized.pop("_id", None)
+
+        batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": normalized}))
+        if len(batch) >= batch_size:
+            events_collection.bulk_write(batch, ordered=False)
+            updated += len(batch)
+            batch = []
+
+    if batch:
+        events_collection.bulk_write(batch, ordered=False)
+        updated += len(batch)
+
+    return updated
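+
+# NOTE: renormalize() enriches one document at a time, so the name-resolution
+# caches inside _enrich_events are rebuilt per event; on large collections this
+# can mean one or more Graph round-trips per stored document.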
+ """ + token = get_access_token() + + cursor = events_collection.find({}, projection={"raw": 1}) + if limit: + cursor = cursor.limit(int(limit)) + + updated = 0 + batch: List[UpdateOne] = [] + + for doc in cursor: + raw = doc.get("raw") or {} + enriched = _enrich_events([raw], token)[0] + normalized = normalize_event(enriched) + # Preserve original _id + normalized.pop("_id", None) + + batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": normalized})) + if len(batch) >= batch_size: + events_collection.bulk_write(batch, ordered=False) + updated += len(batch) + batch = [] + + if batch: + events_collection.bulk_write(batch, ordered=False) + updated += len(batch) + + return updated + + +def dedupe(limit: int = None, batch_size: int = 500) -> int: + """ + Remove duplicate events based on dedupe_key. Keeps the first occurrence encountered. + """ + cursor = events_collection.find({}, projection={"_id": 1, "dedupe_key": 1, "raw": 1, "id": 1, "timestamp": 1}).sort("timestamp", 1) + if limit: + cursor = cursor.limit(int(limit)) + + seen: Set[str] = set() + to_delete = [] + processed = 0 + + for doc in cursor: + key = doc.get("dedupe_key") or _make_dedupe_key(doc.get("raw") or doc) + if not key: + continue + if key in seen: + to_delete.append(doc["_id"]) + else: + seen.add(key) + processed += 1 + if len(to_delete) >= batch_size: + events_collection.delete_many({"_id": {"$in": to_delete}}) + to_delete = [] + + if to_delete: + events_collection.delete_many({"_id": {"$in": to_delete}}) + + return len(seen) - processed if processed > len(seen) else 0 + + +def main(): + parser = argparse.ArgumentParser(description="Maintenance tasks") + sub = parser.add_subparsers(dest="command") + + rn = sub.add_parser("renormalize", help="Re-run enrichment/normalization on stored events") + rn.add_argument("--limit", type=int, default=None, help="Limit number of events to process") + + dd = sub.add_parser("dedupe", help="Remove duplicate events based on dedupe_key") + dd.add_argument("--limit", type=int, default=None, help="Limit number of events to scan (for testing)") + + args = parser.parse_args() + if args.command == "renormalize": + count = renormalize(limit=args.limit) + print(f"Renormalized {count} events") + elif args.command == "dedupe": + removed = dedupe(limit=args.limit) + print(f"Removed {removed} duplicate documents") + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/backend/mapping_loader.py b/backend/mapping_loader.py new file mode 100644 index 0000000..c328d7e --- /dev/null +++ b/backend/mapping_loader.py @@ -0,0 +1,59 @@ +from functools import lru_cache +from pathlib import Path +from typing import Any, Dict + +import yaml + + +DEFAULT_MAPPING: Dict[str, Any] = { + "category_labels": { + "ApplicationManagement": "Application", + "UserManagement": "User", + "GroupManagement": "Group", + "RoleManagement": "Role", + "Device": "Device", + "Policy": "Policy", + "ResourceManagement": "Resource", + }, + "summary_templates": { + "default": "{operation} on {target} by {actor}", + "Device": "{operation} on device {target} by {actor}", + "ApplicationManagement": "{operation} for app {target} by {actor}", + }, + "display": { + "default": { + "actor_field": "actor_display", + "actor_label": "User", + }, + "ApplicationManagement": { + "actor_field": "actor_upn", + "actor_label": "User", + }, + "Device": { + "actor_field": "target_display", + "actor_label": "Device", + }, + }, +} + + +@lru_cache(maxsize=1) +def get_mapping() -> Dict[str, Any]: + """ + Load mapping from mappings.yml 
+    if present; otherwise fall back to defaults.
+    Users can edit mappings.yml to change labels and summary templates.
+    """
+    path = Path(__file__).parent / "mappings.yml"
+    if path.exists():
+        try:
+            with path.open("r") as f:
+                data = yaml.safe_load(f) or {}
+            return {
+                "category_labels": data.get("category_labels") or DEFAULT_MAPPING["category_labels"],
+                "summary_templates": data.get("summary_templates") or DEFAULT_MAPPING["summary_templates"],
+                "display": data.get("display") or DEFAULT_MAPPING["display"],
+            }
+        except Exception:
+            # If mapping fails to load, use defaults to keep the app running.
+            return DEFAULT_MAPPING
+    return DEFAULT_MAPPING
diff --git a/backend/mappings.yml b/backend/mappings.yml
new file mode 100644
index 0000000..c1c119a
--- /dev/null
+++ b/backend/mappings.yml
@@ -0,0 +1,31 @@
+# Human-readable mapping for normalizing audit events.
+# You can edit this file to customize category labels and summary templates.
+
+category_labels:
+  ApplicationManagement: Application
+  UserManagement: User
+  GroupManagement: Group
+  RoleManagement: Role
+  Device: Device
+  Policy: Policy
+  ResourceManagement: Resource
+
+summary_templates:
+  # Use {operation}, {category}, {target}, {actor}, {result}, {service} placeholders.
+  default: "{operation} on {target} by {actor}"
+  Device: "{operation} on device {target} by {actor}"
+  ApplicationManagement: "{operation} for app {target} by {actor}"
+
+# Display preferences let you decide which field appears as the primary "actor"
+# label in the UI. Available actor_field values: actor_display, actor_upn,
+# target_display (uses the first target label).
+display:
+  default:
+    actor_field: actor_display
+    actor_label: User
+  ApplicationManagement:
+    actor_field: actor_upn
+    actor_label: User
+  Device:
+    actor_field: target_display
+    actor_label: Device
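+
+# Example override (commented out): any Graph audit category can get its own
+# template; e.g. for Entra policy events, something like this would work:
+# summary_templates:
+#   Policy: "{operation} on policy {target} by {actor}"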
t.get("id") + or "" + ) + if label: + labels.append(label) + return labels + + +def _target_types(targets: list) -> list: + """Collect target types for display mapping.""" + types = [] + for t in targets or []: + resolved = t.get("_resolved") or {} + t_type = ( + resolved.get("type") + or t.get("type") + ) + if t_type: + types.append(t_type) + return types + + +def _display_summary(operation: str, target_labels: list, actor_label: str, target_types: list, category: str) -> str: + action = operation or category or "Event" + target = target_labels[0] if target_labels else None + t_type = target_types[0] if target_types else None + + target_piece = None + if target and t_type: + target_piece = f"{t_type.lower()}: {target}" + elif target: + target_piece = target + + pieces = [p for p in [action, target_piece] if p] + if actor_label: + pieces.append(f"by {actor_label}") + return " | ".join(pieces) + + +def _render_summary(template: str, operation: str, actor: str, target: str, category: str, result: str, service: str) -> str: + try: + return template.format( + operation=operation or category or "Event", + actor=actor or "Unknown actor", + target=target or "target", + category=category or "Other", + result=result or "", + service=service or "", + ) + except Exception: + return "" + + +def _make_dedupe_key(e: dict, normalized_fields: dict = None) -> str: + """ + Build a stable key to prevent duplicates across sources. + Preference order: + - source event id (id) + category + - fallback to timestamp + category + operation + first target label + """ + norm = normalized_fields or {} + eid = e.get("id") or e.get("_id") or norm.get("id") + ts = e.get("activityDateTime") or e.get("timestamp") or norm.get("timestamp") + category = e.get("category") or e.get("service") or norm.get("service") + op = e.get("activityDisplayName") or e.get("operation") or norm.get("operation") + target_labels = norm.get("target_displays") or [] + target = target_labels[0] if target_labels else None + + if eid: + return "|".join(filter(None, [eid, category])) + + return "|".join(filter(None, [ts, category, op, target])) or None + + +def normalize_event(e): + actor = e.get("initiatedBy", {}) + targets = e.get("targetResources", []) + resolved_actor = e.get("_resolvedActor") + actor_owners = e.get("_resolvedActorOwners", []) + target_labels = _target_displays(targets) + target_types = _target_types(targets) + actor_label = _actor_display(actor, resolved_actor, actor_owners) + actor_upn = (actor.get("user") or {}).get("userPrincipalName") or (actor.get("user") or {}).get("mail") + first_target_label = target_labels[0] if target_labels else None + category = e.get("category") + mapping = get_mapping() + category_labels = mapping.get("category_labels") or {} + summary_templates = mapping.get("summary_templates") or {} + display_mapping = mapping.get("display") or {} + display_category = category_labels.get(category, category or "Other") + + operation = e.get("activityDisplayName") + template = summary_templates.get(category) or summary_templates.get("default") + summary = _render_summary( + template, + operation=operation, + actor=actor_label, + target=target_labels[0] if target_labels else None, + category=display_category, + result=e.get("result"), + service=e.get("loggedByService") or e.get("category"), + ) + + display_conf = display_mapping.get(category) or display_mapping.get("default", {}) + actor_field_pref = display_conf.get("actor_field", "actor_display") + actor_label_text = display_conf.get("actor_label", "User") + + if 
+    if actor_field_pref == "actor_upn" and actor_upn:
+        display_actor_value = actor_upn
+    elif actor_field_pref == "target_display" and first_target_label:
+        display_actor_value = first_target_label
+    else:
+        display_actor_value = actor_label
+
+    dedupe_key = _make_dedupe_key(e, {
+        "id": e.get("id"),
+        "timestamp": e.get("activityDateTime"),
+        "service": e.get("category"),
+        "operation": e.get("activityDisplayName"),
+        "target_displays": target_labels,
+    })
+
+    return {
+        "id": e.get("id"),
+        "timestamp": e.get("activityDateTime"),
+        "service": e.get("category"),
+        "operation": e.get("activityDisplayName"),
+        "result": e.get("result"),
+        "actor": actor,
+        "actor_resolved": resolved_actor,
+        "actor_owner_names": actor_owners,
+        "actor_display": actor_label,
+        "actor_upn": actor_upn,
+        "display_actor_label": actor_label_text,
+        "display_actor_value": display_actor_value,
+        "targets": targets,
+        "target_displays": target_labels,
+        "target_types": target_types,
+        "display_category": display_category,
+        "display_summary": summary,
+        "raw": e,
+        "raw_text": json.dumps(e, default=str),
+        "dedupe_key": dedupe_key,
+    }
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..997a5e3
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,6 @@
+fastapi
+uvicorn[standard]
+pymongo
+python-dotenv
+requests
+PyYAML
diff --git a/backend/routes/events.py b/backend/routes/events.py
new file mode 100644
index 0000000..f863080
--- /dev/null
+++ b/backend/routes/events.py
@@ -0,0 +1,104 @@
+import re
+
+from fastapi import APIRouter, HTTPException
+from database import events_collection
+
+router = APIRouter()
+
+
+@router.get("/events")
+def list_events(
+    service: str = None,
+    actor: str = None,
+    operation: str = None,
+    result: str = None,
+    start: str = None,
+    end: str = None,
+    search: str = None,
+    page: int = 1,
+    page_size: int = 50,
+):
+    filters = []
+
+    if service:
+        filters.append({"service": service})
+    if actor:
+        # Escape user input so it is matched literally, not as a regex.
+        needle = re.escape(actor)
+        filters.append(
+            {
+                "$or": [
+                    {"actor_display": {"$regex": needle, "$options": "i"}},
+                    {"actor_upn": {"$regex": needle, "$options": "i"}},
+                    {"actor.user.userPrincipalName": {"$regex": needle, "$options": "i"}},
+                    {"actor.user.id": actor},
+                ]
+            }
+        )
+    if operation:
+        filters.append({"operation": {"$regex": re.escape(operation), "$options": "i"}})
+    if result:
+        filters.append({"result": {"$regex": re.escape(result), "$options": "i"}})
+    if start or end:
+        time_filter = {}
+        if start:
+            time_filter["$gte"] = start
+        if end:
+            time_filter["$lte"] = end
+        filters.append({"timestamp": time_filter})
+    if search:
+        needle = re.escape(search)
+        filters.append(
+            {
+                "$or": [
+                    {"raw_text": {"$regex": needle, "$options": "i"}},
+                    {"display_summary": {"$regex": needle, "$options": "i"}},
+                    {"actor_display": {"$regex": needle, "$options": "i"}},
+                    {"target_displays": {"$elemMatch": {"$regex": needle, "$options": "i"}}},
+                    {"operation": {"$regex": needle, "$options": "i"}},
+                ]
+            }
+        )
+
+    query = {"$and": filters} if filters else {}
+
+    safe_page_size = max(1, min(page_size, 500))
+    safe_page = max(1, page)
+    skip = (safe_page - 1) * safe_page_size
+
+    try:
+        total = events_collection.count_documents(query)
+        cursor = events_collection.find(query).sort("timestamp", -1).skip(skip).limit(safe_page_size)
+        events = list(cursor)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Failed to query events: {exc}") from exc
+
+    for e in events:
+        e["_id"] = str(e["_id"])
+    return {
+        "items": events,
+        "total": total,
+        "page": safe_page,
+        "page_size": safe_page_size,
+    }
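+
+# NOTE: the filters above use unanchored case-insensitive regexes, so queries
+# scan candidate documents. If listing becomes slow on large collections, an
+# index on the sort key helps, e.g.:
+#   events_collection.create_index([("timestamp", -1)])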
+@router.get("/filter-options") +def filter_options(limit: int = 200): + """ + Provide distinct values for UI filters (best-effort, capped). + """ + safe_limit = max(1, min(limit, 1000)) + try: + services = sorted(events_collection.distinct("service"))[:safe_limit] + operations = sorted(events_collection.distinct("operation"))[:safe_limit] + results = sorted([r for r in events_collection.distinct("result") if r])[:safe_limit] + actors = sorted([a for a in events_collection.distinct("actor_display") if a])[:safe_limit] + actor_upns = sorted([a for a in events_collection.distinct("actor_upn") if a])[:safe_limit] + devices = sorted([a for a in events_collection.distinct("target_displays") if isinstance(a, str)])[:safe_limit] + except Exception as exc: + raise HTTPException(status_code=500, detail=f"Failed to load filter options: {exc}") from exc + + return { + "services": services, + "operations": operations, + "results": results, + "actors": actors, + "actor_upns": actor_upns, + "devices": devices, + } diff --git a/backend/routes/fetch.py b/backend/routes/fetch.py new file mode 100644 index 0000000..7baa661 --- /dev/null +++ b/backend/routes/fetch.py @@ -0,0 +1,40 @@ +from fastapi import APIRouter, HTTPException +from pymongo import UpdateOne + +from database import events_collection +from graph.audit_logs import fetch_audit_logs +from sources.unified_audit import fetch_unified_audit +from sources.intune_audit import fetch_intune_audit +from models.event_model import normalize_event + +router = APIRouter() + + +@router.get("/fetch-audit-logs") +def fetch_logs(hours: int = 168): + window = max(1, min(hours, 720)) # cap to 30 days for sanity + logs = [] + errors = [] + + def fetch_source(fn, label): + try: + return fn(hours=window) + except Exception as exc: + errors.append(f"{label}: {exc}") + return [] + + logs.extend(fetch_source(fetch_audit_logs, "Directory audit")) + logs.extend(fetch_source(fetch_unified_audit, "Unified audit (Exchange/SharePoint/Teams)")) + logs.extend(fetch_source(fetch_intune_audit, "Intune audit")) + + normalized = [normalize_event(e) for e in logs] + if normalized: + ops = [] + for doc in normalized: + key = doc.get("dedupe_key") + if key: + ops.append(UpdateOne({"dedupe_key": key}, {"$set": doc}, upsert=True)) + else: + ops.append(UpdateOne({"id": doc.get("id"), "timestamp": doc.get("timestamp")}, {"$set": doc}, upsert=True)) + events_collection.bulk_write(ops, ordered=False) + return {"stored_events": len(normalized), "errors": errors} diff --git a/backend/sources/intune_audit.py b/backend/sources/intune_audit.py new file mode 100644 index 0000000..c29687d --- /dev/null +++ b/backend/sources/intune_audit.py @@ -0,0 +1,73 @@ +import requests +from datetime import datetime, timedelta +from typing import List + +from graph.auth import get_access_token + + +def fetch_intune_audit(hours: int = 24, max_pages: int = 50) -> List[dict]: + """ + Fetch Intune audit events via Microsoft Graph. + Requires Intune audit permissions (e.g., DeviceManagementConfiguration.Read.All). 
+ """ + token = get_access_token() + start_time = (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z" + url = ( + "https://graph.microsoft.com/v1.0/deviceManagement/auditEvents" + f"?$filter=activityDateTime ge {start_time}" + ) + headers = {"Authorization": f"Bearer {token}"} + + events = [] + pages = 0 + while url: + if pages >= max_pages: + raise RuntimeError(f"Aborting Intune pagination after {max_pages} pages.") + try: + res = requests.get(url, headers=headers, timeout=20) + res.raise_for_status() + body = res.json() + except requests.RequestException as exc: + raise RuntimeError(f"Failed to fetch Intune audit logs: {exc}") from exc + except ValueError as exc: + raise RuntimeError(f"Invalid Intune response JSON: {exc}") from exc + + events.extend(body.get("value", [])) + url = body.get("@odata.nextLink") + pages += 1 + + return [_normalize_intune(e) for e in events] + + +def _normalize_intune(e: dict) -> dict: + """ + Map Intune audit event to normalized schema. + """ + actor = e.get("actor", {}) or {} + target = e.get("resources", [{}])[0] if e.get("resources") else {} + + return { + "id": e.get("id"), + "activityDateTime": e.get("activityDateTime"), + "category": e.get("category") or "Intune", + "activityDisplayName": e.get("activity") or e.get("activityType"), + "result": e.get("activityResult") or e.get("result"), + "initiatedBy": { + "user": { + "id": actor.get("userId"), + "userPrincipalName": actor.get("userPrincipalName"), + "displayName": actor.get("userName"), + "ipAddress": actor.get("ipAddress"), + } + }, + "targetResources": [ + { + "id": target.get("id"), + "displayName": target.get("displayName") or target.get("modifiedProperties", [{}])[0].get("displayName"), + "type": target.get("type"), + } + ] + if target + else [], + "raw": e, + } diff --git a/backend/sources/unified_audit.py b/backend/sources/unified_audit.py new file mode 100644 index 0000000..d1dbc84 --- /dev/null +++ b/backend/sources/unified_audit.py @@ -0,0 +1,106 @@ +import requests +from datetime import datetime, timedelta +from typing import List + +from graph.auth import get_access_token + + +AUDIT_CONTENT_TYPES = { + "Audit.Exchange": "Exchange admin audit", + "Audit.SharePoint": "SharePoint admin audit", + "Audit.General": "General (Teams/others)", +} + + +def _time_window(hours: int): + end = datetime.utcnow() + start = end - timedelta(hours=hours) + # Activity API expects UTC ISO without Z + return start.strftime("%Y-%m-%dT%H:%M:%S"), end.strftime("%Y-%m-%dT%H:%M:%S") + + +def _ensure_subscription(content_type: str, token: str, tenant_id: str): + url = f"https://manage.office.com/api/v1.0/{tenant_id}/activity/feed/subscriptions/start" + params = {"contentType": content_type} + headers = {"Authorization": f"Bearer {token}"} + try: + requests.post(url, params=params, headers=headers, timeout=10) + except requests.RequestException: + pass # best-effort + + +def _list_content(content_type: str, token: str, tenant_id: str, hours: int) -> List[dict]: + start, end = _time_window(hours) + url = f"https://manage.office.com/api/v1.0/{tenant_id}/activity/feed/subscriptions/content" + params = {"contentType": content_type, "startTime": start, "endTime": end} + headers = {"Authorization": f"Bearer {token}"} + try: + res = requests.get(url, params=params, headers=headers, timeout=20) + if res.status_code in (400, 401, 403, 404): + # Likely not enabled or insufficient perms; surface the text to the caller. 
+ raise RuntimeError(f"{content_type} content listing failed ({res.status_code}): {res.text}") + return [] + res.raise_for_status() + return res.json() or [] + except requests.RequestException as exc: + raise RuntimeError(f"Failed to list {content_type} content: {exc}") from exc + + +def _download_content(content_uri: str, token: str) -> List[dict]: + headers = {"Authorization": f"Bearer {token}"} + try: + res = requests.get(content_uri, headers=headers, timeout=30) + res.raise_for_status() + return res.json() or [] + except requests.RequestException as exc: + raise RuntimeError(f"Failed to download audit content: {exc}") from exc + + +def fetch_unified_audit(hours: int = 24, max_files: int = 50) -> List[dict]: + """ + Fetch unified audit logs (Exchange, SharePoint, Teams policy changes via Audit.General) + using the Office 365 Management Activity API. + """ + # Need token for manage.office.com + token = get_access_token("https://manage.office.com/.default") + from config import TENANT_ID # local import to avoid cycles + + events = [] + + for content_type in AUDIT_CONTENT_TYPES.keys(): + _ensure_subscription(content_type, token, TENANT_ID) + contents = _list_content(content_type, token, TENANT_ID, hours) + for item in contents[:max_files]: + content_uri = item.get("contentUri") + if not content_uri: + continue + events.extend(_download_content(content_uri, token)) + + return [_normalize_unified(e) for e in events] + + +def _normalize_unified(e: dict) -> dict: + """ + Map unified audit log shape to the normalized schema used by the app. + """ + actor_user = { + "id": e.get("UserId"), + "userPrincipalName": e.get("UserId"), + "ipAddress": e.get("ClientIP"), + "displayName": e.get("UserId"), + } + target = { + "id": e.get("ObjectId") or e.get("OrganizationId"), + "displayName": e.get("ObjectId"), + "type": e.get("Workload"), + } + return { + "id": e.get("Id") or e.get("RecordType"), + "activityDateTime": e.get("CreationTime"), + "category": e.get("Workload"), + "activityDisplayName": e.get("Operation"), + "result": e.get("ResultStatus"), + "initiatedBy": {"user": actor_user}, + "targetResources": [target], + "raw": e, + } diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d91d984 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,28 @@ +services: + mongo: + image: mongo:7 + container_name: aoc-mongo + restart: always + ports: + - "27017:27017" + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: example + volumes: + - mongo_data:/data/db + + backend: + build: ./backend + container_name: aoc-backend + restart: always + env_file: + - .env + environment: + MONGO_URI: mongodb://root:example@mongo:27017/ + depends_on: + - mongo + ports: + - "8000:8000" + +volumes: + mongo_data: