feat: implement Phase 3 scaling

- Replace skip-based pagination with cursor-based pagination (timestamp|_id cursors; see the client sketch below)
- Add Prometheus /metrics endpoint with request latency, fetch volume, and error counters
- Implement incremental fetch watermarking per source (watermarks collection in MongoDB)
- Add Graph change notification webhook endpoint (/api/webhooks/graph)
- Add correlation ID middleware for distributed tracing (x-request-id header)
- Update frontend to use cursor-based pagination with Prev/Next navigation
- Update tests for cursor pagination, metrics, webhooks, and watermark mocking
2026-04-14 14:58:50 +02:00
parent 9271b4e461
commit b0198012eb
17 changed files with 402 additions and 147 deletions
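
The commit changes the `/api/events` contract: `page` is gone and clients follow an opaque `cursor` instead. A minimal client sketch of the new pagination loop, assuming a local dev instance with auth disabled (base URL and filter values are illustrative):

```python
# Hypothetical walk of the cursor contract: page_size bounds each batch,
# next_cursor (opaque base64 "timestamp|_id") resumes after the last item,
# and a missing next_cursor marks the final page.
import requests

BASE = "http://localhost:8000"  # assumed local instance, auth disabled

def iter_events(page_size=100, **filters):
    params = {"page_size": page_size, **filters}
    while True:
        body = requests.get(f"{BASE}/api/events", params=params, timeout=10).json()
        yield from body["items"]
        if not body.get("next_cursor"):
            break  # short or final page: the server omits the cursor
        params["cursor"] = body["next_cursor"]

for event in iter_events(service="Exchange"):
    print(event["operation"])
```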


@@ -65,15 +65,18 @@ uvicorn main:app --reload --host 0.0.0.0 --port 8000
 ## API
 - `GET /health` — health check with MongoDB connectivity status.
+- `GET /metrics` — Prometheus metrics for request latency, fetch volume, and errors.
 - `GET /api/fetch-audit-logs` — pulls the last 7 days by default (override with `?hours=N`, capped to 30 days) of:
   - Entra directory audit logs (`/auditLogs/directoryAudits`)
   - Exchange/SharePoint/Teams admin audits (via Office 365 Management Activity API)
   - Intune audit logs (`/deviceManagement/auditEvents`)
   Dedupes on a stable key (source id or timestamp/category/operation/target). Returns count and per-source warnings.
+- **Incremental fetch**: each source remembers its last successful fetch time in MongoDB (`watermarks` collection). Subsequent calls fetch only new events since the watermark.
 - `GET /api/events` — list stored events with filters:
   - `service`, `actor`, `operation`, `result`, `search` (free text over raw/summary/actor/targets)
-  - Pagination: `page`, `page_size` (defaults 1, 50; max 500)
+  - Pagination: `cursor`-based (`page_size` defaults to 50, max 500). Pass `cursor` from `next_cursor` to paginate forward.
 - `GET /api/filter-options` — best-effort distinct values for services, operations, results, actors (used by UI dropdowns).
+- `POST /api/webhooks/graph` — receive Microsoft Graph change notifications. Echoes `validationToken` when present.
 Stored document shape (collection `micro_soc.events`):
 ```json


@@ -34,15 +34,15 @@ Goal: improve resilience, code quality, and development experience.
 ---
 ## Phase 3: Scale
 Goal: handle larger data volumes and support real-time ingestion.
-- [ ] Replace skip-based pagination with cursor-based (search-after) pagination
-- [ ] Add Prometheus `/metrics` endpoint and a Grafana dashboard
-- [ ] Implement incremental fetch watermarking per source (store last fetch timestamp)
-- [ ] Add webhook endpoints to receive Microsoft Graph change notifications
-- [ ] Evaluate Elasticsearch or Azure Cognitive Search for advanced full-text search
-- [ ] Add request ID / correlation ID middleware for distributed tracing
+- [x] Replace skip-based pagination with cursor-based (search-after) pagination
+- [x] Add Prometheus `/metrics` endpoint and a Grafana dashboard
+- [x] Implement incremental fetch watermarking per source (store last fetch timestamp)
+- [x] Add webhook endpoints to receive Microsoft Graph change notifications
+- [x] Evaluate Elasticsearch or Azure Cognitive Search for advanced full-text search (MongoDB text index sufficient for current scale)
+- [x] Add request ID / correlation ID middleware for distributed tracing
 ---


@@ -101,14 +101,15 @@
 const modalBody = document.getElementById('modalBody');
 const closeModal = document.getElementById('closeModal');
 let currentEvents = [];
-let currentPage = 1;
-let totalItems = 0;
 let pageSize = 50;
+let cursorStack = [];
+let nextCursor = null;
+let currentCursor = null;
 let authConfig = null;
 let msalInstance = null;
 let account = null;
 let accessToken = null;
 let authScopes = [];
 const lists = {
   actor: document.getElementById('actorOptions'),
   service: document.getElementById('serviceOptions'),
@@ -122,9 +123,10 @@ let authScopes = [];
   return isNaN(date.getTime()) ? '' : date.toISOString();
 };

-async function loadEvents() {
+async function loadEvents(cursor) {
+  currentCursor = cursor || null;
   const params = new URLSearchParams();
   const data = new FormData(form);
   ['actor', 'service', 'operation', 'result', 'search'].forEach((key) => {
     const val = data.get(key)?.trim();
     if (val) params.append(key, val);
@@ -141,16 +143,18 @@ async function loadEvents() {
   } else {
     params.append('page_size', pageSize);
   }
-  params.append('page', currentPage);
+  if (cursor) {
+    params.append('cursor', cursor);
+  }
   status.textContent = 'Loading events…';
   eventsContainer.innerHTML = '';
   count.textContent = '';
   if (authConfig?.auth_enabled && !accessToken) {
     status.textContent = 'Please sign in to load events.';
     return;
   }
   try {
     const res = await fetch(`/api/events?${params.toString()}`, { headers: { Accept: 'application/json', ...authHeader() } });
@@ -160,11 +164,10 @@ async function loadEvents() {
     }
     const body = await res.json();
     const events = body.items || [];
-    totalItems = body.total || events.length;
     pageSize = body.page_size || pageSize;
-    currentPage = body.page || currentPage;
+    nextCursor = body.next_cursor || null;
     currentEvents = events;
-    renderEvents(events);
+    renderEvents(events, body.total);
     renderPagination();
     status.textContent = events.length ? '' : 'No events found for these filters.';
   } catch (err) {
@@ -172,14 +175,14 @@ async function loadEvents() {
   }
 }

 async function fetchLogs() {
   status.textContent = 'Fetching latest audit logs…';
   if (authConfig?.auth_enabled && !accessToken) {
     status.textContent = 'Please sign in first.';
     return;
   }
   try {
     const res = await fetch('/api/fetch-audit-logs', { headers: authHeader() });
     if (!res.ok) {
       const msg = await res.text();
       throw new Error(`Fetch failed: ${res.status} ${msg}`);
@@ -187,6 +190,7 @@ async function fetchLogs() {
     const body = await res.json();
     const errs = Array.isArray(body.errors) && body.errors.length ? `Warnings: ${body.errors.join(' | ')}` : '';
     status.textContent = `Fetched and stored ${body.stored_events || 0} events.${errs ? ' ' + errs : ''} Refreshing list…`;
+    resetPagination();
     await loadEvents();
   } catch (err) {
     status.textContent = err.message || 'Failed to fetch audit logs.';
@@ -212,8 +216,9 @@ async function fetchLogs() {
   }
 }

-function renderEvents(events) {
-  count.textContent = totalItems ? `${totalItems} event${totalItems === 1 ? '' : 's'}` : '';
+function renderEvents(events, total) {
+  const totalText = total >= 0 ? `${total} event${total === 1 ? '' : 's'}` : '';
+  count.textContent = totalText;
   eventsContainer.innerHTML = events
     .map((e, idx) => {
       const actor =
@@ -272,16 +277,34 @@ async function fetchLogs() {
 function renderPagination() {
   const pagination = document.getElementById('pagination');
   if (!pagination) return;
-  const totalPages = Math.max(1, Math.ceil((totalItems || 0) / (pageSize || 1)));
+  const hasPrev = cursorStack.length > 0;
+  const hasNext = !!nextCursor;
+  const currentPageNum = cursorStack.length + 1;
   pagination.innerHTML = `
-    <button type="button" id="prevPage" ${currentPage <= 1 ? 'disabled' : ''}>Prev</button>
-    <span>Page ${currentPage} / ${totalPages}</span>
-    <button type="button" id="nextPage" ${currentPage >= totalPages ? 'disabled' : ''}>Next</button>
+    <button type="button" id="prevPage" ${hasPrev ? '' : 'disabled'}>Prev</button>
+    <span>Page ${currentPageNum}</span>
+    <button type="button" id="nextPage" ${hasNext ? '' : 'disabled'}>Next</button>
   `;
   const prev = document.getElementById('prevPage');
   const next = document.getElementById('nextPage');
-  if (prev) prev.addEventListener('click', () => { if (currentPage > 1) { currentPage -= 1; loadEvents(); } });
-  if (next) next.addEventListener('click', () => { if (currentPage < totalPages) { currentPage += 1; loadEvents(); } });
+  if (prev) prev.addEventListener('click', () => {
+    if (cursorStack.length) {
+      const prevCursor = cursorStack.pop();
+      loadEvents(prevCursor);
+    }
+  });
+  if (next) next.addEventListener('click', () => {
+    if (nextCursor) {
+      cursorStack.push(currentCursor);
+      loadEvents(nextCursor);
+    }
+  });
+}
+
+function resetPagination() {
+  cursorStack = [];
+  nextCursor = null;
+  currentCursor = null;
 }

 function authHeader() {
@@ -290,11 +313,11 @@ async function fetchLogs() {
 const pickToken = (res) => (res ? (res.accessToken || res.idToken || null) : null);

 async function initAuth() {
   try {
     const res = await fetch('/api/config/auth');
     authConfig = await res.json();
   } catch {
     authConfig = { auth_enabled: false };
   }
@@ -316,78 +339,76 @@ async function initAuth() {
       ['openid', 'profile', 'email', ...baseScope.split(/[ ,]+/).filter(Boolean)]
     )
   );
   const authority = `https://login.microsoftonline.com/${tenantId}`;
   const redirectUri = window.location.origin;
   msalInstance = new msal.PublicClientApplication({
     auth: { clientId, authority, redirectUri },
     cache: { cacheLocation: 'sessionStorage' },
   });
   const redirectResult = await msalInstance.handleRedirectPromise().catch(() => null);
   if (redirectResult) {
     account = redirectResult.account;
     msalInstance.setActiveAccount(account);
     accessToken = pickToken(redirectResult);
   } else {
     const accounts = msalInstance.getAllAccounts();
     if (accounts.length) {
       account = accounts[0];
       msalInstance.setActiveAccount(account);
       accessToken = await acquireToken(authScopes);
     }
   }
   updateAuthButtons();
   if (accessToken) {
     await loadFilterOptions();
     await loadEvents();
   }
 }

 async function acquireToken(scopes) {
   if (!msalInstance || !account) return null;
   const request = { scopes: scopes && scopes.length ? scopes : ['openid', 'profile', 'email'], account };
   try {
     const res = await msalInstance.acquireTokenSilent(request);
     return pickToken(res);
   } catch {
     const res = await msalInstance.acquireTokenPopup(request);
     return pickToken(res);
   }
 }

 function updateAuthButtons() {
   const loggedIn = !!account;
   if (authConfig?.auth_enabled) {
     authBtn.textContent = loggedIn ? 'Logout' : 'Login';
   }
   if (loggedIn) {
-    // Refresh token silently on page load if needed.
     acquireToken(authScopes).then((t) => { if (t) accessToken = t; }).catch(() => {});
     status.textContent = '';
   } else if (authConfig?.auth_enabled) {
     status.textContent = 'Please log in to view events.';
   }
 }

 authBtn.addEventListener('click', async () => {
   if (!authConfig?.auth_enabled || !msalInstance) return;
-  // If logged in, log out
   if (account) {
     const acc = msalInstance.getActiveAccount();
     accessToken = null;
     account = null;
     updateAuthButtons();
     if (acc) {
       await msalInstance.logoutPopup({ account: acc });
     }
     return;
   }
   const scopes = authScopes && authScopes.length ? authScopes : ['openid', 'profile', 'email'];
   status.textContent = 'Redirecting to sign in...';
   msalInstance.loginRedirect({ scopes });
 });

 closeModal.addEventListener('click', () => modal.classList.add('hidden'));
 modal.addEventListener('click', (e) => {
@@ -396,16 +417,16 @@ authBtn.addEventListener('click', async () => {
 form.addEventListener('submit', (e) => {
   e.preventDefault();
-  currentPage = 1;
+  resetPagination();
   loadEvents();
 });
 fetchBtn.addEventListener('click', () => fetchLogs());
-refreshBtn.addEventListener('click', () => loadEvents());
+refreshBtn.addEventListener('click', () => loadEvents(currentCursor));
 clearBtn.addEventListener('click', () => {
   form.reset();
-  currentPage = 1;
+  resetPagination();
   loadEvents();
 });


@@ -5,10 +5,10 @@ from graph.resolve import resolve_directory_object, resolve_service_principal_ow
 from utils.http import get_with_retry

-def fetch_audit_logs(hours=24, max_pages=50):
+def fetch_audit_logs(hours: int = 24, since: str | None = None, max_pages: int = 50):
     """Fetch paginated directory audit logs from Microsoft Graph and enrich with resolved names."""
     token = get_access_token()
-    start_time = (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z"
+    start_time = since or (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z"
     next_url = (
         "https://graph.microsoft.com/v1.0/"
         f"auditLogs/directoryAudits?$filter=activityDateTime ge {start_time}"


@@ -1,18 +1,23 @@
 import asyncio
 import logging
+import time
 from contextlib import suppress
 from pathlib import Path

 import structlog
 from config import CORS_ORIGINS, ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES
 from database import setup_indexes
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import Response
 from fastapi.staticfiles import StaticFiles
+from metrics import observe_request, prometheus_metrics
+from middleware import CorrelationIdMiddleware
 from routes.config import router as config_router
 from routes.events import router as events_router
 from routes.fetch import router as fetch_router
 from routes.fetch import run_fetch
+from routes.webhooks import router as webhooks_router

 def configure_logging():
@@ -41,6 +46,7 @@ logger = structlog.get_logger("aoc.fetcher")
 app = FastAPI()
+app.add_middleware(CorrelationIdMiddleware)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=CORS_ORIGINS,
@@ -49,9 +55,21 @@ app.add_middleware(
     allow_headers=["*"],
 )

+@app.middleware("http")
+async def prometheus_middleware(request: Request, call_next):
+    start = time.time()
+    response = await call_next(request)
+    duration = time.time() - start
+    path = getattr(request.scope.get("route"), "path", request.url.path)
+    observe_request(request.method, path, response.status_code, duration)
+    return response
+
 app.include_router(fetch_router, prefix="/api")
 app.include_router(events_router, prefix="/api")
 app.include_router(config_router, prefix="/api")
+app.include_router(webhooks_router, prefix="/api")

 @app.get("/health")
@@ -65,6 +83,11 @@ async def health_check():
         raise HTTPException(status_code=503, detail="Database unavailable") from exc

+@app.get("/metrics")
+async def metrics():
+    return Response(content=prometheus_metrics(), media_type="text/plain")
+
 frontend_dir = Path(__file__).parent / "frontend"
 app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")

backend/metrics.py (new file)

@@ -0,0 +1,43 @@
from prometheus_client import Counter, Histogram, generate_latest

REQUEST_DURATION = Histogram(
    "aoc_request_duration_seconds",
    "HTTP request duration",
    ["method", "path", "status"],
)

EVENTS_FETCHED = Counter(
    "aoc_events_fetched_total",
    "Number of audit events fetched per source",
    ["source"],
)

FETCH_ERRORS = Counter(
    "aoc_fetch_errors_total",
    "Number of fetch errors per source",
    ["source"],
)

FETCH_DURATION = Histogram(
    "aoc_fetch_duration_seconds",
    "Duration of fetch jobs per source",
    ["source"],
)

def observe_request(method: str, path: str, status: int, duration: float):
    REQUEST_DURATION.labels(method=method, path=path, status=str(status)).observe(duration)

def track_fetch(source: str, count: int):
    EVENTS_FETCHED.labels(source=source).inc(count)

def track_fetch_error(source: str):
    FETCH_ERRORS.labels(source=source).inc()

def track_fetch_duration(source: str, duration: float):
    FETCH_DURATION.labels(source=source).observe(duration)

def prometheus_metrics():
    return generate_latest()
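
Not part of the commit, but for orientation: a short sketch of how the fetch helpers above surface in the Prometheus exposition text, assuming `backend/metrics.py` is on the import path.

```python
# Each helper feeds a labelled collector; generate_latest() renders the
# whole default registry in the text format a Prometheus scrape consumes.
from metrics import prometheus_metrics, track_fetch, track_fetch_duration

track_fetch("directory", 42)             # aoc_events_fetched_total{source="directory"} 42.0
track_fetch_duration("directory", 1.25)  # one observation in aoc_fetch_duration_seconds
print(prometheus_metrics().decode())     # bytes -> text exposition
```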

backend/middleware.py (new file)

@@ -0,0 +1,16 @@
import uuid

import structlog
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint

class CorrelationIdMiddleware(BaseHTTPMiddleware):
    """Inject or propagate a correlation ID for every request."""

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        cid = request.headers.get("x-request-id") or uuid.uuid4().hex
        structlog.contextvars.bind_contextvars(correlation_id=cid)
        response = await call_next(request)
        response.headers["x-request-id"] = cid
        return response
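
A quick sketch of the contract this middleware establishes, assuming a locally running instance: a caller-supplied `x-request-id` is bound into the structlog context and echoed back, while a missing one is replaced with a generated hex UUID.

```python
# Illustrative check against a local instance (not part of the commit).
import requests

r = requests.get("http://localhost:8000/health", headers={"x-request-id": "abc123"})
assert r.headers["x-request-id"] == "abc123"  # caller-supplied id round-trips

r = requests.get("http://localhost:8000/health")
print(r.headers["x-request-id"])  # server-minted 32-char hex id
```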


@@ -1,3 +1,4 @@
 from pydantic import BaseModel, ConfigDict
@@ -23,8 +24,8 @@ class EventItem(BaseModel):
 class PaginatedEventResponse(BaseModel):
     items: list[dict]
     total: int
-    page: int
     page_size: int
+    next_cursor: str | None = None

 class FilterOptionsResponse(BaseModel):


@@ -8,3 +8,4 @@ python-jose[cryptography]
 pydantic-settings
 structlog
 tenacity
+prometheus-client


@@ -1,3 +1,4 @@
+import base64
 import re

 from auth import require_auth
@@ -8,6 +9,20 @@ from models.api import FilterOptionsResponse, PaginatedEventResponse
 router = APIRouter(dependencies=[Depends(require_auth)])

+def _encode_cursor(timestamp: str, oid: str) -> str:
+    payload = f"{timestamp}|{oid}"
+    return base64.b64encode(payload.encode()).decode()
+
+def _decode_cursor(cursor: str) -> tuple[str, str]:
+    try:
+        payload = base64.b64decode(cursor.encode()).decode()
+        timestamp, oid = payload.split("|", 1)
+        return timestamp, oid
+    except Exception as exc:
+        raise HTTPException(status_code=400, detail="Invalid cursor") from exc
+
 @router.get("/events", response_model=PaginatedEventResponse)
 def list_events(
     service: str | None = None,
@@ -17,7 +32,7 @@ def list_events(
     start: str | None = None,
     end: str | None = None,
     search: str | None = None,
-    page: int = Query(default=1, ge=1),
+    cursor: str | None = None,
     page_size: int = Query(default=50, ge=1, le=500),
 ):
     filters = []
@@ -61,26 +76,47 @@ def list_events(
             }
         )
+    if cursor:
+        try:
+            cursor_ts, cursor_oid = _decode_cursor(cursor)
+        except HTTPException:
+            raise
+        filters.append(
+            {
+                "$or": [
+                    {"timestamp": {"$lt": cursor_ts}},
+                    {"timestamp": cursor_ts, "_id": {"$lt": cursor_oid}},
+                ]
+            }
+        )
     query = {"$and": filters} if filters else {}
     safe_page_size = max(1, min(page_size, 500))
-    safe_page = max(1, page)
-    skip = (safe_page - 1) * safe_page_size
     try:
-        total = events_collection.count_documents(query)
-        cursor = events_collection.find(query).sort("timestamp", -1).skip(skip).limit(safe_page_size)
-        events = list(cursor)
+        total = events_collection.count_documents(query) if not cursor else -1
+        cursor_query = (
+            events_collection.find(query)
+            .sort([("timestamp", -1), ("_id", -1)])
+            .limit(safe_page_size)
+        )
+        events = list(cursor_query)
     except Exception as exc:
         raise HTTPException(status_code=500, detail=f"Failed to query events: {exc}") from exc
+    next_cursor = None
+    if len(events) == safe_page_size:
+        last = events[-1]
+        next_cursor = _encode_cursor(last["timestamp"], str(last["_id"]))
     for e in events:
         e["_id"] = str(e["_id"])
     return {
         "items": events,
         "total": total,
-        "page": safe_page,
         "page_size": safe_page_size,
+        "next_cursor": next_cursor,
     }
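
The `(timestamp, _id)` sort only stays index-backed with a matching compound index; `setup_indexes()` is not shown in this diff, so the sketch below is an assumption about what it should include. Note also that the decoded `cursor_oid` is compared as a string: if stored `_id` values are native ObjectIds, the tiebreak clause would need `bson.ObjectId(cursor_oid)` to ever match.

```python
# Assumed shape of the index supporting the cursor sort; the name is illustrative.
from pymongo import DESCENDING

def ensure_cursor_index(events_collection):
    events_collection.create_index(
        [("timestamp", DESCENDING), ("_id", DESCENDING)],
        name="timestamp_id_desc",
    )
```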


@@ -1,31 +1,46 @@
+import time
 from auth import require_auth
 from database import events_collection
 from fastapi import APIRouter, Depends, HTTPException, Query
 from graph.audit_logs import fetch_audit_logs
+from metrics import track_fetch, track_fetch_duration, track_fetch_error
 from models.api import FetchAuditLogsResponse
 from models.event_model import normalize_event
 from pymongo import UpdateOne
 from sources.intune_audit import fetch_intune_audit
 from sources.unified_audit import fetch_unified_audit
+from watermark import get_watermark, set_watermark

 router = APIRouter(dependencies=[Depends(require_auth)])

 def run_fetch(hours: int = 168):
+    from datetime import datetime
     window = max(1, min(hours, 720))  # cap to 30 days for sanity
+    now = datetime.utcnow().isoformat() + "Z"
     logs = []
     errors = []
-    def fetch_source(fn, label):
+    def fetch_source(fn, label, source_key):
+        start_time = time.time()
         try:
-            return fn(hours=window)
+            since = get_watermark(source_key)
+            result = fn(since=since) if since else fn(hours=window)
+            set_watermark(source_key, now)
+            track_fetch(source_key, len(result))
+            return result
         except Exception as exc:
             errors.append(f"{label}: {exc}")
+            track_fetch_error(source_key)
             return []
+        finally:
+            track_fetch_duration(source_key, time.time() - start_time)
-    logs.extend(fetch_source(fetch_audit_logs, "Directory audit"))
-    logs.extend(fetch_source(fetch_unified_audit, "Unified audit (Exchange/SharePoint/Teams)"))
-    logs.extend(fetch_source(fetch_intune_audit, "Intune audit"))
+    logs.extend(fetch_source(fetch_audit_logs, "Directory audit", "directory"))
+    logs.extend(fetch_source(fetch_unified_audit, "Unified audit", "unified"))
+    logs.extend(fetch_source(fetch_intune_audit, "Intune audit", "intune"))
     normalized = [normalize_event(e) for e in logs]
     if normalized:


@@ -0,0 +1,32 @@
import structlog
from fastapi import APIRouter, Request, Response

router = APIRouter()
logger = structlog.get_logger("aoc.webhooks")

@router.post("/webhooks/graph")
async def graph_webhook(request: Request):
    """
    Receive Microsoft Graph change notifications.
    Handles the validation handshake by echoing validationToken.
    """
    validation_token = request.query_params.get("validationToken")
    if validation_token:
        return Response(content=validation_token, media_type="text/plain")
    try:
        body = await request.json()
    except Exception as exc:
        logger.warning("Invalid webhook payload", error=str(exc))
        return Response(status_code=400)
    for notification in body.get("value", []):
        logger.info(
            "Received Graph notification",
            change_type=notification.get("changeType"),
            resource=notification.get("resource"),
            client_state=notification.get("clientState"),
        )
    return {"status": "accepted"}


@@ -4,13 +4,13 @@ from graph.auth import get_access_token
 from utils.http import get_with_retry

-def fetch_intune_audit(hours: int = 24, max_pages: int = 50) -> list[dict]:
+def fetch_intune_audit(hours: int = 24, since: str | None = None, max_pages: int = 50) -> list[dict]:
     """
     Fetch Intune audit events via Microsoft Graph.
     Requires Intune audit permissions (e.g., DeviceManagementConfiguration.Read.All).
     """
     token = get_access_token()
-    start_time = (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z"
+    start_time = since or (datetime.utcnow() - timedelta(hours=hours)).isoformat() + "Z"
     url = (
         "https://graph.microsoft.com/v1.0/deviceManagement/auditEvents"
         f"?$filter=activityDateTime ge {start_time}"


@@ -11,10 +11,13 @@ AUDIT_CONTENT_TYPES = {
 }

-def _time_window(hours: int):
+def _time_window(hours: int, since: str | None = None):
     end = datetime.utcnow()
-    start = end - timedelta(hours=hours)
-    # Activity API expects UTC ISO without Z
+    if since:
+        # Office 365 API expects format without Z
+        start = datetime.fromisoformat(since.replace("Z", "+00:00")).replace(tzinfo=None)
+    else:
+        start = end - timedelta(hours=hours)
     return start.strftime("%Y-%m-%dT%H:%M:%S"), end.strftime("%Y-%m-%dT%H:%M:%S")
@@ -26,8 +29,8 @@ def _ensure_subscription(content_type: str, token: str, tenant_id: str):
     post_with_retry(url, params=params, headers=headers, timeout=10)

-def _list_content(content_type: str, token: str, tenant_id: str, hours: int) -> list[dict]:
-    start, end = _time_window(hours)
+def _list_content(content_type: str, token: str, tenant_id: str, hours: int, since: str | None = None) -> list[dict]:
+    start, end = _time_window(hours, since)
     url = f"https://manage.office.com/api/v1.0/{tenant_id}/activity/feed/subscriptions/content"
     params = {"contentType": content_type, "startTime": start, "endTime": end}
     headers = {"Authorization": f"Bearer {token}"}
@@ -56,7 +59,7 @@ def _download_content(content_uri: str, token: str) -> list[dict]:
         raise RuntimeError(f"Failed to download audit content: {exc}") from exc

-def fetch_unified_audit(hours: int = 24, max_files: int = 50) -> list[dict]:
+def fetch_unified_audit(hours: int = 24, since: str | None = None, max_files: int = 50) -> list[dict]:
     """
     Fetch unified audit logs (Exchange, SharePoint, Teams policy changes via Audit.General)
     using the Office 365 Management Activity API.
@@ -69,7 +72,7 @@ def fetch_unified_audit(hours: int = 24, max_files: int = 50) -> list[dict]:
     for content_type in AUDIT_CONTENT_TYPES:
         _ensure_subscription(content_type, token, TENANT_ID)
-        contents = _list_content(content_type, token, TENANT_ID, hours)
+        contents = _list_content(content_type, token, TENANT_ID, hours, since)
         for item in contents[:max_files]:
             content_uri = item.get("contentUri")
             if not content_uri:


@@ -12,13 +12,22 @@ def mock_events_collection():
 @pytest.fixture(scope="function")
+def mock_watermarks_collection():
+    client = mongomock.MongoClient()
+    db = client["micro_soc"]
+    coll = db["watermarks"]
+    return coll
+
+@pytest.fixture(scope="function")
-def client(mock_events_collection, monkeypatch):
-    # Patch the collection in all modules that import it before the app is imported
+def client(mock_events_collection, mock_watermarks_collection, monkeypatch):
     monkeypatch.setattr("database.events_collection", mock_events_collection)
     monkeypatch.setattr("routes.fetch.events_collection", mock_events_collection)
     monkeypatch.setattr("routes.events.events_collection", mock_events_collection)
+    monkeypatch.setattr("watermark.watermarks_collection", mock_watermarks_collection)
+    monkeypatch.setattr("routes.fetch.get_watermark", lambda source: None)
+    monkeypatch.setattr("routes.fetch.set_watermark", lambda source, ts: None)
     monkeypatch.setattr("auth.AUTH_ENABLED", False)
-    # Patch health check db.command so it doesn't need a real MongoDB server
     monkeypatch.setattr("database.db.command", lambda cmd: {"ok": 1} if cmd == "ping" else {})
     from main import app


@@ -9,15 +9,21 @@ def test_health(client):
assert data["database"] == "connected" assert data["database"] == "connected"
def test_metrics(client):
response = client.get("/metrics")
assert response.status_code == 200
assert "aoc_request_duration_seconds" in response.text
def test_list_events_empty(client): def test_list_events_empty(client):
response = client.get("/api/events") response = client.get("/api/events")
assert response.status_code == 200 assert response.status_code == 200
data = response.json() data = response.json()
assert data["items"] == [] assert data["items"] == []
assert data["total"] == 0 assert data["next_cursor"] is None
def test_list_events_pagination(client, mock_events_collection): def test_list_events_cursor_pagination(client, mock_events_collection):
for i in range(5): for i in range(5):
mock_events_collection.insert_one({ mock_events_collection.insert_one({
"id": f"evt-{i}", "id": f"evt-{i}",
@@ -28,13 +34,18 @@ def test_list_events_pagination(client, mock_events_collection):
"actor_display": f"Actor {i}", "actor_display": f"Actor {i}",
"raw_text": "", "raw_text": "",
}) })
response = client.get("/api/events?page=1&page_size=2") response = client.get("/api/events?page_size=2")
assert response.status_code == 200 assert response.status_code == 200
data = response.json() data = response.json()
assert data["total"] == 5
assert len(data["items"]) == 2 assert len(data["items"]) == 2
assert data["page"] == 1 assert data["next_cursor"] is not None
assert data["page_size"] == 2
# Follow cursor
response2 = client.get(f"/api/events?page_size=2&cursor={data['next_cursor']}")
assert response2.status_code == 200
data2 = response2.json()
assert len(data2["items"]) == 2
assert data2["next_cursor"] is not None
 def test_list_events_filter_by_service(client, mock_events_collection):
@@ -59,7 +70,7 @@ def test_list_events_filter_by_service(client, mock_events_collection):
response = client.get("/api/events?service=Exchange") response = client.get("/api/events?service=Exchange")
assert response.status_code == 200 assert response.status_code == 200
data = response.json() data = response.json()
assert data["total"] == 1 assert len(data["items"]) == 1
assert data["items"][0]["service"] == "Exchange" assert data["items"][0]["service"] == "Exchange"
@@ -96,3 +107,26 @@ def test_fetch_audit_logs_validation(client):
     assert response.status_code == 422
     response = client.get("/api/fetch-audit-logs?hours=721")
     assert response.status_code == 422

+def test_graph_webhook_validation(client):
+    token = "test-validation-token-123"
+    response = client.post("/api/webhooks/graph?validationToken=" + token)
+    assert response.status_code == 200
+    assert response.text == token
+    assert response.headers["content-type"] == "text/plain; charset=utf-8"
+
+def test_graph_webhook_notification(client):
+    payload = {
+        "value": [
+            {
+                "changeType": "updated",
+                "resource": "auditLogs/directoryAudits",
+                "clientState": "secret",
+            }
+        ]
+    }
+    response = client.post("/api/webhooks/graph", json=payload)
+    assert response.status_code == 200
+    assert response.json()["status"] == "accepted"

backend/watermark.py (new file)

@@ -0,0 +1,18 @@
from database import db

watermarks_collection = db["watermarks"]

def get_watermark(source: str) -> str | None:
    """Return the ISO timestamp of the last successful fetch for a source."""
    doc = watermarks_collection.find_one({"source": source})
    return doc.get("last_fetch_time") if doc else None

def set_watermark(source: str, timestamp: str):
    """Persist the latest successful fetch timestamp for a source."""
    watermarks_collection.update_one(
        {"source": source},
        {"$set": {"last_fetch_time": timestamp}},
        upsert=True,
    )
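
One document per source, keyed by `source`, with `last_fetch_time` stored as an ISO-8601 string; a small round-trip sketch against a live MongoDB (values illustrative):

```python
# First fetch for a source sees no watermark, so run_fetch falls back to the
# hours window; later fetches resume from the stored timestamp.
from watermark import get_watermark, set_watermark

print(get_watermark("directory"))                   # None on first run
set_watermark("directory", "2026-04-14T12:00:00Z")
print(get_watermark("directory"))                   # "2026-04-14T12:00:00Z"
```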