feat: implement Phase 3 scaling

- Replace skip-based pagination with cursor-based pagination (timestamp|_id cursors)
- Add Prometheus /metrics endpoint with request latency, fetch volume, and error counters
- Implement incremental fetch watermarking per source (watermarks collection in MongoDB)
- Add Graph change notification webhook endpoint (/api/webhooks/graph)
- Add correlation ID middleware for distributed tracing (x-request-id header)
- Update frontend to use cursor-based pagination with Prev/Next navigation
- Update tests for cursor pagination, metrics, webhooks, and watermark mocking
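Hedged sketches of the mechanisms listed above follow; module, field, and metric names that do not appear in this commit's diff are assumptions, not the project's actual code.

A minimal sketch of the timestamp|_id cursor scheme, assuming events carry an ISO timestamp field and a Mongo ObjectId:

from bson import ObjectId

def encode_cursor(doc) -> str:
    # Cursor format from the commit message: "<timestamp>|<_id>".
    return f"{doc['timestamp']}|{doc['_id']}"

def decode_cursor(cursor: str) -> tuple[str, ObjectId]:
    ts, oid = cursor.split("|", 1)
    return ts, ObjectId(oid)

def page_filter(cursor: str | None) -> dict:
    # Keyset condition: everything strictly "before" the cursor row,
    # with _id breaking ties between events sharing a timestamp.
    if cursor is None:
        return {}
    ts, oid = decode_cursor(cursor)
    return {"$or": [
        {"timestamp": {"$lt": ts}},
        {"timestamp": ts, "_id": {"$lt": oid}},
    ]}

# Hypothetical usage:
# events_collection.find(page_filter(cursor)).sort([("timestamp", -1), ("_id", -1)]).limit(page_size)

Unlike skip-based pagination, the query cost stays roughly constant no matter how deep the client pages.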
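One plausible shape for the metrics module the fetch router imports (track_fetch, track_fetch_duration, track_fetch_error) together with the /metrics endpoint, using prometheus_client; the metric names are illustrative only:

from fastapi import APIRouter, Response
from prometheus_client import CONTENT_TYPE_LATEST, Counter, Histogram, generate_latest

FETCHED_EVENTS = Counter("audit_events_fetched_total", "Events fetched per source", ["source"])
FETCH_ERRORS = Counter("audit_fetch_errors_total", "Fetch errors per source", ["source"])
FETCH_DURATION = Histogram("audit_fetch_duration_seconds", "Fetch duration per source", ["source"])

def track_fetch(source: str, count: int) -> None:
    FETCHED_EVENTS.labels(source=source).inc(count)

def track_fetch_error(source: str) -> None:
    FETCH_ERRORS.labels(source=source).inc()

def track_fetch_duration(source: str, seconds: float) -> None:
    FETCH_DURATION.labels(source=source).observe(seconds)

router = APIRouter()

@router.get("/metrics")
def metrics() -> Response:
    # Expose the default registry in Prometheus text format.
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)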
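The watermark helpers imported by the fetch route are not shown in the diff below; a minimal sketch, assuming a "watermarks" collection keyed by source name and a db handle exported by the database module (both assumptions):

from database import db  # assumed export; the real module is not shown in this commit

watermarks = db["watermarks"]

def get_watermark(source: str) -> str | None:
    # Returns the timestamp of the last successful fetch, or None on first run.
    doc = watermarks.find_one({"_id": source})
    return doc["value"] if doc else None

def set_watermark(source: str, value: str) -> None:
    watermarks.update_one({"_id": source}, {"$set": {"value": value}}, upsert=True)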
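The Graph change-notification endpoint, sketched under the assumption that it performs the standard subscription validation handshake (Graph sends a validationToken query parameter and expects it echoed back as text/plain) before processing notification payloads:

from fastapi import APIRouter, Request, Response

router = APIRouter()

@router.post("/api/webhooks/graph")
async def graph_webhook(request: Request, validationToken: str | None = None):
    # Subscription validation handshake: echo the token back as plain text.
    if validationToken is not None:
        return Response(content=validationToken, media_type="text/plain")
    body = await request.json()
    for notification in body.get("value", []):
        # A real handler would verify clientState and trigger an incremental
        # fetch for the affected source; omitted in this sketch.
        ...
    return Response(status_code=202)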
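A minimal sketch of the correlation ID middleware, assuming the FastAPI @app.middleware("http") style: it reuses an incoming x-request-id header or mints one, and echoes it on the response so requests can be traced across services:

import uuid
from fastapi import FastAPI, Request

app = FastAPI()

@app.middleware("http")
async def correlation_id_middleware(request: Request, call_next):
    # Reuse the caller's x-request-id if present, otherwise generate a new one.
    request_id = request.headers.get("x-request-id") or str(uuid.uuid4())
    request.state.request_id = request_id
    response = await call_next(request)
    response.headers["x-request-id"] = request_id
    return response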
2026-04-14 14:58:50 +02:00
parent 9271b4e461
commit b0198012eb
17 changed files with 402 additions and 147 deletions


@@ -1,31 +1,46 @@
+import time
 from auth import require_auth
 from database import events_collection
 from fastapi import APIRouter, Depends, HTTPException, Query
 from graph.audit_logs import fetch_audit_logs
+from metrics import track_fetch, track_fetch_duration, track_fetch_error
 from models.api import FetchAuditLogsResponse
 from models.event_model import normalize_event
 from pymongo import UpdateOne
 from sources.intune_audit import fetch_intune_audit
 from sources.unified_audit import fetch_unified_audit
+from watermark import get_watermark, set_watermark

 router = APIRouter(dependencies=[Depends(require_auth)])

 def run_fetch(hours: int = 168):
     from datetime import datetime

     window = max(1, min(hours, 720)) # cap to 30 days for sanity
+    now = datetime.utcnow().isoformat() + "Z"
     logs = []
     errors = []

-    def fetch_source(fn, label):
+    def fetch_source(fn, label, source_key):
+        start_time = time.time()
         try:
-            return fn(hours=window)
+            since = get_watermark(source_key)
+            result = fn(since=since) if since else fn(hours=window)
+            set_watermark(source_key, now)
+            track_fetch(source_key, len(result))
+            return result
         except Exception as exc:
             errors.append(f"{label}: {exc}")
+            track_fetch_error(source_key)
             return []
+        finally:
+            track_fetch_duration(source_key, time.time() - start_time)

-    logs.extend(fetch_source(fetch_audit_logs, "Directory audit"))
-    logs.extend(fetch_source(fetch_unified_audit, "Unified audit (Exchange/SharePoint/Teams)"))
-    logs.extend(fetch_source(fetch_intune_audit, "Intune audit"))
+    logs.extend(fetch_source(fetch_audit_logs, "Directory audit", "directory"))
+    logs.extend(fetch_source(fetch_unified_audit, "Unified audit", "unified"))
+    logs.extend(fetch_source(fetch_intune_audit, "Intune audit", "intune"))

     normalized = [normalize_event(e) for e in logs]
     if normalized: