diff --git a/.env.example b/.env.example index 8b6d0d9..8404097 100644 --- a/.env.example +++ b/.env.example @@ -64,6 +64,10 @@ ALERT_WEBHOOK_URL= ALERT_WEBHOOK_FORMAT=generic # generic | slack | teams ALERT_DEDUPE_MINUTES=15 +# Webhook security (optional but strongly recommended) +# Set this to the same clientState used when creating Graph subscriptions +WEBHOOK_CLIENT_SECRET= + # Optional: privacy / access control # Hide entire services from users without PRIVACY_SERVICE_ROLES # PRIVACY_SERVICES=Exchange,Teams diff --git a/RELEASE_NOTES_v1.7.7.md b/RELEASE_NOTES_v1.7.7.md new file mode 100644 index 0000000..fb46166 --- /dev/null +++ b/RELEASE_NOTES_v1.7.7.md @@ -0,0 +1,99 @@ +# AOC v1.7.7 Release Notes + +**Release date:** 2026-04-24 + +--- + +## Security Hardening + +This release is a focused security patch addressing findings from an internal audit. All users running AOC in production are encouraged to upgrade. + +### Webhook authentication (`/api/webhooks/graph`) +- **ClientState validation** — Notifications now require a matching `WEBHOOK_CLIENT_SECRET`. Set this in your `.env` to the same value used when creating Graph subscriptions. +- Rejects spoofed notification payloads with `401 Unauthorized`. + +### Rate limiting +- **Redis-backed fixed-window rate limiting** is now enabled by default. +- Per-category limits: + - `/api/fetch-audit-logs` — 10 requests/hour + - `/api/ask` — 30 requests/minute + - `/api/events/bulk-tags` — 20 requests/minute + - All other endpoints — 120 requests/minute +- Returns `429 Too Many Requests` with a `Retry-After` header when exceeded. + +### SSRF protection for LLM calls +- `LLM_BASE_URL` is now validated before every outbound request. +- Blocks non-HTTPS URLs, localhost, link-local addresses (`169.254.169.254`), and all private IP ranges. + +### CORS enforcement +- Wildcard (`*`) origins are **automatically stripped** when `AUTH_ENABLED=true`. +- A startup warning is logged if an insecure CORS configuration is detected. + +### Content Security Policy +- API and HTML responses now include a `Content-Security-Policy` header. +- Restricts script sources to self, CDN origins, and MSAL auth library. + +### Audit trail integrity +- The audit middleware no longer parses JWT tokens without signature verification. +- Verified claims are now propagated safely via `contextvars`, eliminating audit log poisoning. + +### Standalone MCP server +- Prints a prominent security warning on startup reminding operators that the stdio transport has no authentication layer. + +--- + +## Operational Improvements + +### Bulk tag cap +- `POST /api/events/bulk-tags` now refuses to update more than **10,000 events** in a single request. +- Returns `400` with guidance to narrow filters. + +### Generic error responses +- Internal exception details are no longer leaked in HTTP 500/502 responses. +- Full stack traces remain in server-side logs. + +### Alert rule schema +- `conditions` field now uses a strict Pydantic model (`AlertCondition`) instead of an unconstrained `list[dict]`. +- Prevents stored data pollution from malformed rule payloads. + +### Docker Compose +- MongoDB (`27017`) and Redis (`6379`) ports are no longer forwarded to the Docker host. +- Internal services are reachable only via the Docker network. + +--- + +## Configuration + +Add to your `.env`: + +```bash +# Required if you use Graph webhooks +WEBHOOK_CLIENT_SECRET=your-random-secret + +# Optional: disable rate limiting (not recommended) +RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS=120 +RATE_LIMIT_WINDOW_SECONDS=60 +``` + +--- + +## Upgrade notes + +**No breaking changes.** Existing event data, tags, comments, and saved searches are preserved. + +After pulling: + +```bash +export AOC_VERSION=v1.7.7 +docker compose -f docker-compose.prod.yml pull +docker compose -f docker-compose.prod.yml up -d +``` + +--- + +## Docker image + +``` +git.cqre.net/cqrenet/aoc-backend:v1.7.7 +``` diff --git a/VERSION b/VERSION index de28578..91c74a5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.6 +1.7.7 diff --git a/backend/auth.py b/backend/auth.py index 506c3cf..541c82e 100644 --- a/backend/auth.py +++ b/backend/auth.py @@ -1,3 +1,4 @@ +import contextvars import time import requests @@ -15,6 +16,9 @@ from fastapi import Header, HTTPException from jwt import ExpiredSignatureError, InvalidTokenError, decode from jwt.algorithms import RSAAlgorithm +# Thread-/task-local storage for verified auth claims (used by audit middleware) +_auth_context: contextvars.ContextVar[dict | None] = contextvars.ContextVar("auth_context", default=None) + JWKS_CACHE = {"exp": 0, "keys": []} logger = structlog.get_logger("aoc.auth") @@ -94,7 +98,9 @@ def user_can_access_privacy_services(claims: dict) -> bool: def require_auth(authorization: str | None = Header(None)): if not AUTH_ENABLED: - return {"sub": "anonymous"} + user = {"sub": "anonymous"} + _auth_context.set(user) + return user if not authorization or not authorization.lower().startswith("bearer "): raise HTTPException(status_code=401, detail="Missing bearer token") @@ -106,4 +112,5 @@ def require_auth(authorization: str | None = Header(None)): if not _allowed(claims, AUTH_ALLOWED_ROLES, AUTH_ALLOWED_GROUPS): raise HTTPException(status_code=403, detail="Forbidden") + _auth_context.set(claims) return claims diff --git a/backend/config.py b/backend/config.py index 74c17e7..dda30be 100644 --- a/backend/config.py +++ b/backend/config.py @@ -68,6 +68,14 @@ class Settings(BaseSettings): ALERT_WEBHOOK_FORMAT: str = "generic" # generic | slack | teams ALERT_DEDUPE_MINUTES: int = 15 + # Webhook security + WEBHOOK_CLIENT_SECRET: str = "" + + # Rate limiting + RATE_LIMIT_ENABLED: bool = True + RATE_LIMIT_REQUESTS: int = 120 + RATE_LIMIT_WINDOW_SECONDS: int = 60 + _settings = Settings() @@ -113,3 +121,9 @@ DEFAULT_PAGE_SIZE = _settings.DEFAULT_PAGE_SIZE ALERT_WEBHOOK_URL = _settings.ALERT_WEBHOOK_URL ALERT_WEBHOOK_FORMAT = _settings.ALERT_WEBHOOK_FORMAT ALERT_DEDUPE_MINUTES = _settings.ALERT_DEDUPE_MINUTES + +WEBHOOK_CLIENT_SECRET = _settings.WEBHOOK_CLIENT_SECRET + +RATE_LIMIT_ENABLED = _settings.RATE_LIMIT_ENABLED +RATE_LIMIT_REQUESTS = _settings.RATE_LIMIT_REQUESTS +RATE_LIMIT_WINDOW_SECONDS = _settings.RATE_LIMIT_WINDOW_SECONDS diff --git a/backend/main.py b/backend/main.py index 36deafb..e83a658 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,7 +6,7 @@ from pathlib import Path import structlog from audit_trail import log_action -from config import AI_FEATURES_ENABLED, CORS_ORIGINS, ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES +from config import AI_FEATURES_ENABLED, AUTH_ENABLED, CORS_ORIGINS, ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES from database import setup_indexes from fastapi import FastAPI, HTTPException, Request from fastapi.middleware.cors import CORSMiddleware @@ -52,10 +52,19 @@ logger = structlog.get_logger("aoc.fetcher") app = FastAPI() +# CORS: reject wildcard in production when auth is enabled +_effective_cors = CORS_ORIGINS +if AUTH_ENABLED and "*" in _effective_cors: + logger.warning( + "CORS wildcard (*) is insecure when AUTH_ENABLED=true. " + "Removing wildcard. Set CORS_ORIGINS explicitly in production." + ) + _effective_cors = [o for o in _effective_cors if o != "*"] or ["http://localhost:8000"] + app.add_middleware(CorrelationIdMiddleware) app.add_middleware( CORSMiddleware, - allow_origins=CORS_ORIGINS, + allow_origins=_effective_cors, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], @@ -80,27 +89,39 @@ async def cache_control_middleware(request: Request, call_next): response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" response.headers["Pragma"] = "no-cache" response.headers["Expires"] = "0" + # Basic CSP for the UI and API + if request.url.path.startswith("/api/") or request.url.path in ("/", "/index.html"): + response.headers["Content-Security-Policy"] = ( + "default-src 'self'; " + "script-src 'self' 'unsafe-inline' cdn.jsdelivr.net alcdn.msauth.net; " + "style-src 'self' 'unsafe-inline'; " + "connect-src 'self'; " + "img-src 'self' data:;" + ) return response +@app.middleware("http") +async def rate_limit_middleware(request: Request, call_next): + """Apply Redis-backed rate limiting before processing the request.""" + if request.url.path.startswith("/api/"): + from rate_limiter import check_rate_limit + + await check_rate_limit(request) + return await call_next(request) + + @app.middleware("http") async def audit_middleware(request: Request, call_next): response = await call_next(request) if request.url.path.startswith("/api/") and request.method in ("POST", "PATCH", "PUT", "DELETE"): - from auth import AUTH_ENABLED - user = "anonymous" if AUTH_ENABLED: - auth_header = request.headers.get("authorization", "") - if auth_header.lower().startswith("bearer "): - try: - from jose import jwt + from auth import _auth_context - token = auth_header.split(" ", 1)[1] - claims = jwt.get_unverified_claims(token) - user = claims.get("sub", "unknown") - except Exception: - pass + claims = _auth_context.get(None) + if isinstance(claims, dict): + user = claims.get("sub", "unknown") log_action( action=request.method.lower(), resource=request.url.path, @@ -152,6 +173,19 @@ async def version(): return {"version": os.environ.get("VERSION", "unknown")} +@app.exception_handler(Exception) +async def generic_exception_handler(request: Request, exc: Exception): + """Return generic error messages for unhandled exceptions to avoid info leakage.""" + if isinstance(exc, HTTPException): + raise exc + logger.error("Unhandled exception", path=request.url.path, error=str(exc)) + return Response( + content='{"detail":"Internal server error"}', + status_code=500, + media_type="application/json", + ) + + frontend_dir = Path(__file__).parent / "frontend" app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend") diff --git a/backend/mcp_server.py b/backend/mcp_server.py index f487685..a93edf1 100644 --- a/backend/mcp_server.py +++ b/backend/mcp_server.py @@ -41,6 +41,15 @@ from mcp_common import ( handle_search_events, ) +# Security warning: this standalone stdio server has no authentication. +# Only run it in trusted environments (e.g. local Claude Desktop) and +# ensure the MongoDB connection uses authenticated credentials. +print("=" * 60, file=sys.stderr) +print("AOC MCP Server (stdio transport)", file=sys.stderr) +print("WARNING: No authentication layer. Only run in trusted", file=sys.stderr) +print("environments or behind a VPN. See AGENTS.md for details.", file=sys.stderr) +print("=" * 60, file=sys.stderr) + app = Server("aoc") diff --git a/backend/models/api.py b/backend/models/api.py index a52b704..4a10034 100644 --- a/backend/models/api.py +++ b/backend/models/api.py @@ -63,12 +63,18 @@ class CommentAddRequest(BaseModel): text: str +class AlertCondition(BaseModel): + field: str + op: str # eq, neq, contains, in, after_hours + value: str | list[str] | None = None + + class AlertRuleResponse(BaseModel): id: str | None = None name: str enabled: bool severity: str - conditions: list[dict] + conditions: list[AlertCondition] message: str diff --git a/backend/rate_limiter.py b/backend/rate_limiter.py new file mode 100644 index 0000000..462f8bc --- /dev/null +++ b/backend/rate_limiter.py @@ -0,0 +1,82 @@ +"""Simple Redis-backed fixed-window rate limiter.""" + +import time + +import structlog +from config import RATE_LIMIT_ENABLED, RATE_LIMIT_REQUESTS, RATE_LIMIT_WINDOW_SECONDS +from fastapi import HTTPException, Request +from redis_client import get_redis + +logger = structlog.get_logger("aoc.rate_limit") + + +class RateLimitExceeded(HTTPException): + def __init__(self, retry_after: int): + super().__init__( + status_code=429, + detail="Rate limit exceeded. Please slow down.", + headers={"Retry-After": str(retry_after)}, + ) + + +def _get_identifier(request: Request) -> str: + """Best-effort client identifier: authenticated sub, or X-Forwarded-For, or client host.""" + user = getattr(request.state, "user", None) + if user and isinstance(user, dict): + sub = user.get("sub") + if sub and sub != "anonymous": + return f"user:{sub}" + + forwarded = request.headers.get("x-forwarded-for") + if forwarded: + return f"ip:{forwarded.split(',')[0].strip()}" + + return f"ip:{request.client.host if request.client else 'unknown'}" + + +def _get_path_category(path: str) -> str: + """Bucket paths into rate-limit categories.""" + if path.startswith("/api/fetch"): + return "fetch" + if path.startswith("/api/ask"): + return "ask" + if path.startswith("/api/events/bulk-tags"): + return "write" + return "default" + + +def _limit_for_category(category: str) -> tuple[int, int]: + """Return (max_requests, window_seconds) for a category.""" + if category == "fetch": + return (10, 3600) # 10 per hour + if category == "ask": + return (30, 60) # 30 per minute + if category == "write": + return (20, 60) # 20 per minute + return (RATE_LIMIT_REQUESTS, RATE_LIMIT_WINDOW_SECONDS) + + +async def check_rate_limit(request: Request): + """Raise RateLimitExceeded if the client has exceeded their quota.""" + if not RATE_LIMIT_ENABLED: + return + + category = _get_path_category(request.url.path) + limit, window = _limit_for_category(category) + + identifier = _get_identifier(request) + now = int(time.time()) + window_key = now // window + redis_key = f"rate_limit:{identifier}:{category}:{window_key}" + + try: + redis = await get_redis() + count = await redis.incr(redis_key) + if count == 1: + await redis.expire(redis_key, window) + if count > limit: + raise RateLimitExceeded(retry_after=window - (now % window)) + except RateLimitExceeded: + raise + except Exception as exc: + logger.warning("Rate limiter Redis error; allowing request", error=str(exc)) diff --git a/backend/routes/ask.py b/backend/routes/ask.py index 7ef3bc9..e48117e 100644 --- a/backend/routes/ask.py +++ b/backend/routes/ask.py @@ -397,8 +397,31 @@ def _format_events_for_llm( return "\n".join(lines) +def _validate_llm_url(url: str): + """Prevent SSRF by rejecting internal/reserved addresses.""" + from urllib.parse import urlparse + + parsed = urlparse(url) + if parsed.scheme != "https": + raise RuntimeError("LLM_BASE_URL must use HTTPS") + hostname = (parsed.hostname or "").lower() + if not hostname: + raise RuntimeError("LLM_BASE_URL must have a valid hostname") + blocked = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "169.254.169.254"} + if hostname in blocked: + raise RuntimeError(f"LLM_BASE_URL hostname '{hostname}' is not allowed") + # Block link-local and private IP ranges + import ipaddress + + try: + ip = ipaddress.ip_address(hostname) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + raise RuntimeError(f"LLM_BASE_URL IP '{hostname}' is not allowed") + except ValueError: + pass # hostname is not an IP, which is fine + + def _build_chat_url(base_url: str, api_version: str) -> str: - """Construct the chat completions URL, handling Azure OpenAI endpoints.""" base = base_url.rstrip("/") url = base if base.endswith("/chat/completions") else f"{base}/chat/completions" if api_version: @@ -424,6 +447,9 @@ async def _call_llm( }, ] + # SSRF guard: only allow known public HTTPS endpoints + _validate_llm_url(LLM_BASE_URL) + url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION) headers = { "Content-Type": "application/json", @@ -570,6 +596,8 @@ async def _explain_event(event: dict, related: list[dict]) -> str: }, ] + _validate_llm_url(LLM_BASE_URL) + url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION) headers = {"Content-Type": "application/json"} if "azure" in LLM_BASE_URL.lower() or "cognitiveservices" in LLM_BASE_URL.lower(): @@ -731,7 +759,7 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)): raw_events = list(cursor) except Exception as exc: logger.error("Failed to query events for ask", error=str(exc)) - raise HTTPException(status_code=500, detail=f"Database query failed: {exc}") from exc + raise HTTPException(status_code=500, detail="Database query failed") from exc for e in raw_events: e["_id"] = str(e.get("_id", "")) @@ -803,7 +831,6 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)): "total_matched": total, "services_queried": query_services, "excluded_services": excluded_services, - "mongo_query": json.dumps(query, default=str), }, llm_used=False, llm_error=None, @@ -863,7 +890,6 @@ async def ask_question(body: AskRequest, user: dict = Depends(require_auth)): "total_matched": total, "services_queried": query_services, "excluded_services": excluded_services, - "mongo_query": json.dumps(query, default=str), }, llm_used=llm_used, llm_error=llm_error, diff --git a/backend/routes/events.py b/backend/routes/events.py index 43bdbd3..6ab9463 100644 --- a/backend/routes/events.py +++ b/backend/routes/events.py @@ -158,7 +158,7 @@ def list_events( cursor_query = events_collection.find(query).sort([("timestamp", -1), ("_id", -1)]).limit(safe_page_size) events = list(cursor_query) except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to query events: {exc}") from exc + raise HTTPException(status_code=500, detail="Failed to query events") from exc next_cursor = None if len(events) == safe_page_size: @@ -241,9 +241,17 @@ def bulk_tags( update = {"$set": {"tags": tags}} if body.mode == "replace" else {"$addToSet": {"tags": {"$each": tags}}} try: + matched = events_collection.count_documents(query, limit=10001) + if matched > 10000: + raise HTTPException( + status_code=400, + detail="Bulk tag update matches too many events (>10000). Narrow your filters.", + ) result_obj = events_collection.update_many(query, update) + except HTTPException: + raise except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to update tags: {exc}") from exc + raise HTTPException(status_code=500, detail="Failed to update tags") from exc log_action( "bulk_tags", @@ -268,7 +276,7 @@ def filter_options( actor_upns = sorted([a for a in events_collection.distinct("actor_upn") if a])[:safe_limit] devices = sorted([a for a in events_collection.distinct("target_displays") if isinstance(a, str)])[:safe_limit] except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to load filter options: {exc}") from exc + raise HTTPException(status_code=500, detail="Failed to load filter options") from exc if not user_can_access_privacy_services(user): services = [s for s in services if s not in PRIVACY_SERVICES] diff --git a/backend/routes/fetch.py b/backend/routes/fetch.py index 97a677b..7f652bf 100644 --- a/backend/routes/fetch.py +++ b/backend/routes/fetch.py @@ -1,5 +1,6 @@ import time +import structlog from audit_trail import log_action from auth import require_auth from config import ALERTS_ENABLED @@ -15,6 +16,8 @@ from sources.intune_audit import fetch_intune_audit from sources.unified_audit import fetch_unified_audit from watermark import get_watermark, set_watermark +logger = structlog.get_logger("aoc.fetch") + router = APIRouter(dependencies=[Depends(require_auth)]) @@ -85,5 +88,8 @@ def fetch_logs( user.get("sub", "anonymous"), ) return result + except HTTPException: + raise except Exception as exc: - raise HTTPException(status_code=502, detail=str(exc)) from exc + logger.error("Fetch failed", error=str(exc)) + raise HTTPException(status_code=502, detail="Failed to fetch audit logs") from exc diff --git a/backend/routes/webhooks.py b/backend/routes/webhooks.py index 16c9c3e..7b32eaa 100644 --- a/backend/routes/webhooks.py +++ b/backend/routes/webhooks.py @@ -1,4 +1,5 @@ import structlog +from config import WEBHOOK_CLIENT_SECRET from fastapi import APIRouter, Request, Response router = APIRouter() @@ -10,9 +11,12 @@ async def graph_webhook(request: Request): """ Receive Microsoft Graph change notifications. Handles the validation handshake by echoing validationToken. + Validates clientState on notifications to prevent spoofing. """ validation_token = request.query_params.get("validationToken") if validation_token: + # Microsoft sends validationToken as a query param during subscription creation. + # Echo it back as plain text to prove endpoint ownership. return Response(content=validation_token, media_type="text/plain") try: @@ -21,12 +25,26 @@ async def graph_webhook(request: Request): logger.warning("Invalid webhook payload", error=str(exc)) return Response(status_code=400) - for notification in body.get("value", []): + notifications = body.get("value", []) + if not isinstance(notifications, list): + logger.warning("Invalid webhook payload structure") + return Response(status_code=400) + + for notification in notifications: + client_state = notification.get("clientState") + if WEBHOOK_CLIENT_SECRET and client_state != WEBHOOK_CLIENT_SECRET: + logger.warning( + "Graph webhook rejected: invalid clientState", + change_type=notification.get("changeType"), + resource=notification.get("resource"), + ) + return Response(status_code=401) + logger.info( "Received Graph notification", change_type=notification.get("changeType"), resource=notification.get("resource"), - client_state=notification.get("clientState"), + client_state=client_state, ) return {"status": "accepted"} diff --git a/docker-compose.yml b/docker-compose.yml index bb841e8..1e2d3fb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,8 +3,7 @@ services: image: valkey/valkey:8-alpine container_name: aoc-redis restart: always - ports: - - "6379:6379" + # Ports not exposed to host; backend and worker connect via Docker network volumes: - redis_data:/data @@ -12,8 +11,7 @@ services: image: mongo:7 container_name: aoc-mongo restart: always - ports: - - "27017:27017" + # Ports not exposed to host; backend and worker connect via Docker network environment: MONGO_INITDB_ROOT_USERNAME: ${MONGO_ROOT_USERNAME} MONGO_INITDB_ROOT_PASSWORD: ${MONGO_ROOT_PASSWORD}