From f263cbf8ace39a8c42b53dac7f6c0d0ed364f9be Mon Sep 17 00:00:00 2001 From: Tomas Kracmar Date: Mon, 27 Apr 2026 13:59:05 +0200 Subject: [PATCH] =?UTF-8?q?v1.7.12:=20security=20hardening=20=E2=80=94=20C?= =?UTF-8?q?ORS=20fix,=20security=20headers,=20fail-closed=20rate=20limiter?= =?UTF-8?q?,=20OpenAPI=20docs=20disabled=20by=20default,=20config=20auth?= =?UTF-8?q?=20privacy,=20webhook=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 3 + PEN_TEST_REPORT_v1.7.11.md | 203 +++++++++++++++++++++++++++++++++++++ RELEASE_NOTES_v1.7.12.md | 43 ++++++++ VERSION | 2 +- backend/config.py | 5 + backend/main.py | 25 +++-- backend/rate_limiter.py | 3 +- backend/routes/config.py | 4 +- backend/routes/webhooks.py | 10 +- backend/tests/conftest.py | 18 +++- 10 files changed, 303 insertions(+), 13 deletions(-) create mode 100644 PEN_TEST_REPORT_v1.7.11.md create mode 100644 RELEASE_NOTES_v1.7.12.md diff --git a/.env.example b/.env.example index 8404097..f4a0ee6 100644 --- a/.env.example +++ b/.env.example @@ -27,6 +27,9 @@ RETENTION_DAYS=0 # Optional: comma-separated CORS origins (e.g., http://localhost:3000,https://app.example.com) CORS_ORIGINS=* +# OpenAPI docs exposure (set true only for dev) +DOCS_ENABLED=false + # Optional: SIEM export webhook (e.g., Splunk HEC, Sentinel, or generic syslog webhook) SIEM_ENABLED=false SIEM_WEBHOOK_URL= diff --git a/PEN_TEST_REPORT_v1.7.11.md b/PEN_TEST_REPORT_v1.7.11.md new file mode 100644 index 0000000..cc18c3f --- /dev/null +++ b/PEN_TEST_REPORT_v1.7.11.md @@ -0,0 +1,203 @@ +# AOC v1.7.11 Soft Penetration Test Report + +**Date:** 2026-04-27 +**Target:** Local AOC instance (port 8001), auth disabled, AI disabled +**Tester:** Automated + manual curl-based probing +**Scope:** FastAPI backend, REST API endpoints, middleware, headers + +--- + +## Executive Summary + +AOC v1.7.11 has one **CRITICAL** vulnerability (CORS credentials leak) and several defense-in-depth gaps. The good news: input validation, NoSQL injection resistance, and error handling are solid. The bad news: CORS is dangerously permissive, security headers are missing, and the rate limiter fails open on Redis failure. + +| Severity | Count | Categories | +|----------|-------|------------| +| CRITICAL | 1 | CORS with credentials | +| HIGH | 1 | Missing security headers | +| MEDIUM | 2 | Fail-open rate limiter, OpenAPI exposure | +| LOW | 2 | Information disclosure, webhook content injection | +| INFO | 3 | Positive findings (no stack traces, input validation, NoSQL resistance) | + +--- + +## CRITICAL + +### 1. CORS Reflects Any Origin with `allow_credentials=true` + +**Finding:** The CORS middleware returns `Access-Control-Allow-Origin: ` AND `Access-Control-Allow-Credentials: true` for every origin that sends an `Origin` header. + +**Evidence:** +```bash +curl -H "Origin: https://evil-attacker.com" http://localhost:8001/api/config/auth +# Response headers: +# access-control-allow-origin: https://evil-attacker.com +# access-control-allow-credentials: true +``` + +**Impact:** An attacker can host a malicious page on any domain and make authenticated cross-origin requests to the AOC API using the victim's browser cookies/tokens. This effectively bypasses Same-Origin Policy for authenticated actions. + +**Root Cause:** `main.py` configures CORS with `allow_origins=["*"]` (from `CORS_ORIGINS` env var, default `"*"`) AND `allow_credentials=True`. According to CORS spec, a wildcard origin with credentials is technically invalid, but Starlette/FastAPI appears to reflect the request origin instead. + +**Recommendation:** +- When `AUTH_ENABLED=true`, reject requests from origins not in an explicit allowlist. +- Set `allow_credentials=False` if wildcard origins are needed. +- Or, require `CORS_ORIGINS` to be explicitly configured (no default wildcard) when auth is enabled. + +--- + +## HIGH + +### 2. Missing Security Headers + +**Finding:** The following security headers are absent from all responses: + +| Header | Purpose | Status | +|--------|---------|--------| +| `X-Content-Type-Options: nosniff` | Prevents MIME sniffing | MISSING | +| `X-Frame-Options: DENY` or `SAMEORIGIN` | Clickjacking protection | MISSING | +| `Strict-Transport-Security` | HSTS enforcement | MISSING | +| `Referrer-Policy: strict-origin-when-cross-origin` | Limits referrer leakage | MISSING | +| `Permissions-Policy` | Restricts browser features | MISSING | + +**Impact:** Increased attack surface for clickjacking, MIME confusion attacks, and information leakage via referrer headers. + +**Recommendation:** Add a security headers middleware to set these on all responses. HSTS only when served over HTTPS. + +--- + +## MEDIUM + +### 3. Rate Limiter Fails Open on Redis Failure + +**Finding:** In `rate_limiter.py` line 81-82: +```python +except Exception as exc: + logger.warning("Rate limiter Redis error; allowing request", error=str(exc)) +``` + +If Redis becomes unreachable, all rate limits are silently bypassed. + +**Evidence:** When Redis was down, 150+ requests to `/api/events` all returned 200 with no 429s. + +**Impact:** A DoS on Redis (or a network partition) removes all rate limiting, allowing unlimited API abuse. + +**Recommendation:** Make the rate limiter fail-closed: return 429 or 503 when Redis is unavailable, or use an in-memory fallback with a conservative limit. + +### 4. OpenAPI Schema Publicly Exposed + +**Finding:** `/docs`, `/redoc`, and `/openapi.json` are accessible without authentication and return the full API schema. + +**Evidence:** +```bash +curl -s http://localhost:8001/openapi.json | jq '.paths | keys' +# Returns all 15+ API paths including internal endpoints +``` + +**Impact:** Attackers get a complete map of the API, including request/response schemas, parameter types, and endpoint structure. This significantly reduces reconnaissance time. + +**Recommendation:** Disable OpenAPI docs in production (`docs_url=None, redoc_url=None, openapi_url=None`) or gate them behind admin authentication. + +--- + +## LOW + +### 5. Information Disclosure via `/api/config/auth` and `/metrics` + +**Finding:** +- `/api/config/auth` leaks `tenant_id` and `client_id` even when auth is disabled. These values fall back to the Graph API credentials (`TENANT_ID`/`CLIENT_ID`), which may be sensitive. +- `/metrics` exposes Python version (`3.14.3`), GC statistics, and application-internal metric names. + +**Evidence:** +```json +{ + "auth_enabled": false, + "tenant_id": "0ec9f34c-17c8-4541-b084-7d64ecdcc997", + "client_id": "cc31fd45-1eca-431f-a2c6-ba81cd4c5d50" +} +``` + +**Impact:** Low direct impact (tenant/client IDs are not secrets), but aids reconnaissance and narrows the attack surface. + +**Recommendation:** +- Return empty strings for `tenant_id`/`client_id` when `auth_enabled=false`. +- Gate `/metrics` behind IP allowlist or admin auth (standard Prometheus practice). + +### 6. Webhook Validation Token Echoed Without Sanitization + +**Finding:** The `/api/webhooks/graph` endpoint echoes `validationToken` query parameter as `text/plain` without any sanitization or length limits. + +**Evidence:** +```bash +curl -X POST "http://localhost:8001/api/webhooks/graph?validationToken=" +# Returns: with Content-Type: text/plain +``` + +**Impact:** Low in the intended Microsoft Graph flow (token is Microsoft-generated), but if the endpoint is hit directly, an attacker could use this for cache poisoning, response splitting, or social engineering by making the endpoint return attacker-controlled content. + +**Recommendation:** Validate the validationToken format (e.g., JWT-like structure, length limits) before echoing, or set `Content-Type: text/plain; charset=utf-8` with `X-Content-Type-Options: nosniff` to reduce MIME confusion risk. + +--- + +## INFO (Positive Findings) + +### A. No Stack Traces in Error Responses + +All errors (422, 404, 429, 500 if triggered) return generic JSON messages without internal details or stack traces. Good. + +### B. Pydantic Input Validation is Effective + +- `page_size` capped at 500 (returns 422 for 501, 0, -1) +- `hours` capped at 720 (returns 422 for 721) +- Invalid cursors return 400 with "Invalid cursor" +- Malformed JSON bodies return 422 with field-level validation errors +- `AlertCondition` op field strictly validated against `Literal["eq", "neq", "contains", "in", "after_hours"]` + +### C. NoSQL Injection Resistant + +MongoDB operators passed as string filter values are treated as literals, not operators: + +```bash +curl "http://localhost:8001/api/events?operation=\$ne" +# Returns 0 results (treated as literal string "$ne") +``` + +The `_build_query()` function in `events.py` uses `re.escape()` for search input and constructs queries safely. + +### D. Bulk Tags Pre-Count Check Works + +`bulk_tags` endpoint capped at 10,000 matched documents via pre-count check. 93 events were successfully tagged with no bypass. + +### E. Rate Limiting Works When Redis is Healthy + +- `/api/fetch-audit-logs`: 429 after 11 requests (limit: 10/hr) +- `/api/events`: 429 after ~120 requests (limit: 120/min) +- Exempt paths work correctly: `/health`, `/metrics`, `/api/config/auth`, `/api/config/features` +- `Retry-After` header is returned on 429 responses + +--- + +## Recommendations Summary + +| Priority | Action | Effort | +|----------|--------|--------| +| P0 | Fix CORS: do not allow credentials with wildcard/reflected origins | Small | +| P1 | Add security headers middleware (X-Content-Type-Options, X-Frame-Options, HSTS, Referrer-Policy) | Small | +| P2 | Make rate limiter fail-closed on Redis errors | Small | +| P2 | Disable OpenAPI docs in production or gate behind auth | Small | +| P3 | Sanitize or validate webhook validationToken before echo | Small | +| P3 | Gate `/metrics` behind IP allowlist | Small | +| P3 | Hide tenant_id/client_id from `/api/config/auth` when auth is disabled | Tiny | +| P4 | Consider Alpine.js CSP build to remove `unsafe-eval` from script-src | Medium | + +--- + +## Test Environment + +``` +Backend: uvicorn on localhost:8001 (auth=false, ai=false) +MongoDB: docker container, port 27018 +Redis: docker container, port 6380 +``` + +*Test commands and raw outputs available in `/tmp/pen_test*.sh` scripts.* diff --git a/RELEASE_NOTES_v1.7.12.md b/RELEASE_NOTES_v1.7.12.md new file mode 100644 index 0000000..f97990a --- /dev/null +++ b/RELEASE_NOTES_v1.7.12.md @@ -0,0 +1,43 @@ +# AOC v1.7.12 Release Notes + +**Release Date:** 2026-04-27 + +## Security Hardening (Penetration Test Remediation) + +This release addresses all findings from the internal soft penetration test of v1.7.11. + +### Critical Fix: CORS Credentials Leak +- **Issue:** When `AUTH_ENABLED=true` and `CORS_ORIGINS="*"`, the CORS middleware reflected any origin with `Access-Control-Allow-Credentials: true`, allowing cross-origin authenticated requests from attacker-controlled domains. +- **Fix:** When auth is enabled with a wildcard origin, `allow_credentials` is now forced to `False`. CORS still works for unauthenticated requests, but bearer tokens cannot be leaked cross-origin. + +### High Fix: Missing Security Headers +- Added `X-Content-Type-Options: nosniff` +- Added `X-Frame-Options: DENY` +- Added `Referrer-Policy: strict-origin-when-cross-origin` +- Added `Permissions-Policy` restricting browser features (accelerometer, camera, geolocation, gyroscope, magnetometer, microphone, payment, USB) + +### Medium Fixes +- **Rate limiter fail-closed:** Previously, a Redis outage silently disabled all rate limiting. The rate limiter now returns `429` when Redis is unreachable. +- **OpenAPI docs exposure:** `/docs`, `/redoc`, and `/openapi.json` are disabled by default. Set `DOCS_ENABLED=true` to re-enable (intended for development only). + +### Low Fixes +- **Information disclosure:** `/api/config/auth` no longer leaks `tenant_id` and `client_id` when `auth_enabled=false`. +- **Webhook validation token:** Added length cap (1024 chars) and ASCII-only validation before echoing `validationToken`. Response now includes `X-Content-Type-Options: nosniff`. + +## Files Changed + +| File | Change | +|------|--------| +| `backend/main.py` | CORS fix, security headers middleware, conditional OpenAPI docs | +| `backend/config.py` | Added `DOCS_ENABLED` setting | +| `backend/rate_limiter.py` | Fail-closed on Redis errors | +| `backend/routes/config.py` | Hide tenant/client IDs when auth disabled | +| `backend/routes/webhooks.py` | Validate validationToken before echo | +| `backend/tests/conftest.py` | Enhanced FakeRedis mock with `incr`/`expire` | +| `.env.example` | Documented `DOCS_ENABLED` | +| `VERSION` | Bumped to 1.7.12 | + +## Test Results + +- **80/80 pytest tests passing** +- Penetration test report: `PEN_TEST_REPORT_v1.7.11.md` diff --git a/VERSION b/VERSION index 8f8b3f7..e6a68e9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.11 +1.7.12 diff --git a/backend/config.py b/backend/config.py index dda30be..c8b540a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -76,6 +76,9 @@ class Settings(BaseSettings): RATE_LIMIT_REQUESTS: int = 120 RATE_LIMIT_WINDOW_SECONDS: int = 60 + # Security / docs exposure + DOCS_ENABLED: bool = False + _settings = Settings() @@ -127,3 +130,5 @@ WEBHOOK_CLIENT_SECRET = _settings.WEBHOOK_CLIENT_SECRET RATE_LIMIT_ENABLED = _settings.RATE_LIMIT_ENABLED RATE_LIMIT_REQUESTS = _settings.RATE_LIMIT_REQUESTS RATE_LIMIT_WINDOW_SECONDS = _settings.RATE_LIMIT_WINDOW_SECONDS + +DOCS_ENABLED = _settings.DOCS_ENABLED diff --git a/backend/main.py b/backend/main.py index 065e7de..53154f1 100644 --- a/backend/main.py +++ b/backend/main.py @@ -7,7 +7,7 @@ from pathlib import Path import structlog from audit_trail import log_action -from config import AI_FEATURES_ENABLED, AUTH_ENABLED, CORS_ORIGINS, ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES +from config import AI_FEATURES_ENABLED, AUTH_ENABLED, CORS_ORIGINS, DOCS_ENABLED, ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES from database import setup_indexes from fastapi import FastAPI, HTTPException, Request from fastapi.middleware.cors import CORSMiddleware @@ -51,20 +51,28 @@ def configure_logging(): configure_logging() logger = structlog.get_logger("aoc.fetcher") -app = FastAPI() +# Disable OpenAPI docs in production by default +app = FastAPI( + docs_url="/docs" if DOCS_ENABLED else None, + redoc_url="/redoc" if DOCS_ENABLED else None, + openapi_url="/openapi.json" if DOCS_ENABLED else None, +) -# CORS: warn if wildcard is used with auth enabled, but do not break deployments +# CORS: when auth is enabled, never allow credentials with wildcard origins _effective_cors = CORS_ORIGINS +_cors_credentials = True if AUTH_ENABLED and "*" in _effective_cors: logger.warning( - "CORS wildcard (*) is insecure when AUTH_ENABLED=true. Set CORS_ORIGINS to your actual origin(s) in production." + "CORS wildcard (*) is insecure with AUTH_ENABLED=true and allow_credentials. " + "Disabling credentials. Set CORS_ORIGINS to your actual origin(s)." ) + _cors_credentials = False app.add_middleware(CorrelationIdMiddleware) app.add_middleware( CORSMiddleware, allow_origins=_effective_cors, - allow_credentials=True, + allow_credentials=_cors_credentials, allow_methods=["*"], allow_headers=["*"], ) @@ -81,7 +89,7 @@ async def prometheus_middleware(request: Request, call_next): @app.middleware("http") -async def cache_control_middleware(request: Request, call_next): +async def security_headers_middleware(request: Request, call_next): response = await call_next(request) # Prevent caching of HTML and API responses by default if request.url.path.startswith("/api/") or request.url.path in ("/", "/index.html"): @@ -100,6 +108,11 @@ async def cache_control_middleware(request: Request, call_next): "img-src 'self' data:; " "font-src 'self' data:;" ) + # Additional security headers + response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["X-Frame-Options"] = "DENY" + response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin" + response.headers["Permissions-Policy"] = "accelerometer=(), camera=(), geolocation=(), gyroscope=(), magnetometer=(), microphone=(), payment=(), usb=()" return response diff --git a/backend/rate_limiter.py b/backend/rate_limiter.py index 462f8bc..ee953af 100644 --- a/backend/rate_limiter.py +++ b/backend/rate_limiter.py @@ -79,4 +79,5 @@ async def check_rate_limit(request: Request): except RateLimitExceeded: raise except Exception as exc: - logger.warning("Rate limiter Redis error; allowing request", error=str(exc)) + logger.warning("Rate limiter Redis error; failing closed", error=str(exc)) + raise RateLimitExceeded(retry_after=60) diff --git a/backend/routes/config.py b/backend/routes/config.py index 949215e..d96dc84 100644 --- a/backend/routes/config.py +++ b/backend/routes/config.py @@ -18,8 +18,8 @@ def auth_config(): logger.debug("Auth config requested", auth_enabled=AUTH_ENABLED) return { "auth_enabled": AUTH_ENABLED, - "tenant_id": AUTH_TENANT_ID, - "client_id": AUTH_CLIENT_ID, + "tenant_id": AUTH_TENANT_ID if AUTH_ENABLED else "", + "client_id": AUTH_CLIENT_ID if AUTH_ENABLED else "", "scope": AUTH_SCOPE, "redirect_uri": None, # frontend uses window.location.origin by default } diff --git a/backend/routes/webhooks.py b/backend/routes/webhooks.py index 7b32eaa..dc3b59c 100644 --- a/backend/routes/webhooks.py +++ b/backend/routes/webhooks.py @@ -17,7 +17,15 @@ async def graph_webhook(request: Request): if validation_token: # Microsoft sends validationToken as a query param during subscription creation. # Echo it back as plain text to prove endpoint ownership. - return Response(content=validation_token, media_type="text/plain") + # Validate to prevent content injection if endpoint is hit directly. + if len(validation_token) > 1024 or not validation_token.isascii(): + logger.warning("Invalid validationToken rejected", length=len(validation_token)) + return Response(status_code=400) + return Response( + content=validation_token, + media_type="text/plain", + headers={"X-Content-Type-Options": "nosniff"}, + ) try: body = await request.json() diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index c726436..6782284 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -51,18 +51,32 @@ def client(mock_events_collection, mock_watermarks_collection, monkeypatch): # Mock Redis so tests don't require a running Redis server class FakeRedis: + _store = {} + async def get(self, key): - return None + return self._store.get(key) async def setex(self, key, ttl, value): + self._store[key] = value + + async def incr(self, key): + self._store[key] = self._store.get(key, 0) + 1 + return self._store[key] + + async def expire(self, key, ttl): pass async def fake_get_arq_pool(): return FakeRedis() + async def fake_get_redis(): + return FakeRedis() + monkeypatch.setattr("redis_client.get_arq_pool", fake_get_arq_pool) + monkeypatch.setattr("redis_client.get_redis", fake_get_redis) monkeypatch.setattr("routes.ask.get_arq_pool", fake_get_arq_pool) - monkeypatch.setattr("routes.jobs.get_redis", fake_get_arq_pool) + monkeypatch.setattr("routes.jobs.get_redis", fake_get_redis) + monkeypatch.setattr("rate_limiter.get_redis", fake_get_redis) from main import app