feat: implement Phase 1 hardening

- Verify JWT signatures via JWKS in auth.py
- Fix broken frontend auth button references
- Add Pydantic Settings for env validation (RETENTION_DAYS, CORS_ORIGINS)
- Create MongoDB indexes + TTL on startup
- Add /health endpoint and CORS middleware
- Escape regex input in event queries
- Fix dedupe() return calculation in maintenance.py
- Replace basic logging with structured structlog JSON logs
- Update README and add ROADMAP.md
This commit is contained in:
2026-04-14 11:48:29 +02:00
parent f9f1399f57
commit 4f6e16d64d
12 changed files with 392 additions and 46 deletions

View File

@@ -1,10 +1,11 @@
import time
import logging
import structlog
from typing import Optional, Set
import requests
from fastapi import Depends, HTTPException, Header
from jose import jwt
from jose.jwk import construct
from config import (
AUTH_ENABLED,
@@ -15,7 +16,7 @@ from config import (
)
# Cached JWKS document: "exp" is the epoch time at which the cached keys
# expire and must be re-fetched; "keys" holds the raw JWK dicts.
JWKS_CACHE = {"exp": 0, "keys": []}

# Structured logger for auth events. The legacy stdlib
# `logging.getLogger("aoc.auth")` line left over from the diff merge was
# dead code (immediately rebound) and has been removed.
logger = structlog.get_logger("aoc.auth")
def _get_jwks():
@@ -48,9 +49,18 @@ def _allowed(claims: dict, allowed_roles: Set[str], allowed_groups: Set[str]) ->
def _decode_token(token: str, jwks):
try:
# Unverified decode to accept tokens from single-app setups without strict signing validation.
claims = jwt.get_unverified_claims(token)
header = jwt.get_unverified_header(token)
kid = header.get("kid")
key_dict = next((k for k in jwks if k.get("kid") == kid), None)
if not key_dict:
raise HTTPException(status_code=401, detail="Invalid token: signing key not found")
key = construct(key_dict)
decode_kwargs = {"algorithms": ["RS256"]}
if AUTH_CLIENT_ID:
decode_kwargs["audience"] = AUTH_CLIENT_ID
claims = jwt.decode(token, key, **decode_kwargs)
tid = claims.get("tid")
iss = claims.get("iss", "")
if AUTH_TENANT_ID and tid and tid != AUTH_TENANT_ID:
@@ -61,7 +71,7 @@ def _decode_token(token: str, jwks):
except HTTPException:
raise
except Exception as exc:
logger.warning("Token parse failed: %s", exc)
logger.warning("Token verification failed", error=str(exc))
raise HTTPException(status_code=401, detail="Invalid token")

View File

@@ -1,22 +1,59 @@
import os
from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict
load_dotenv()
TENANT_ID = os.getenv("TENANT_ID")
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = "micro_soc"
class Settings(BaseSettings):
    """Application configuration loaded from the environment / .env files.

    Unknown environment variables are ignored (``extra="ignore"``) so this
    app can share an env file with other services. The marker-less diff had
    fused the removed legacy ``os.getenv`` module-level lines into this class
    body; they are removed here — the module-level exports below provide the
    backward-compatible names.
    """

    model_config = SettingsConfigDict(
        # Look for a .env beside the app and one directory up.
        env_file=[".env", "../.env"],
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Microsoft Graph / App credentials
    TENANT_ID: str = ""
    CLIENT_ID: str = ""
    CLIENT_SECRET: str = ""

    # MongoDB
    MONGO_URI: str = ""
    DB_NAME: str = "micro_soc"

    # Periodic fetch
    ENABLE_PERIODIC_FETCH: bool = False
    FETCH_INTERVAL_MINUTES: int = 60

    # Auth (OIDC/Bearer) settings
    AUTH_ENABLED: bool = False
    AUTH_TENANT_ID: str = ""
    AUTH_CLIENT_ID: str = ""
    AUTH_SCOPE: str = ""
    # Comma-separated lists; parsed into sets by the module-level exports.
    AUTH_ALLOWED_ROLES: str = ""
    AUTH_ALLOWED_GROUPS: str = ""

    # Data retention in days (0 = disabled)
    RETENTION_DAYS: int = 0

    # Comma-separated allowed CORS origins ("*" = any)
    CORS_ORIGINS: str = "*"
# Single Settings instance, evaluated once at import time.
_settings = Settings()

# Backward-compatible module-level exports
TENANT_ID = _settings.TENANT_ID
CLIENT_ID = _settings.CLIENT_ID
CLIENT_SECRET = _settings.CLIENT_SECRET
MONGO_URI = _settings.MONGO_URI
DB_NAME = _settings.DB_NAME
ENABLE_PERIODIC_FETCH = _settings.ENABLE_PERIODIC_FETCH
FETCH_INTERVAL_MINUTES = _settings.FETCH_INTERVAL_MINUTES
AUTH_ENABLED = _settings.AUTH_ENABLED
# Auth tenant/client fall back to the Graph app credentials when unset.
AUTH_TENANT_ID = _settings.AUTH_TENANT_ID or _settings.TENANT_ID or ""
AUTH_CLIENT_ID = _settings.AUTH_CLIENT_ID or _settings.CLIENT_ID or ""
AUTH_SCOPE = _settings.AUTH_SCOPE
# Comma-separated strings parsed into sets; blanks are dropped.
AUTH_ALLOWED_ROLES = {r.strip() for r in _settings.AUTH_ALLOWED_ROLES.split(",") if r.strip()}
AUTH_ALLOWED_GROUPS = {g.strip() for g in _settings.AUTH_ALLOWED_GROUPS.split(",") if g.strip()}
RETENTION_DAYS = _settings.RETENTION_DAYS
# CORS_ORIGINS becomes a list of origins for the CORS middleware.
CORS_ORIGINS = [o.strip() for o in _settings.CORS_ORIGINS.split(",") if o.strip()]

View File

@@ -1,6 +1,43 @@
from pymongo import MongoClient
from config import MONGO_URI, DB_NAME
from pymongo import MongoClient, ASCENDING, DESCENDING, TEXT
from config import MONGO_URI, DB_NAME, RETENTION_DAYS
import structlog
# Shared MongoDB client and handles, created once at import time.
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
events_collection = db["events"]

logger = structlog.get_logger("aoc.database")
def setup_indexes(max_retries: int = 5, delay: float = 2.0):
    """Ensure MongoDB indexes exist. Retries on connection errors.

    Tries up to ``max_retries`` times, sleeping ``delay`` seconds between
    attempts; the final failure is logged and re-raised.
    """
    from time import sleep

    attempt = 0
    while True:
        attempt += 1
        try:
            # Unique dedupe key; sparse so documents without it are allowed.
            events_collection.create_index("dedupe_key", unique=True, sparse=True)
            events_collection.create_index([("timestamp", DESCENDING)])
            events_collection.create_index([("service", ASCENDING), ("timestamp", DESCENDING)])
            events_collection.create_index("id")
            events_collection.create_index(
                [("actor_display", TEXT), ("raw_text", TEXT), ("operation", TEXT)],
                name="text_search_index",
            )
            if RETENTION_DAYS > 0:
                # TTL index: Mongo expires documents past the retention window.
                events_collection.create_index(
                    [("timestamp", ASCENDING)],
                    expireAfterSeconds=RETENTION_DAYS * 24 * 60 * 60,
                    name="ttl_timestamp",
                )
            else:
                # Retention disabled: best-effort removal of any stale TTL index.
                try:
                    events_collection.drop_index("ttl_timestamp")
                except Exception:
                    pass
            logger.info("MongoDB indexes ensured")
            return
        except Exception as err:
            if attempt >= max_retries:
                logger.error("Failed to ensure MongoDB indexes", error=str(err))
                raise
            logger.warning("MongoDB not ready, retrying...", attempt=attempt, error=str(err))
            sleep(delay)

View File

@@ -299,8 +299,7 @@ async function initAuth() {
}
if (!authConfig?.auth_enabled) {
loginBtn.classList.add('hidden');
logoutBtn.classList.add('hidden');
authBtn.classList.add('hidden');
return;
}

View File

@@ -2,41 +2,85 @@ import asyncio
import logging
from pathlib import Path
from fastapi import FastAPI
import structlog
from fastapi import FastAPI, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from database import setup_indexes
from routes.fetch import router as fetch_router, run_fetch
from routes.events import router as events_router
from routes.config import router as config_router
from config import ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES
from config import ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES, CORS_ORIGINS
def configure_logging():
    """Configure structlog to emit JSON log lines via the stdlib backend."""
    # Processor pipeline applied to every event dict; order matters, with
    # JSONRenderer last so the final output is a single JSON line.
    pipeline = [
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ]
    structlog.configure(
        processors=pipeline,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
    # Route output through the stdlib root logger at INFO, message-only format
    # (structlog has already rendered the JSON payload).
    logging.basicConfig(format="%(message)s", level=logging.INFO)
# Configure structured logging before any logger is created.
configure_logging()
logger = structlog.get_logger("aoc.fetcher")

app = FastAPI()

# CORS: allowed origins come from the CORS_ORIGINS setting (list parsed in config).
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All API routes are served under /api.
app.include_router(fetch_router, prefix="/api")
app.include_router(events_router, prefix="/api")
app.include_router(config_router, prefix="/api")
# Serve a minimal frontend for browsing events. Use an absolute path so it
# works regardless of the working directory used to start uvicorn.
@app.get("/health")
async def health_check():
from database import db
try:
db.command("ping")
return {"status": "ok", "database": "connected"}
except Exception as exc:
logger.error("Health check failed", error=str(exc))
raise HTTPException(status_code=503, detail="Database unavailable") from exc
# Static frontend mounted at the root; absolute path so it works regardless
# of the working directory uvicorn was started from.
frontend_dir = Path(__file__).parent / "frontend"
app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")
# NOTE: the stale `logger = logging.getLogger("aoc.fetcher")` line left here
# by the diff merge has been removed — it would rebind the structlog logger
# configured above to an unconfigured stdlib logger.
async def _periodic_fetch():
    """Run run_fetch() forever on a fixed interval.

    Errors are logged and swallowed so one failed fetch does not kill the
    background task. The duplicated legacy ``%s``-style error log line left
    by the diff merge has been removed, keeping only the structlog form.
    """
    while True:
        try:
            # run_fetch is blocking; run it off the event loop.
            await asyncio.to_thread(run_fetch)
            logger.info("Periodic fetch completed.")
        except Exception as exc:
            logger.error("Periodic fetch failed", error=str(exc))
        await asyncio.sleep(FETCH_INTERVAL_MINUTES * 60)
@app.on_event("startup")
async def start_periodic_fetch():
setup_indexes()
if ENABLE_PERIODIC_FETCH:
app.state.fetch_task = asyncio.create_task(_periodic_fetch())

View File

@@ -79,7 +79,8 @@ def dedupe(limit: int = None, batch_size: int = 500) -> int:
if to_delete:
events_collection.delete_many({"_id": {"$in": to_delete}})
return len(seen) - processed if processed > len(seen) else 0
removed = processed - len(seen)
return removed if removed > 0 else 0
def main():

View File

@@ -5,3 +5,5 @@ python-dotenv
requests
PyYAML
python-jose[cryptography]
pydantic-settings
structlog

View File

@@ -1,3 +1,4 @@
import re
from fastapi import APIRouter, HTTPException, Depends
from database import events_collection
from auth import require_auth
@@ -22,20 +23,21 @@ def list_events(
if service:
filters.append({"service": service})
if actor:
actor_safe = re.escape(actor)
filters.append(
{
"$or": [
{"actor_display": {"$regex": actor, "$options": "i"}},
{"actor_upn": {"$regex": actor, "$options": "i"}},
{"actor.user.userPrincipalName": {"$regex": actor, "$options": "i"}},
{"actor_display": {"$regex": actor_safe, "$options": "i"}},
{"actor_upn": {"$regex": actor_safe, "$options": "i"}},
{"actor.user.userPrincipalName": {"$regex": actor_safe, "$options": "i"}},
{"actor.user.id": actor},
]
}
)
if operation:
filters.append({"operation": {"$regex": operation, "$options": "i"}})
filters.append({"operation": {"$regex": re.escape(operation), "$options": "i"}})
if result:
filters.append({"result": {"$regex": result, "$options": "i"}})
filters.append({"result": {"$regex": re.escape(result), "$options": "i"}})
if start or end:
time_filter = {}
if start:
@@ -44,14 +46,15 @@ def list_events(
time_filter["$lte"] = end
filters.append({"timestamp": time_filter})
if search:
search_safe = re.escape(search)
filters.append(
{
"$or": [
{"raw_text": {"$regex": search, "$options": "i"}},
{"display_summary": {"$regex": search, "$options": "i"}},
{"actor_display": {"$regex": search, "$options": "i"}},
{"target_displays": {"$elemMatch": {"$regex": search, "$options": "i"}}},
{"operation": {"$regex": search, "$options": "i"}},
{"raw_text": {"$regex": search_safe, "$options": "i"}},
{"display_summary": {"$regex": search_safe, "$options": "i"}},
{"actor_display": {"$regex": search_safe, "$options": "i"}},
{"target_displays": {"$elemMatch": {"$regex": search_safe, "$options": "i"}}},
{"operation": {"$regex": search_safe, "$options": "i"}},
]
}
)