feat: implement Phase 1 hardening
- Verify JWT signatures via JWKS in auth.py
- Fix broken frontend auth button references
- Add Pydantic Settings for env validation (RETENTION_DAYS, CORS_ORIGINS)
- Create MongoDB indexes + TTL on startup
- Add /health endpoint and CORS middleware
- Escape regex input in event queries
- Fix dedupe() return calculation in maintenance.py
- Replace basic logging with structured structlog JSON logs
- Update README and add ROADMAP.md
This commit is contained in:
@@ -1,10 +1,11 @@
|
||||
import time
|
||||
import logging
|
||||
import structlog
|
||||
from typing import Optional, Set
|
||||
|
||||
import requests
|
||||
from fastapi import Depends, HTTPException, Header
|
||||
from jose import jwt
|
||||
from jose.jwk import construct
|
||||
|
||||
from config import (
|
||||
AUTH_ENABLED,
|
||||
@@ -15,7 +16,7 @@ from config import (
|
||||
)
|
||||
|
||||
JWKS_CACHE = {"exp": 0, "keys": []}
|
||||
logger = logging.getLogger("aoc.auth")
|
||||
logger = structlog.get_logger("aoc.auth")
|
||||
|
||||
|
||||
def _get_jwks():
|
||||
@@ -48,9 +49,18 @@ def _allowed(claims: dict, allowed_roles: Set[str], allowed_groups: Set[str]) ->
|
||||
|
||||
def _decode_token(token: str, jwks):
|
||||
try:
|
||||
# Unverified decode to accept tokens from single-app setups without strict signing validation.
|
||||
claims = jwt.get_unverified_claims(token)
|
||||
header = jwt.get_unverified_header(token)
|
||||
kid = header.get("kid")
|
||||
key_dict = next((k for k in jwks if k.get("kid") == kid), None)
|
||||
if not key_dict:
|
||||
raise HTTPException(status_code=401, detail="Invalid token: signing key not found")
|
||||
|
||||
key = construct(key_dict)
|
||||
decode_kwargs = {"algorithms": ["RS256"]}
|
||||
if AUTH_CLIENT_ID:
|
||||
decode_kwargs["audience"] = AUTH_CLIENT_ID
|
||||
claims = jwt.decode(token, key, **decode_kwargs)
|
||||
|
||||
tid = claims.get("tid")
|
||||
iss = claims.get("iss", "")
|
||||
if AUTH_TENANT_ID and tid and tid != AUTH_TENANT_ID:
|
||||
@@ -61,7 +71,7 @@ def _decode_token(token: str, jwks):
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.warning("Token parse failed: %s", exc)
|
||||
logger.warning("Token verification failed", error=str(exc))
|
||||
raise HTTPException(status_code=401, detail="Invalid token")
|
||||
|
||||
|
||||
|
||||
@@ -1,22 +1,59 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
load_dotenv()
|
||||
|
||||
TENANT_ID = os.getenv("TENANT_ID")
|
||||
CLIENT_ID = os.getenv("CLIENT_ID")
|
||||
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
|
||||
MONGO_URI = os.getenv("MONGO_URI")
|
||||
DB_NAME = "micro_soc"
|
||||
class Settings(BaseSettings):
    """Application settings loaded from environment variables / .env files.

    Uses pydantic-settings so every field is type-validated at startup.
    Unknown env vars are ignored (``extra="ignore"``); the .env file is
    looked up in the current directory and one level up.
    """

    model_config = SettingsConfigDict(
        env_file=[".env", "../.env"],
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Microsoft Graph / App credentials
    TENANT_ID: str = ""
    CLIENT_ID: str = ""
    CLIENT_SECRET: str = ""

    # MongoDB
    MONGO_URI: str = ""
    DB_NAME: str = "micro_soc"

    # Periodic fetch
    ENABLE_PERIODIC_FETCH: bool = False
    FETCH_INTERVAL_MINUTES: int = 60

    # Auth (OIDC/Bearer) settings.
    # AUTH_ALLOWED_ROLES / AUTH_ALLOWED_GROUPS are comma-separated strings
    # here; they are parsed into sets by the module-level exports below.
    AUTH_ENABLED: bool = False
    AUTH_TENANT_ID: str = ""
    AUTH_CLIENT_ID: str = ""
    AUTH_SCOPE: str = ""
    AUTH_ALLOWED_ROLES: str = ""
    AUTH_ALLOWED_GROUPS: str = ""

    # Data retention in days (0 = disabled; drives the MongoDB TTL index)
    RETENTION_DAYS: int = 0

    # CORS: comma-separated list of allowed origins; "*" allows all
    CORS_ORIGINS: str = "*"
|
||||
|
||||
|
||||
# Instantiate once at import time; values are read from the environment/.env.
_settings = Settings()

# Backward-compatible module-level exports so existing `from config import X`
# call sites keep working after the move to pydantic-settings.
TENANT_ID = _settings.TENANT_ID
CLIENT_ID = _settings.CLIENT_ID
CLIENT_SECRET = _settings.CLIENT_SECRET
MONGO_URI = _settings.MONGO_URI
DB_NAME = _settings.DB_NAME

ENABLE_PERIODIC_FETCH = _settings.ENABLE_PERIODIC_FETCH
FETCH_INTERVAL_MINUTES = _settings.FETCH_INTERVAL_MINUTES

AUTH_ENABLED = _settings.AUTH_ENABLED
# Fall back to the Graph tenant/client IDs when dedicated auth values are unset.
AUTH_TENANT_ID = _settings.AUTH_TENANT_ID or _settings.TENANT_ID or ""
AUTH_CLIENT_ID = _settings.AUTH_CLIENT_ID or _settings.CLIENT_ID or ""
AUTH_SCOPE = _settings.AUTH_SCOPE
# Comma-separated strings parsed into sets of non-empty, stripped entries.
AUTH_ALLOWED_ROLES = {r.strip() for r in _settings.AUTH_ALLOWED_ROLES.split(",") if r.strip()}
AUTH_ALLOWED_GROUPS = {g.strip() for g in _settings.AUTH_ALLOWED_GROUPS.split(",") if g.strip()}

RETENTION_DAYS = _settings.RETENTION_DAYS
# Comma-separated origins -> list for CORSMiddleware ("*" becomes ["*"]).
CORS_ORIGINS = [o.strip() for o in _settings.CORS_ORIGINS.split(",") if o.strip()]
|
||||
|
||||
@@ -1,6 +1,43 @@
|
||||
from pymongo import MongoClient
|
||||
from config import MONGO_URI, DB_NAME
|
||||
from pymongo import MongoClient, ASCENDING, DESCENDING, TEXT
|
||||
from config import MONGO_URI, DB_NAME, RETENTION_DAYS
|
||||
import structlog
|
||||
|
||||
client = MongoClient(MONGO_URI)
|
||||
db = client[DB_NAME]
|
||||
events_collection = db["events"]
|
||||
logger = structlog.get_logger("aoc.database")
|
||||
|
||||
|
||||
def setup_indexes(max_retries: int = 5, delay: float = 2.0):
    """Ensure MongoDB indexes exist on ``events_collection``. Retries on connection errors.

    Creates the dedupe/query/text indexes and, when RETENTION_DAYS > 0, a TTL
    index on ``timestamp``; when retention is disabled the TTL index is
    dropped (best-effort).

    Args:
        max_retries: Attempts before giving up and re-raising the last error.
        delay: Seconds to sleep between attempts.

    Raises:
        Exception: whatever pymongo raised on the final failed attempt.
    """
    from time import sleep

    for attempt in range(1, max_retries + 1):
        try:
            # Unique sparse key used to skip duplicate events on insert.
            events_collection.create_index("dedupe_key", unique=True, sparse=True)
            events_collection.create_index([("timestamp", DESCENDING)])
            events_collection.create_index([("service", ASCENDING), ("timestamp", DESCENDING)])
            events_collection.create_index("id")
            events_collection.create_index(
                [("actor_display", TEXT), ("raw_text", TEXT), ("operation", TEXT)],
                name="text_search_index",
            )
            if RETENTION_DAYS > 0:
                ttl_seconds = RETENTION_DAYS * 24 * 60 * 60
                try:
                    events_collection.create_index(
                        [("timestamp", ASCENDING)],
                        expireAfterSeconds=ttl_seconds,
                        name="ttl_timestamp",
                    )
                except Exception:
                    # MongoDB refuses to change expireAfterSeconds on an
                    # existing index (IndexOptionsConflict), so a changed
                    # RETENTION_DAYS would previously crash startup here.
                    # Drop and recreate so the new retention takes effect.
                    events_collection.drop_index("ttl_timestamp")
                    events_collection.create_index(
                        [("timestamp", ASCENDING)],
                        expireAfterSeconds=ttl_seconds,
                        name="ttl_timestamp",
                    )
            else:
                try:
                    events_collection.drop_index("ttl_timestamp")
                except Exception:
                    # Best-effort: the index may simply not exist.
                    pass
            logger.info("MongoDB indexes ensured")
            return
        except Exception as exc:
            if attempt == max_retries:
                logger.error("Failed to ensure MongoDB indexes", error=str(exc))
                raise
            logger.warning("MongoDB not ready, retrying...", attempt=attempt, error=str(exc))
            sleep(delay)
|
||||
|
||||
@@ -299,8 +299,7 @@ async function initAuth() {
|
||||
}
|
||||
|
||||
if (!authConfig?.auth_enabled) {
|
||||
loginBtn.classList.add('hidden');
|
||||
logoutBtn.classList.add('hidden');
|
||||
authBtn.classList.add('hidden');
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,41 +2,85 @@ import asyncio
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI
|
||||
import structlog
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from database import setup_indexes
|
||||
from routes.fetch import router as fetch_router, run_fetch
|
||||
from routes.events import router as events_router
|
||||
from routes.config import router as config_router
|
||||
from config import ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES
|
||||
from config import ENABLE_PERIODIC_FETCH, FETCH_INTERVAL_MINUTES, CORS_ORIGINS
|
||||
|
||||
|
||||
def configure_logging():
    """Configure structlog to emit one JSON object per log line via stdlib logging."""
    structlog.configure(
        processors=[
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            # Supports %-style positional args in log calls.
            structlog.stdlib.PositionalArgumentsFormatter(),
            # ISO-8601 timestamp on every event.
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            # Final renderer: serialize the event dict as JSON.
            structlog.processors.JSONRenderer(),
        ],
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
    # Route output through stdlib logging; the message is already rendered JSON,
    # so the format string passes it through untouched.
    logging.basicConfig(format="%(message)s", level=logging.INFO)
|
||||
|
||||
|
||||
configure_logging()
|
||||
logger = structlog.get_logger("aoc.fetcher")
|
||||
|
||||
app = FastAPI()

# CORS: allowed origins come from the CORS_ORIGINS setting (comma-separated env var).
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All API routes live under /api; the static frontend is mounted at / elsewhere.
app.include_router(fetch_router, prefix="/api")
app.include_router(events_router, prefix="/api")
app.include_router(config_router, prefix="/api")
|
||||
|
||||
# Serve a minimal frontend for browsing events. Use an absolute path so it
|
||||
# works regardless of the working directory used to start uvicorn.
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Health probe: ping MongoDB and report status.

    Returns 200 with ``{"status": "ok", "database": "connected"}`` when the
    database answers a ``ping`` command; raises 503 otherwise.
    """
    # NOTE(review): local import — database is already imported at module top
    # for setup_indexes, so this appears stylistic; confirm no cycle concern.
    from database import db
    try:
        db.command("ping")
        return {"status": "ok", "database": "connected"}
    except Exception as exc:
        logger.error("Health check failed", error=str(exc))
        raise HTTPException(status_code=503, detail="Database unavailable") from exc
|
||||
|
||||
|
||||
frontend_dir = Path(__file__).parent / "frontend"
|
||||
app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")
|
||||
|
||||
|
||||
logger = logging.getLogger("aoc.fetcher")
|
||||
|
||||
|
||||
async def _periodic_fetch():
    """Run the fetch job forever, sleeping FETCH_INTERVAL_MINUTES between runs."""
    while True:
        try:
            # run_fetch is synchronous; off-load to a thread so the event loop stays responsive.
            await asyncio.to_thread(run_fetch)
            logger.info("Periodic fetch completed.")
        except Exception as exc:
            # Log and keep the loop alive; the next cycle retries.
            logger.error("Periodic fetch failed", error=str(exc))
        await asyncio.sleep(FETCH_INTERVAL_MINUTES * 60)
|
||||
|
||||
|
||||
@app.on_event("startup")
async def start_periodic_fetch():
    """Startup hook: ensure MongoDB indexes, then optionally start the fetch loop."""
    # Blocks startup until indexes exist; setup_indexes retries internally.
    setup_indexes()
    if ENABLE_PERIODIC_FETCH:
        # Keep the task handle on app.state so it stays referenced for the app's lifetime.
        app.state.fetch_task = asyncio.create_task(_periodic_fetch())
|
||||
|
||||
|
||||
@@ -79,7 +79,8 @@ def dedupe(limit: int = None, batch_size: int = 500) -> int:
|
||||
if to_delete:
|
||||
events_collection.delete_many({"_id": {"$in": to_delete}})
|
||||
|
||||
return len(seen) - processed if processed > len(seen) else 0
|
||||
removed = processed - len(seen)
|
||||
return removed if removed > 0 else 0
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -5,3 +5,5 @@ python-dotenv
|
||||
requests
|
||||
PyYAML
|
||||
python-jose[cryptography]
|
||||
pydantic-settings
|
||||
structlog
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from database import events_collection
|
||||
from auth import require_auth
|
||||
@@ -22,20 +23,21 @@ def list_events(
|
||||
if service:
|
||||
filters.append({"service": service})
|
||||
if actor:
|
||||
actor_safe = re.escape(actor)
|
||||
filters.append(
|
||||
{
|
||||
"$or": [
|
||||
{"actor_display": {"$regex": actor, "$options": "i"}},
|
||||
{"actor_upn": {"$regex": actor, "$options": "i"}},
|
||||
{"actor.user.userPrincipalName": {"$regex": actor, "$options": "i"}},
|
||||
{"actor_display": {"$regex": actor_safe, "$options": "i"}},
|
||||
{"actor_upn": {"$regex": actor_safe, "$options": "i"}},
|
||||
{"actor.user.userPrincipalName": {"$regex": actor_safe, "$options": "i"}},
|
||||
{"actor.user.id": actor},
|
||||
]
|
||||
}
|
||||
)
|
||||
if operation:
|
||||
filters.append({"operation": {"$regex": operation, "$options": "i"}})
|
||||
filters.append({"operation": {"$regex": re.escape(operation), "$options": "i"}})
|
||||
if result:
|
||||
filters.append({"result": {"$regex": result, "$options": "i"}})
|
||||
filters.append({"result": {"$regex": re.escape(result), "$options": "i"}})
|
||||
if start or end:
|
||||
time_filter = {}
|
||||
if start:
|
||||
@@ -44,14 +46,15 @@ def list_events(
|
||||
time_filter["$lte"] = end
|
||||
filters.append({"timestamp": time_filter})
|
||||
if search:
|
||||
search_safe = re.escape(search)
|
||||
filters.append(
|
||||
{
|
||||
"$or": [
|
||||
{"raw_text": {"$regex": search, "$options": "i"}},
|
||||
{"display_summary": {"$regex": search, "$options": "i"}},
|
||||
{"actor_display": {"$regex": search, "$options": "i"}},
|
||||
{"target_displays": {"$elemMatch": {"$regex": search, "$options": "i"}}},
|
||||
{"operation": {"$regex": search, "$options": "i"}},
|
||||
{"raw_text": {"$regex": search_safe, "$options": "i"}},
|
||||
{"display_summary": {"$regex": search_safe, "$options": "i"}},
|
||||
{"actor_display": {"$regex": search_safe, "$options": "i"}},
|
||||
{"target_displays": {"$elemMatch": {"$regex": search_safe, "$options": "i"}}},
|
||||
{"operation": {"$regex": search_safe, "$options": "i"}},
|
||||
]
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user