fix: dedupe existing rules + unique index to prevent duplicates

- Add unique index on alert_rules.name in setup_indexes()
- seed_default_rules() now removes duplicates by name before upserting
- For each rule name, keeps the first document in ascending _id order (the oldest, assuming default ObjectId _ids) and deletes the rest
This commit is contained in:
2026-04-22 15:13:41 +02:00
parent 5f7a98f21c
commit 646d61f72e
2 changed files with 11 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ def setup_indexes(max_retries: int = 5, delay: float = 2.0):
events_collection.create_index([("service", ASCENDING), ("timestamp", DESCENDING)]) events_collection.create_index([("service", ASCENDING), ("timestamp", DESCENDING)])
events_collection.create_index("id") events_collection.create_index("id")
saved_searches_collection.create_index([("created_by", ASCENDING), ("created_at", DESCENDING)]) saved_searches_collection.create_index([("created_by", ASCENDING), ("created_at", DESCENDING)])
db["alert_rules"].create_index("name", unique=True)
events_collection.create_index( events_collection.create_index(
[("actor_display", TEXT), ("raw_text", TEXT), ("operation", TEXT)], [("actor_display", TEXT), ("raw_text", TEXT), ("operation", TEXT)],
name="text_search_index", name="text_search_index",

View File

@@ -12,6 +12,7 @@ from datetime import UTC, datetime, timedelta
import structlog import structlog
from config import ALERT_DEDUPE_MINUTES, ALERT_WEBHOOK_FORMAT, ALERT_WEBHOOK_URL from config import ALERT_DEDUPE_MINUTES, ALERT_WEBHOOK_FORMAT, ALERT_WEBHOOK_URL
from database import db from database import db
from pymongo import ASCENDING
logger = structlog.get_logger("aoc.rules") logger = structlog.get_logger("aoc.rules")
rules_collection = db["alert_rules"] rules_collection = db["alert_rules"]
@@ -137,6 +138,15 @@ def _create_alert(rule: dict, event: dict):
def seed_default_rules(): def seed_default_rules():
"""Upsert pre-built admin-ops rule templates. Safe for concurrent startup.""" """Upsert pre-built admin-ops rule templates. Safe for concurrent startup."""
# Cleanup: remove duplicates by name, keep the oldest (_id ascending). Runs on every call; deletes nothing once no duplicates remain.
pipeline = [
{"$sort": {"_id": ASCENDING}},
{"$group": {"_id": "$name", "first_id": {"$first": "$_id"}}},
]
seen = {doc["_id"]: doc["first_id"] for doc in rules_collection.aggregate(pipeline)}
for name, keep_id in seen.items():
rules_collection.delete_many({"name": name, "_id": {"$ne": keep_id}})
defaults = [ defaults = [
{ {
"name": "Failed Conditional Access", "name": "Failed Conditional Access",