From 646d61f72e579d37df260eb30a22664c83022b6b Mon Sep 17 00:00:00 2001 From: Tomas Kracmar Date: Wed, 22 Apr 2026 15:13:41 +0200 Subject: [PATCH] fix: dedupe existing rules + unique index to prevent duplicates - Add unique index on alert_rules.name in setup_indexes() - seed_default_rules() now removes duplicates by name before upserting - Keeps the oldest document (_id ascending) when deduping --- backend/database.py | 1 + backend/rules.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/backend/database.py b/backend/database.py index 65b25ea..c57cd32 100644 --- a/backend/database.py +++ b/backend/database.py @@ -23,6 +23,7 @@ def setup_indexes(max_retries: int = 5, delay: float = 2.0): events_collection.create_index([("service", ASCENDING), ("timestamp", DESCENDING)]) events_collection.create_index("id") saved_searches_collection.create_index([("created_by", ASCENDING), ("created_at", DESCENDING)]) + db["alert_rules"].create_index("name", unique=True) events_collection.create_index( [("actor_display", TEXT), ("raw_text", TEXT), ("operation", TEXT)], name="text_search_index", diff --git a/backend/rules.py b/backend/rules.py index 8784ddf..35a7cc2 100644 --- a/backend/rules.py +++ b/backend/rules.py @@ -12,6 +12,7 @@ from datetime import UTC, datetime, timedelta import structlog from config import ALERT_DEDUPE_MINUTES, ALERT_WEBHOOK_FORMAT, ALERT_WEBHOOK_URL from database import db +from pymongo import ASCENDING logger = structlog.get_logger("aoc.rules") rules_collection = db["alert_rules"] @@ -137,6 +138,15 @@ def _create_alert(rule: dict, event: dict): def seed_default_rules(): """Upsert pre-built admin-ops rule templates. Safe for concurrent startup.""" + # One-time cleanup: remove duplicates by name, keep the oldest (_id ascending) + pipeline = [ + {"$sort": {"_id": ASCENDING}}, + {"$group": {"_id": "$name", "first_id": {"$first": "$_id"}}}, + ] + seen = {doc["_id"]: doc["first_id"] for doc in rules_collection.aggregate(pipeline)} + for name, keep_id in seen.items(): + rules_collection.delete_many({"name": name, "_id": {"$ne": keep_id}}) + defaults = [ { "name": "Failed Conditional Access",