Add optional --dedupe flag to skip exact duplicate rows in the CSV output

2025-09-22 16:24:26 +02:00
parent 35b297a91a
commit e4ff804dfb
1 changed file with 32 additions and 11 deletions
--- a/intune.py
+++ b/intune.py
@@ -87,7 +87,7 @@ def parse_key_value_table(table_tag):

    return data

-def write_single_csv(policies, output_csv='policies.csv'):
+def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
    """
    Writes a single CSV with columns in this order:
      1) PolicyName
@@ -100,6 +100,7 @@ def write_single_csv(policies, output_csv='policies.csv'):
      8) Last modified

    Each row corresponds to one Setting.
+    If dedupe=True, exact duplicate rows (across all policies) are skipped.
    """
    # The exact order we want:
    columns = [
@@ -113,6 +114,10 @@ def write_single_csv(policies, output_csv='policies.csv'):
        "Last modified"
    ]

+    # De-duplication support (across the entire file)
+    seen_rows = set() if dedupe else None
+    rows_written = 0
+
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Write header
@@ -143,7 +148,13 @@ def write_single_csv(policies, output_csv='policies.csv'):
                    created,
                    last_modified
                ]
+                if seen_rows is not None:
+                    key = tuple(row)
+                    if key in seen_rows:
+                        continue
+                    seen_rows.add(key)
                writer.writerow(row)
+                rows_written += 1
                continue

            # Otherwise, write one row per setting
@@ -158,8 +169,15 @@ def write_single_csv(policies, output_csv='policies.csv'):
                    created,
                    last_modified
                ]
+                if seen_rows is not None:
+                    key = tuple(row)
+                    if key in seen_rows:
+                        continue
+                    seen_rows.add(key)
                writer.writerow(row)
+                rows_written += 1

+    return rows_written

 def main():
    parser = argparse.ArgumentParser(
@@ -182,6 +200,14 @@ def main():
        ),
        default=None,
    )
+    parser.add_argument(
+        "--dedupe",
+        action="store_true",
+        help=(
+            "Drop exact duplicate rows in the output (by the full row: PolicyName, Description, "
+            "SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
+        ),
+    )

    args = parser.parse_args()

@@ -200,17 +226,12 @@ def main():

    policies = parse_tables_from_markdown(input_path)

-    # Count rows that will be written (one per setting, or one if no settings)
-    row_count = 0
-    for p in policies:
-        settings = p.get("settings") or {}
-        row_count += max(len(settings), 1)
+    rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe)

-    write_single_csv(policies, output_csv)
-
-    print(
-        f"Done! Parsed {len(policies)} policies and wrote {row_count} rows to '{output_csv}'."
-    )
+    msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'."
+    if args.dedupe:
+        msg += " (duplicates removed)"
+    print(msg)

 if __name__ == "__main__":
    main()