From e4ff804dfb2bdb299af4b651d74b50eae9396f53 Mon Sep 17 00:00:00 2001 From: Tomas Kracmar Date: Mon, 22 Sep 2025 16:24:26 +0200 Subject: [PATCH] Adding dedupe --- intune.py | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/intune.py b/intune.py index f65c458..019a3d3 100644 --- a/intune.py +++ b/intune.py @@ -87,7 +87,7 @@ def parse_key_value_table(table_tag): return data -def write_single_csv(policies, output_csv='policies.csv'): +def write_single_csv(policies, output_csv='policies.csv', dedupe=False): """ Writes a single CSV with columns in this order: 1) PolicyName @@ -100,6 +100,7 @@ def write_single_csv(policies, output_csv='policies.csv'): 8) Last modified Each row corresponds to one Setting. + If dedupe=True, exact duplicate rows (across all policies) are skipped. """ # The exact order we want: columns = [ @@ -113,6 +114,10 @@ def write_single_csv(policies, output_csv='policies.csv'): "Last modified" ] + # De-duplication support (across the entire file) + seen_rows = set() if dedupe else None + rows_written = 0 + with open(output_csv, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) # Write header @@ -143,7 +148,13 @@ def write_single_csv(policies, output_csv='policies.csv'): created, last_modified ] + if seen_rows is not None: + key = tuple(row) + if key in seen_rows: + continue + seen_rows.add(key) writer.writerow(row) + rows_written += 1 continue # Otherwise, write one row per setting @@ -158,8 +169,15 @@ def write_single_csv(policies, output_csv='policies.csv'): created, last_modified ] + if seen_rows is not None: + key = tuple(row) + if key in seen_rows: + continue + seen_rows.add(key) writer.writerow(row) + rows_written += 1 + return rows_written def main(): parser = argparse.ArgumentParser( @@ -182,6 +200,14 @@ def main(): ), default=None, ) + parser.add_argument( + "--dedupe", + action="store_true", + help=( + "Drop exact duplicate rows in the output (by the full row: PolicyName, Description, " + "SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)." + ), + ) args = parser.parse_args() @@ -200,17 +226,12 @@ def main(): policies = parse_tables_from_markdown(input_path) - # Count rows that will be written (one per setting, or one if no settings) - row_count = 0 - for p in policies: - settings = p.get("settings") or {} - row_count += max(len(settings), 1) + rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe) - write_single_csv(policies, output_csv) - - print( - f"Done! Parsed {len(policies)} policies and wrote {row_count} rows to '{output_csv}'." - ) + msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'." + if args.dedupe: + msg += " (duplicates removed)" + print(msg) if __name__ == "__main__": main()