Add --dedupe option to skip exact duplicate rows in the CSV output
This commit is contained in:
43
intune.py
43
intune.py
@@ -87,7 +87,7 @@ def parse_key_value_table(table_tag):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def write_single_csv(policies, output_csv='policies.csv'):
|
def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
||||||
"""
|
"""
|
||||||
Writes a single CSV with columns in this order:
|
Writes a single CSV with columns in this order:
|
||||||
1) PolicyName
|
1) PolicyName
|
||||||
@@ -100,6 +100,7 @@ def write_single_csv(policies, output_csv='policies.csv'):
|
|||||||
8) Last modified
|
8) Last modified
|
||||||
|
|
||||||
Each row corresponds to one Setting.
|
Each row corresponds to one Setting.
|
||||||
|
If dedupe=True, exact duplicate rows (across all policies) are skipped.
|
||||||
"""
|
"""
|
||||||
# The exact order we want:
|
# The exact order we want:
|
||||||
columns = [
|
columns = [
|
||||||
@@ -113,6 +114,10 @@ def write_single_csv(policies, output_csv='policies.csv'):
|
|||||||
"Last modified"
|
"Last modified"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# De-duplication support (across the entire file)
|
||||||
|
seen_rows = set() if dedupe else None
|
||||||
|
rows_written = 0
|
||||||
|
|
||||||
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
|
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
|
||||||
writer = csv.writer(f)
|
writer = csv.writer(f)
|
||||||
# Write header
|
# Write header
|
||||||
@@ -143,7 +148,13 @@ def write_single_csv(policies, output_csv='policies.csv'):
|
|||||||
created,
|
created,
|
||||||
last_modified
|
last_modified
|
||||||
]
|
]
|
||||||
|
if seen_rows is not None:
|
||||||
|
key = tuple(row)
|
||||||
|
if key in seen_rows:
|
||||||
|
continue
|
||||||
|
seen_rows.add(key)
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
rows_written += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Otherwise, write one row per setting
|
# Otherwise, write one row per setting
|
||||||
@@ -158,8 +169,15 @@ def write_single_csv(policies, output_csv='policies.csv'):
|
|||||||
created,
|
created,
|
||||||
last_modified
|
last_modified
|
||||||
]
|
]
|
||||||
|
if seen_rows is not None:
|
||||||
|
key = tuple(row)
|
||||||
|
if key in seen_rows:
|
||||||
|
continue
|
||||||
|
seen_rows.add(key)
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
rows_written += 1
|
||||||
|
|
||||||
|
return rows_written
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
@@ -182,6 +200,14 @@ def main():
|
|||||||
),
|
),
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dedupe",
|
||||||
|
action="store_true",
|
||||||
|
help=(
|
||||||
|
"Drop exact duplicate rows in the output (by the full row: PolicyName, Description, "
|
||||||
|
"SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -200,17 +226,12 @@ def main():
|
|||||||
|
|
||||||
policies = parse_tables_from_markdown(input_path)
|
policies = parse_tables_from_markdown(input_path)
|
||||||
|
|
||||||
# Count rows that will be written (one per setting, or one if no settings)
|
rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe)
|
||||||
row_count = 0
|
|
||||||
for p in policies:
|
|
||||||
settings = p.get("settings") or {}
|
|
||||||
row_count += max(len(settings), 1)
|
|
||||||
|
|
||||||
write_single_csv(policies, output_csv)
|
msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'."
|
||||||
|
if args.dedupe:
|
||||||
print(
|
msg += " (duplicates removed)"
|
||||||
f"Done! Parsed {len(policies)} policies and wrote {row_count} rows to '{output_csv}'."
|
print(msg)
|
||||||
)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user