diff --git a/README.md b/README.md index b0d2a7a..65f4d71 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,63 @@ # IntunePolicyParser -Parsing policies from Intune to Excel \ No newline at end of file + +A utility to parse **Markdown/HTML documentation exports** from the [IntuneManagement](https://github.com/...) tool into flat CSV files that can be further analyzed in Excel or Power BI. + +## Source files + +This parser expects as input a **Documentation export** from the IntuneManagement tool. Use the `Export Documentation (Markdown)` feature, which produces a `.md` file containing: + +- heading sections for each policy, +- two `<table>` blocks under each heading (Basics + Settings). + +## Usage + +```bash +python intune.py [input_file.md] [-o OUTPUT.csv] [--dedupe] [--dedupe-scope {exact,policy,global}] [--newline {lf,crlf}] +``` + +### Arguments + +- `input_file.md` + Path to the Intune documentation export (Markdown/HTML). + Defaults to `cqre.md`. + +- `-o, --output OUTPUT.csv` + Output CSV file. If not provided, the name is derived from the input (e.g. `input.md -> input.csv`). + +- `--dedupe` + Enable removal of duplicate rows. + +- `--dedupe-scope {exact,policy,global}` + How duplicates are identified: + - `exact` → full row must match (default). + - `policy` → unique per (PolicyName + SettingKey + SettingValue). + - `global` → unique per (SettingKey + SettingValue + Policy type + Platform). + +- `--newline {lf,crlf}` + Choose line endings for the output CSV: + - `lf` (Unix/macOS/Linux, default) + - `crlf` (Windows) + +## Output format + +The generated CSV contains one row per policy setting with these columns: + +- `PolicyName` +- `Description` +- `SettingKey` +- `SettingValue` +- `Policy type` +- `Platform supported` +- `Created` +- `Last modified` +- `Scope tags` + +## Example + +```bash +# Parse and dedupe by policy, exporting with LF line endings +python intune.py CQRE.NET-2025-09-22.md -o CQRE.NET-2025-09-22.csv --dedupe --dedupe-scope policy --newline lf +``` + +This creates `CQRE.NET-2025-09-22.csv` ready for analysis in Excel. 
\ No newline at end of file diff --git a/intune.py b/intune.py index 019a3d3..0f6a93c 100644 --- a/intune.py +++ b/intune.py @@ -87,7 +87,7 @@ def parse_key_value_table(table_tag): return data -def write_single_csv(policies, output_csv='policies.csv', dedupe=False): +def write_single_csv(policies, output_csv='policies.csv', dedupe=False, dedupe_scope="exact", lineterminator='\n'): """ Writes a single CSV with columns in this order: 1) PolicyName @@ -101,6 +101,11 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False): Each row corresponds to one Setting. If dedupe=True, exact duplicate rows (across all policies) are skipped. + `dedupe_scope` controls how duplicates are identified: + - 'exact' -> full row match (default) + - 'policy' -> (PolicyName, SettingKey, SettingValue) + - 'global' -> (SettingKey, SettingValue, Policy type, Platform supported) + - `lineterminator`: line ending to use when writing the CSV (default `\n`, use `\r\n` for Windows-style). """ # The exact order we want: columns = [ @@ -114,12 +119,35 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False): "Last modified" ] + def make_key(row_list): + if not dedupe: + return None + if dedupe_scope == "exact": + return tuple(row_list) + elif dedupe_scope == "policy": + # row_list layout: [PolicyName, Description, SettingKey, SettingValue, Policy type, Platform, Created, Last modified] + return ( + row_list[0], # PolicyName + row_list[2], # SettingKey + row_list[3], # SettingValue + ) + elif dedupe_scope == "global": + return ( + row_list[2], # SettingKey + row_list[3], # SettingValue + row_list[4], # Policy type + row_list[5], # Platform supported + ) + else: + # Fallback to exact if an unknown scope is provided + return tuple(row_list) + # De-duplication support (across the entire file) seen_rows = set() if dedupe else None rows_written = 0 with open(output_csv, 'w', newline='', encoding='utf-8') as f: - writer = csv.writer(f) + writer = csv.writer(f, 
lineterminator=lineterminator) # Write header writer.writerow(columns) @@ -149,7 +177,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False): last_modified ] if seen_rows is not None: - key = tuple(row) + key = make_key(row) if key in seen_rows: continue seen_rows.add(key) @@ -170,7 +198,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False): last_modified ] if seen_rows is not None: - key = tuple(row) + key = make_key(row) if key in seen_rows: continue seen_rows.add(key) @@ -208,6 +236,23 @@ def main(): "SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)." ), ) + parser.add_argument( + "--dedupe-scope", + choices=["exact", "policy", "global"], + default="exact", + help=( + "How to identify duplicates when --dedupe is set: 'exact' (full row), " + "'policy' (PolicyName+SettingKey+SettingValue), or 'global' (SettingKey+SettingValue+Policy type+Platform)." + ), + ) + parser.add_argument( + "--newline", + choices=["lf", "crlf"], + default="lf", + help=( + "Choose line endings for the output CSV: 'lf' (\\n, macOS/Linux) or 'crlf' (\\r\\n, Windows)." + ), + ) args = parser.parse_args() @@ -226,11 +271,23 @@ def main(): policies = parse_tables_from_markdown(input_path) - rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe) + lineterminator = "\n" if args.newline == "lf" else "\r\n" - msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'." + rows_written = write_single_csv( + policies, + output_csv, + dedupe=args.dedupe, + dedupe_scope=args.dedupe_scope, + lineterminator=lineterminator, + ) + + msg = ( + f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'. " + f"(newline={args.newline}" + ) if args.dedupe: - msg += " (duplicates removed)" + msg += f", dedupe={args.dedupe_scope}" + msg += ")" print(msg) if __name__ == "__main__":