Fixing dedupe
This commit is contained in:
62
README.md
62
README.md
@@ -1,3 +1,63 @@
|
|||||||
# IntunePolicyParser
|
# IntunePolicyParser
|
||||||
|
|
||||||
Parsing policies from Intune to Excel
|
|
||||||
|
A utility to parse **Markdown/HTML documentation exports** from the [IntuneManagement](https://github.com/Micke-K/IntuneManagement) tool into flat CSV files that can be further analyzed in Excel or Power BI.
|
||||||
|
|
||||||
|
## Source files
|
||||||
|
|
||||||
|
This parser expects as input a **Documentation export** from the IntuneManagement tool. Use the `Export Documentation (Markdown)` feature, which produces a `.md` file containing:
|
||||||
|
|
||||||
|
- `<h3>` sections for each policy,
|
||||||
|
- two `<table class="table-settings">` blocks under each heading (Basics + Settings).
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python intune.py [input_file.md] [-o OUTPUT.csv] [--dedupe] [--dedupe-scope {exact,policy,global}] [--newline {lf,crlf}]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Arguments
|
||||||
|
|
||||||
|
- `input_file.md`
|
||||||
|
Path to the Intune documentation export (Markdown/HTML).
|
||||||
|
Defaults to `cqre.md`.
|
||||||
|
|
||||||
|
- `-o, --output OUTPUT.csv`
|
||||||
|
Output CSV file. If not provided, the name is derived from the input file name (e.g. `input.md` → `input.csv`).
|
||||||
|
|
||||||
|
- `--dedupe`
|
||||||
|
Enable removal of duplicate rows.
|
||||||
|
|
||||||
|
- `--dedupe-scope {exact,policy,global}`
|
||||||
|
How duplicates are identified (only takes effect when `--dedupe` is set):
|
||||||
|
- `exact` → full row must match (default).
|
||||||
|
- `policy` → unique per (PolicyName + SettingKey + SettingValue).
|
||||||
|
- `global` → unique per (SettingKey + SettingValue + Policy type + Platform supported).
|
||||||
|
|
||||||
|
- `--newline {lf,crlf}`
|
||||||
|
Choose line endings for the output CSV:
|
||||||
|
- `lf` (Unix/macOS/Linux, default)
|
||||||
|
- `crlf` (Windows)
|
||||||
|
|
||||||
|
## Output format
|
||||||
|
|
||||||
|
The generated CSV contains one row per policy setting with these columns:
|
||||||
|
|
||||||
|
- `PolicyName`
|
||||||
|
- `Description`
|
||||||
|
- `SettingKey`
|
||||||
|
- `SettingValue`
|
||||||
|
- `Policy type`
|
||||||
|
- `Platform supported`
|
||||||
|
- `Created`
|
||||||
|
- `Last modified`
|
||||||
|
- `Scope tags`
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Parse and dedupe by policy, exporting with LF line endings
|
||||||
|
python intune.py CQRE.NET-2025-09-22.md -o CQRE.NET-2025-09-22.csv --dedupe --dedupe-scope policy --newline lf
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates `CQRE.NET-2025-09-22.csv` ready for analysis in Excel.
|
71
intune.py
71
intune.py
@@ -87,7 +87,7 @@ def parse_key_value_table(table_tag):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
def write_single_csv(policies, output_csv='policies.csv', dedupe=False, dedupe_scope="exact", lineterminator='\n'):
|
||||||
"""
|
"""
|
||||||
Writes a single CSV with columns in this order:
|
Writes a single CSV with columns in this order:
|
||||||
1) PolicyName
|
1) PolicyName
|
||||||
@@ -101,6 +101,11 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
|||||||
|
|
||||||
Each row corresponds to one Setting.
|
Each row corresponds to one Setting.
|
||||||
If dedupe=True, exact duplicate rows (across all policies) are skipped.
|
If dedupe=True, exact duplicate rows (across all policies) are skipped.
|
||||||
|
`dedupe_scope` controls how duplicates are identified:
|
||||||
|
- 'exact' -> full row match (default)
|
||||||
|
- 'policy' -> (PolicyName, SettingKey, SettingValue)
|
||||||
|
- 'global' -> (SettingKey, SettingValue, Policy type, Platform supported)
|
||||||
|
- `lineterminator`: line ending to use when writing the CSV (default `\n`, use `\r\n` for Windows-style).
|
||||||
"""
|
"""
|
||||||
# The exact order we want:
|
# The exact order we want:
|
||||||
columns = [
|
columns = [
|
||||||
@@ -114,12 +119,35 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
|||||||
"Last modified"
|
"Last modified"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def make_key(row_list):
|
||||||
|
if not dedupe:
|
||||||
|
return None
|
||||||
|
if dedupe_scope == "exact":
|
||||||
|
return tuple(row_list)
|
||||||
|
elif dedupe_scope == "policy":
|
||||||
|
# row_list layout: [PolicyName, Description, SettingKey, SettingValue, Policy type, Platform, Created, Last modified]
|
||||||
|
return (
|
||||||
|
row_list[0], # PolicyName
|
||||||
|
row_list[2], # SettingKey
|
||||||
|
row_list[3], # SettingValue
|
||||||
|
)
|
||||||
|
elif dedupe_scope == "global":
|
||||||
|
return (
|
||||||
|
row_list[2], # SettingKey
|
||||||
|
row_list[3], # SettingValue
|
||||||
|
row_list[4], # Policy type
|
||||||
|
row_list[5], # Platform supported
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Fallback to exact if an unknown scope is provided
|
||||||
|
return tuple(row_list)
|
||||||
|
|
||||||
# De-duplication support (across the entire file)
|
# De-duplication support (across the entire file)
|
||||||
seen_rows = set() if dedupe else None
|
seen_rows = set() if dedupe else None
|
||||||
rows_written = 0
|
rows_written = 0
|
||||||
|
|
||||||
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
|
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
|
||||||
writer = csv.writer(f)
|
writer = csv.writer(f, lineterminator=lineterminator)
|
||||||
# Write header
|
# Write header
|
||||||
writer.writerow(columns)
|
writer.writerow(columns)
|
||||||
|
|
||||||
@@ -149,7 +177,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
|||||||
last_modified
|
last_modified
|
||||||
]
|
]
|
||||||
if seen_rows is not None:
|
if seen_rows is not None:
|
||||||
key = tuple(row)
|
key = make_key(row)
|
||||||
if key in seen_rows:
|
if key in seen_rows:
|
||||||
continue
|
continue
|
||||||
seen_rows.add(key)
|
seen_rows.add(key)
|
||||||
@@ -170,7 +198,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
|
|||||||
last_modified
|
last_modified
|
||||||
]
|
]
|
||||||
if seen_rows is not None:
|
if seen_rows is not None:
|
||||||
key = tuple(row)
|
key = make_key(row)
|
||||||
if key in seen_rows:
|
if key in seen_rows:
|
||||||
continue
|
continue
|
||||||
seen_rows.add(key)
|
seen_rows.add(key)
|
||||||
@@ -208,6 +236,23 @@ def main():
|
|||||||
"SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
|
"SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dedupe-scope",
|
||||||
|
choices=["exact", "policy", "global"],
|
||||||
|
default="exact",
|
||||||
|
help=(
|
||||||
|
"How to identify duplicates when --dedupe is set: 'exact' (full row), "
|
||||||
|
"'policy' (PolicyName+SettingKey+SettingValue), or 'global' (SettingKey+SettingValue+Policy type+Platform)."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--newline",
|
||||||
|
choices=["lf", "crlf"],
|
||||||
|
default="lf",
|
||||||
|
help=(
|
||||||
|
"Choose line endings for the output CSV: 'lf' (\\n, macOS/Linux) or 'crlf' (\\r\\n, Windows)."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -226,11 +271,23 @@ def main():
|
|||||||
|
|
||||||
policies = parse_tables_from_markdown(input_path)
|
policies = parse_tables_from_markdown(input_path)
|
||||||
|
|
||||||
rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe)
|
lineterminator = "\n" if args.newline == "lf" else "\r\n"
|
||||||
|
|
||||||
msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'."
|
rows_written = write_single_csv(
|
||||||
|
policies,
|
||||||
|
output_csv,
|
||||||
|
dedupe=args.dedupe,
|
||||||
|
dedupe_scope=args.dedupe_scope,
|
||||||
|
lineterminator=lineterminator,
|
||||||
|
)
|
||||||
|
|
||||||
|
msg = (
|
||||||
|
f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'. "
|
||||||
|
f"(newline={args.newline}"
|
||||||
|
)
|
||||||
if args.dedupe:
|
if args.dedupe:
|
||||||
msg += " (duplicates removed)"
|
msg += f", dedupe={args.dedupe_scope}"
|
||||||
|
msg += ")"
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Reference in New Issue
Block a user