diff --git a/README.md b/README.md
index b0d2a7a..65f4d71 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,63 @@
# IntunePolicyParser
-Parsing policies from Intune to Excel
\ No newline at end of file
+
+A utility that parses **Markdown/HTML documentation exports** from the [IntuneManagement](https://github.com/Micke-K/IntuneManagement) tool into flat CSV files that can be analyzed further in Excel or Power BI.
+
+## Source files
+
+This parser expects as input a **Documentation export** from the IntuneManagement tool. Use the `Export Documentation (Markdown)` feature, which produces a `.md` file containing:
+
+- a heading for each policy,
+- two `<table>` blocks under each heading (Basics + Settings).
+
+## Usage
+
+```bash
+python intune.py [input_file.md] [-o OUTPUT.csv] [--dedupe] [--dedupe-scope {exact,policy,global}] [--newline {lf,crlf}]
+```
+
+### Arguments
+
+- `input_file.md`
+ Path to the Intune documentation export (Markdown/HTML).
+ Defaults to `cqre.md`.
+
+- `-o, --output OUTPUT.csv`
+ Output CSV file. If not provided, the name is derived from the input (e.g. `input.md -> input.csv`).
+
+- `--dedupe`
+ Enable removal of duplicate rows.
+
+- `--dedupe-scope {exact,policy,global}`
+ How duplicates are identified (only takes effect when `--dedupe` is set):
+ - `exact` → full row must match (default).
+ - `policy` → unique per (PolicyName + SettingKey + SettingValue).
+ - `global` → unique per (SettingKey + SettingValue + Policy type + Platform).
+
+- `--newline {lf,crlf}`
+ Choose line endings for the output CSV:
+ - `lf` (Unix/macOS/Linux, default)
+ - `crlf` (Windows)
+
+## Output format
+
+The generated CSV contains one row per policy setting with these columns:
+
+- `PolicyName`
+- `Description`
+- `SettingKey`
+- `SettingValue`
+- `Policy type`
+- `Platform supported`
+- `Created`
+- `Last modified`
+- `Scope tags`
+
+## Example
+
+```bash
+# Parse and dedupe by policy, exporting with LF line endings
+python intune.py CQRE.NET-2025-09-22.md -o CQRE.NET-2025-09-22.csv --dedupe --dedupe-scope policy --newline lf
+```
+
+This creates `CQRE.NET-2025-09-22.csv` ready for analysis in Excel.
\ No newline at end of file
diff --git a/intune.py b/intune.py
index 019a3d3..0f6a93c 100644
--- a/intune.py
+++ b/intune.py
@@ -87,7 +87,7 @@ def parse_key_value_table(table_tag):
return data
-def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
+def write_single_csv(policies, output_csv='policies.csv', dedupe=False, dedupe_scope="exact", lineterminator='\n'):
"""
Writes a single CSV with columns in this order:
1) PolicyName
@@ -101,6 +101,11 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
Each row corresponds to one Setting.
If dedupe=True, exact duplicate rows (across all policies) are skipped.
+ `dedupe_scope` controls how duplicates are identified:
+ - 'exact' -> full row match (default)
+ - 'policy' -> (PolicyName, SettingKey, SettingValue)
+ - 'global' -> (SettingKey, SettingValue, Policy type, Platform supported)
+ - `lineterminator`: line ending to use when writing the CSV (default `\n`, use `\r\n` for Windows-style).
"""
# The exact order we want:
columns = [
@@ -114,12 +119,35 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
"Last modified"
]
+ def make_key(row_list):
+ if not dedupe:
+ return None
+ if dedupe_scope == "exact":
+ return tuple(row_list)
+ elif dedupe_scope == "policy":
+ # row_list layout: [PolicyName, Description, SettingKey, SettingValue, Policy type, Platform, Created, Last modified]
+ return (
+ row_list[0], # PolicyName
+ row_list[2], # SettingKey
+ row_list[3], # SettingValue
+ )
+ elif dedupe_scope == "global":
+ return (
+ row_list[2], # SettingKey
+ row_list[3], # SettingValue
+ row_list[4], # Policy type
+ row_list[5], # Platform supported
+ )
+ else:
+ # Fallback to exact if an unknown scope is provided
+ return tuple(row_list)
+
# De-duplication support (across the entire file)
seen_rows = set() if dedupe else None
rows_written = 0
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
- writer = csv.writer(f)
+ writer = csv.writer(f, lineterminator=lineterminator)
# Write header
writer.writerow(columns)
@@ -149,7 +177,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
last_modified
]
if seen_rows is not None:
- key = tuple(row)
+ key = make_key(row)
if key in seen_rows:
continue
seen_rows.add(key)
@@ -170,7 +198,7 @@ def write_single_csv(policies, output_csv='policies.csv', dedupe=False):
last_modified
]
if seen_rows is not None:
- key = tuple(row)
+ key = make_key(row)
if key in seen_rows:
continue
seen_rows.add(key)
@@ -208,6 +236,23 @@ def main():
"SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
),
)
+ parser.add_argument(
+ "--dedupe-scope",
+ choices=["exact", "policy", "global"],
+ default="exact",
+ help=(
+ "How to identify duplicates when --dedupe is set: 'exact' (full row), "
+ "'policy' (PolicyName+SettingKey+SettingValue), or 'global' (SettingKey+SettingValue+Policy type+Platform)."
+ ),
+ )
+ parser.add_argument(
+ "--newline",
+ choices=["lf", "crlf"],
+ default="lf",
+ help=(
+ "Choose line endings for the output CSV: 'lf' (\\n, macOS/Linux) or 'crlf' (\\r\\n, Windows)."
+ ),
+ )
args = parser.parse_args()
@@ -226,11 +271,23 @@ def main():
policies = parse_tables_from_markdown(input_path)
- rows_written = write_single_csv(policies, output_csv, dedupe=args.dedupe)
+ lineterminator = "\n" if args.newline == "lf" else "\r\n"
- msg = f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'."
+ rows_written = write_single_csv(
+ policies,
+ output_csv,
+ dedupe=args.dedupe,
+ dedupe_scope=args.dedupe_scope,
+ lineterminator=lineterminator,
+ )
+
+ msg = (
+ f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'. "
+ f"(newline={args.newline}"
+ )
if args.dedupe:
- msg += " (duplicates removed)"
+ msg += f", dedupe={args.dedupe_scope}"
+ msg += ")"
print(msg)
if __name__ == "__main__":