Allow specifying input and output files

2025-09-22 16:24:13 +02:00
parent 5d7acda429
commit 35b297a91a
1 changed files with 216 additions and 0 deletions
--- a/intune.py
+++ b/intune.py
@@ -0,0 +1,216 @@
+import csv
+from bs4 import BeautifulSoup
+import argparse
+import os
+
+def parse_tables_from_markdown(md_file_path):
+    """
+    Parse a Markdown/HTML export into a list of policy records.
+
+    Each <h3> tag is treated as a policy heading. The siblings that follow the
+    heading are scanned for the first two <table class='table-settings'>
+    elements:
+      1) 'Basics'
+      2) 'Settings'
+    The scan stops at the next <h3>, so a policy with fewer than two tables is
+    skipped rather than being paired with tables of the policy after it.
+
+    Args:
+        md_file_path: Path of the file to read (opened as UTF-8 text).
+
+    Returns:
+        A list of dicts, one per policy that has both tables, each with:
+          {
+            'basic_info': { key -> value, ... },
+            'settings': { key -> value, ... }
+          }
+        'basic_info' also receives a "PolicyName" entry holding the heading
+        text, unless the Basics table already provides that key.
+    """
+    with open(md_file_path, 'r', encoding='utf-8') as f:
+        html = f.read()
+
+    soup = BeautifulSoup(html, 'lxml')
+    policies = []
+
+    # Every <h3> is a policy heading
+    for h3 in soup.find_all('h3'):
+        policy_name = h3.get_text(strip=True)
+
+        # Collect the next two 'table-settings' tables (Basics and Settings)
+        policy_tables = []
+        for sibling in h3.next_siblings:
+            # A following <h3> starts the next policy; stop here so its
+            # tables are not attributed to the current heading.
+            if sibling.name == 'h3':
+                break
+            if (
+                sibling.name == 'table' and
+                'table-settings' in sibling.get('class', [])
+            ):
+                policy_tables.append(sibling)
+                if len(policy_tables) == 2:
+                    break
+
+        # If fewer than 2 tables, skip this policy
+        if len(policy_tables) < 2:
+            continue
+
+        basics_table, settings_table = policy_tables
+
+        # Parse out the Basic Info and Settings
+        basic_info = parse_key_value_table(basics_table)
+        settings_info = parse_key_value_table(settings_table)
+
+        # Put policy name into basic_info if not already present
+        basic_info.setdefault("PolicyName", policy_name)
+
+        policies.append({
+            'basic_info': basic_info,
+            'settings': settings_info,
+        })
+
+    return policies
+
+def parse_key_value_table(table_tag):
+    """
+    Parse a <table> with class 'table-settings' into a { key: value } dict.
+
+    Each data row is expected to look like <td>Key</td><td>Value</td>. Rows
+    carrying the class 'table-header1' or 'category-level1' are skipped, as are
+    rows with fewer than two <td> cells. Rows are read whether they sit
+    directly under <table> or inside a <thead>/<tbody>/<tfoot> wrapper.
+
+    Args:
+        table_tag: The parsed <table> element.
+
+    Returns:
+        A dict mapping the stripped text of the first cell to the stripped
+        text of the second cell. If a key occurs more than once, the last
+        row wins.
+    """
+    data = {}
+
+    # Gather rows in document order. Only the table's own children and the
+    # children of its section wrappers are inspected, so rows belonging to a
+    # table nested inside a cell are not picked up.
+    rows = []
+    for child in table_tag.find_all(['tr', 'thead', 'tbody', 'tfoot'], recursive=False):
+        if child.name == 'tr':
+            rows.append(child)
+        else:
+            rows.extend(child.find_all('tr', recursive=False))
+
+    for row in rows:
+        # Skip table header and category rows
+        row_classes = row.get('class', [])
+        if 'table-header1' in row_classes or 'category-level1' in row_classes:
+            continue
+
+        cols = row.find_all('td', recursive=False)
+        if len(cols) < 2:
+            continue  # can't parse a key-value from this row
+
+        key_text = cols[0].get_text(strip=True)
+        val_text = cols[1].get_text(strip=True)
+        data[key_text] = val_text
+
+    return data
+
+def write_single_csv(policies, output_csv='policies.csv'):
+    """
+    Flatten parsed policies into a single CSV file, one row per setting.
+
+    Column order:
+      1) PolicyName
+      2) Description
+      3) SettingKey
+      4) SettingValue
+      5) Policy type (read from the 'Profile type' basic-info entry)
+      6) Platform supported
+      7) Created
+      8) Last modified
+
+    A policy whose settings are empty still yields one row, with SettingKey
+    and SettingValue left blank.
+    """
+    header = [
+        "PolicyName",
+        "Description",
+        "SettingKey",
+        "SettingValue",
+        "Policy type",
+        "Platform supported",
+        "Created",
+        "Last modified"
+    ]
+
+    with open(output_csv, 'w', newline='', encoding='utf-8') as handle:
+        out = csv.writer(handle)
+        out.writerow(header)
+
+        for entry in policies:
+            info = entry['basic_info']
+            setting_map = entry['settings']
+
+            # Cells shared by every row of this policy, split around the
+            # SettingKey/SettingValue pair that sits between them.
+            leading = [
+                info.get("PolicyName", ""),
+                info.get("Description", ""),
+            ]
+            trailing = [
+                # The column is named "Policy type", but the data calls it "Profile type"
+                info.get("Profile type", ""),
+                info.get("Platform supported", ""),
+                info.get("Created", ""),
+                info.get("Last modified", ""),
+            ]
+
+            # With no settings, fall back to one blank key/value pair so the
+            # policy is still represented by a row.
+            pairs = setting_map.items() if setting_map else [("", "")]
+            for key, value in pairs:
+                out.writerow([*leading, key, value, *trailing])
+
+
+def main():
+    """
+    Command-line entry point: parse the input file and write the flat CSV.
+
+    The input path is the optional positional argument and falls back to
+    "cqre.md". The output path is taken from -o/--output when given; otherwise
+    it is the input's base name with a ".csv" extension (in the current
+    directory), or "policies-cqre.csv" when no input argument was supplied.
+
+    If the input file cannot be opened or read, the problem is reported through
+    the argument parser's error exit instead of an uncaught traceback.
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Parse an Intune Markdown/HTML export with <h3> headings and two "
+            "<table class='table-settings'> sections (Basics + Settings) into a flat CSV."
+        )
+    )
+    parser.add_argument(
+        "input",
+        nargs="?",
+        help="Path to the Markdown/HTML file to parse (default: cqre.md)",
+        default=None,
+    )
+    parser.add_argument(
+        "-o", "--output",
+        help=(
+            "Path to output CSV file. If not provided, derives from input name "
+            "(e.g., input.md -> input.csv). If no input is given, defaults to policies-cqre.csv."
+        ),
+        default=None,
+    )
+
+    args = parser.parse_args()
+
+    # Determine input path (keeps previous default behavior if none provided)
+    input_path = args.input or "cqre.md"
+
+    # Determine output path
+    if args.output:
+        output_csv = args.output
+    elif args.input:
+        # Only the file name is kept, so the CSV lands in the working directory
+        base = os.path.splitext(os.path.basename(input_path))[0]
+        output_csv = f"{base}.csv"
+    else:
+        output_csv = "policies-cqre.csv"
+
+    try:
+        policies = parse_tables_from_markdown(input_path)
+    except OSError as exc:
+        # Missing or unreadable input: print usage plus the reason and exit
+        parser.error(f"cannot read input file '{input_path}': {exc}")
+
+    # Count rows that will be written (one per setting, or one if no settings)
+    row_count = sum(max(len(p.get("settings") or {}), 1) for p in policies)
+
+    write_single_csv(policies, output_csv)
+
+    print(
+        f"Done! Parsed {len(policies)} policies and wrote {row_count} rows to '{output_csv}'."
+    )
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()