import argparse
import csv
import os

from bs4 import BeautifulSoup


def _tables_after_heading(heading, limit=2):
    """Collect up to `limit` 'table-settings' tables that follow `heading`.

    Only siblings of `heading` are inspected, and the scan stops at the next
    heading of the same tag name so that a policy with missing tables never
    picks up tables that belong to the following policy.
    """
    tables = []
    for sibling in heading.next_siblings:
        # Text nodes between elements are not tags; treat them as nameless.
        name = getattr(sibling, 'name', None)
        if name == heading.name:
            break
        if name == 'table' and 'table-settings' in (sibling.get('class') or []):
            tables.append(sibling)
            if len(tables) == limit:
                break
    return tables


def parse_tables_from_markdown(md_file_path):
    """Parse a Markdown/HTML file into a list of policies.

    The file is expected to contain <h3> headings and 'table-settings'
    tables. Assumes each policy:

    - starts with an <h3> tag that has the policy name
    - is followed by two <table class="table-settings"> elements:
        1) 'Basics'
        2) 'Settings'

    A heading followed by fewer than two such tables (before the next <h3>)
    is skipped.

    Args:
        md_file_path: Path to the UTF-8 encoded Markdown/HTML file.

    Returns:
        A list of dicts, each with:
            {
                'basic_info': { key -> value, ... },
                'settings':   { key -> value, ... }
            }
        'basic_info' always carries a "PolicyName" entry; the heading text is
        used when the Basics table does not provide one.
    """
    with open(md_file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'lxml')
    policies = []

    # Every <h3> is treated as a policy heading.
    for h3 in soup.find_all('h3'):
        policy_name = h3.get_text(strip=True)

        policy_tables = _tables_after_heading(h3)
        if len(policy_tables) < 2:
            continue
        basics_table, settings_table = policy_tables

        basic_info = parse_key_value_table(basics_table)
        settings_info = parse_key_value_table(settings_table)

        # Put the policy name into basic_info if not already present.
        basic_info.setdefault("PolicyName", policy_name)

        policies.append({
            'basic_info': basic_info,
            'settings': settings_info,
        })

    return policies


def _iter_table_rows(table_tag):
    """Yield the table's own <tr> rows in document order.

    Rows may be direct children of the table or sit one level down inside
    <thead>/<tbody>/<tfoot>. Rows of nested tables are never yielded.
    """
    sections = table_tag.find_all(
        ['tr', 'thead', 'tbody', 'tfoot'], recursive=False
    )
    for child in sections:
        if child.name == 'tr':
            yield child
        else:
            yield from child.find_all('tr', recursive=False)


def parse_key_value_table(table_tag):
    """Parse a 'table-settings' table into a { key: value } dict.

    Given a <table> with class 'table-settings', each data row is expected to
    look like <tr><td>Key</td><td>Value</td></tr>. Rows carrying the class
    'table-header1' or 'category-level1' (headers and category separators)
    and rows with fewer than two <td> cells are ignored. If a key appears
    more than once, the last occurrence wins.

    Args:
        table_tag: The BeautifulSoup tag of the table to parse.

    Returns:
        A dict mapping the stripped text of the first cell to the stripped
        text of the second cell.
    """
    data = {}
    for row in _iter_table_rows(table_tag):
        # Skip table header and category rows.
        row_classes = row.get('class') or []
        if 'table-header1' in row_classes or 'category-level1' in row_classes:
            continue

        cols = row.find_all('td', recursive=False)
        if len(cols) < 2:
            continue  # can't parse a key-value pair from this row

        key_text = cols[0].get_text(strip=True)
        val_text = cols[1].get_text(strip=True)
        data[key_text] = val_text

    return data


def _dedupe_key(row, dedupe_scope):
    """Return the hashable identity of `row` for the given de-dupe scope.

    `row` is laid out as [PolicyName, Description, SettingKey, SettingValue,
    Policy type, Platform supported, Created, Last modified].
    """
    if dedupe_scope == "policy":
        return (row[0], row[2], row[3])
    if dedupe_scope == "global":
        return (row[2], row[3], row[4], row[5])
    # "exact", and any unknown scope, compares the full row.
    return tuple(row)


def write_single_csv(policies, output_csv='policies.csv', dedupe=False,
                     dedupe_scope="exact", lineterminator='\n'):
    """Write all policies to a single CSV, one row per setting.

    Columns, in this order:
        1) PolicyName
        2) Description
        3) SettingKey
        4) SettingValue
        5) Policy type   (mapped from 'Profile type')
        6) Platform supported
        7) Created
        8) Last modified

    A policy without settings still produces one row, with empty
    SettingKey/SettingValue.

    Args:
        policies: List of dicts as returned by parse_tables_from_markdown.
        output_csv: Path of the CSV file to (over)write.
        dedupe: If True, rows already seen (across all policies) are skipped.
        dedupe_scope: How duplicates are identified when `dedupe` is set:
            - 'exact'  -> full row match (default; also used for unknown
                          values)
            - 'policy' -> (PolicyName, SettingKey, SettingValue)
            - 'global' -> (SettingKey, SettingValue, Policy type,
                          Platform supported)
        lineterminator: Line ending used when writing the CSV (default
            '\\n'; use '\\r\\n' for Windows-style).

    Returns:
        The number of data rows written (the header row is not counted).
    """
    # The exact order we want:
    columns = [
        "PolicyName",
        "Description",
        "SettingKey",
        "SettingValue",
        "Policy type",
        "Platform supported",
        "Created",
        "Last modified",
    ]

    # De-duplication spans the entire file, not a single policy.
    seen_rows = set() if dedupe else None
    rows_written = 0

    # newline='' lets the csv module control line endings itself.
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator=lineterminator)
        writer.writerow(columns)

        for policy in policies:
            basic_info = policy['basic_info']
            settings = policy['settings']

            policy_name = basic_info.get("PolicyName", "")
            description = basic_info.get("Description", "")
            # The CSV column is "Policy type", but the data calls it
            # "Profile type".
            policy_type = basic_info.get("Profile type", "")
            platform_supported = basic_info.get("Platform supported", "")
            created = basic_info.get("Created", "")
            last_modified = basic_info.get("Last modified", "")

            # A policy with no settings is represented by a single row with
            # an empty key/value pair.
            pairs = list(settings.items()) if settings else [("", "")]

            for setting_key, setting_value in pairs:
                row = [
                    policy_name,
                    description,
                    setting_key,
                    setting_value,
                    policy_type,
                    platform_supported,
                    created,
                    last_modified,
                ]
                if seen_rows is not None:
                    key = _dedupe_key(row, dedupe_scope)
                    if key in seen_rows:
                        continue
                    seen_rows.add(key)
                writer.writerow(row)
                rows_written += 1

    return rows_written


def main():
    """Command-line entry point: parse the input file and write the CSV.

    With no positional argument the input defaults to 'cqre.md' and the
    output to 'policies-cqre.csv'. With an input but no --output, the CSV is
    named after the input file's base name and written to the current
    directory.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Parse an Intune Markdown/HTML export with <h3> headings and two "
            "<table class='table-settings'> sections (Basics + Settings) "
            "into a flat CSV."
        )
    )
    parser.add_argument(
        "input",
        nargs="?",
        help="Path to the Markdown/HTML file to parse (default: cqre.md)",
        default=None,
    )
    parser.add_argument(
        "-o",
        "--output",
        help=(
            "Path to output CSV file. If not provided, derives from input name "
            "(e.g., input.md -> input.csv). If no input is given, defaults to policies-cqre.csv."
        ),
        default=None,
    )
    parser.add_argument(
        "--dedupe",
        action="store_true",
        help=(
            "Drop exact duplicate rows in the output (by the full row: PolicyName, Description, "
            "SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
        ),
    )
    parser.add_argument(
        "--dedupe-scope",
        choices=["exact", "policy", "global"],
        default="exact",
        help=(
            "How to identify duplicates when --dedupe is set: 'exact' (full row), "
            "'policy' (PolicyName+SettingKey+SettingValue), or 'global' (SettingKey+SettingValue+Policy type+Platform)."
        ),
    )
    parser.add_argument(
        "--newline",
        choices=["lf", "crlf"],
        default="lf",
        help=(
            "Choose line endings for the output CSV: 'lf' (\\n, macOS/Linux) or 'crlf' (\\r\\n, Windows)."
        ),
    )
    args = parser.parse_args()

    # Determine input path (keeps previous default behavior if none provided).
    input_path = args.input or "cqre.md"

    # Determine output path.
    if args.output:
        output_csv = args.output
    elif args.input:
        base = os.path.splitext(os.path.basename(input_path))[0]
        output_csv = f"{base}.csv"
    else:
        output_csv = "policies-cqre.csv"

    policies = parse_tables_from_markdown(input_path)
    lineterminator = "\n" if args.newline == "lf" else "\r\n"
    rows_written = write_single_csv(
        policies,
        output_csv,
        dedupe=args.dedupe,
        dedupe_scope=args.dedupe_scope,
        lineterminator=lineterminator,
    )

    msg = (
        f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'. "
        f"(newline={args.newline}"
    )
    if args.dedupe:
        msg += f", dedupe={args.dedupe_scope}"
    msg += ")"
    print(msg)


if __name__ == "__main__":
    main()