"""Convert an Intune Markdown/HTML policy export into a flat CSV file.

The export is expected to contain one ``<h3>`` heading per policy, each
followed by two ``<table class="table-settings">`` elements: a "Basics"
table and a "Settings" table, both laid out as key/value rows.
"""

import argparse
import csv
import os

from bs4 import BeautifulSoup

# Column order of the generated CSV (one CSV row per policy setting).
CSV_COLUMNS = [
    "PolicyName",
    "Description",
    "SettingKey",
    "SettingValue",
    "Policy type",
    "Platform supported",
    "Created",
    "Last modified",
]

# Table section wrappers that may sit between <table> and its <tr> rows.
_ROW_CONTAINERS = ("thead", "tbody", "tfoot")


def _find_policy_tables(h3, limit=2):
    """Return up to *limit* 'table-settings' tables that follow *h3*.

    Only siblings of *h3* are inspected. The scan stops at the next
    ``<h3>`` so that a policy with missing tables never picks up the
    tables that belong to the following policy.
    """
    tables = []
    for sibling in h3.next_siblings:
        # Text nodes between elements have no usable tag name.
        tag_name = getattr(sibling, "name", None)
        if tag_name == "h3":
            break  # the next policy starts here
        if tag_name == "table" and "table-settings" in sibling.get("class", []):
            tables.append(sibling)
            if len(tables) == limit:
                break
    return tables


def _iter_table_rows(table_tag):
    """Yield the ``<tr>`` rows that belong directly to *table_tag*.

    Rows may be direct children of the table or be wrapped in a direct
    ``<thead>``/``<tbody>``/``<tfoot>`` child. Rows of nested tables are
    not yielded.
    """
    wanted = ["tr", *_ROW_CONTAINERS]
    for child in table_tag.find_all(wanted, recursive=False):
        if child.name == "tr":
            yield child
        else:
            yield from child.find_all("tr", recursive=False)


def parse_tables_from_markdown(md_file_path):
    """Parse a Markdown/HTML file containing <h3> headings and
    'table-settings' tables.

    Assumes each policy:
      - starts with an <h3> tag that holds the policy name
      - is followed by two <table class="table-settings"> elements:
          1) 'Basics'
          2) 'Settings'

    Policies with fewer than two such tables before the next <h3> are
    skipped.

    Args:
        md_file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A list of dicts, each with:
            {
                'basic_info': { key -> value, ... },
                'settings':   { key -> value, ... }
            }
        'basic_info' always has a "PolicyName" entry; the heading text is
        used when the Basics table does not provide one.
    """
    with open(md_file_path, "r", encoding="utf-8") as f:
        html = f.read()

    soup = BeautifulSoup(html, "lxml")
    policies = []

    # Every <h3> tag is treated as a policy heading.
    for h3 in soup.find_all("h3"):
        policy_name = h3.get_text(strip=True)

        policy_tables = _find_policy_tables(h3)
        if len(policy_tables) < 2:
            continue  # incomplete policy: Basics and/or Settings missing

        basics_table, settings_table = policy_tables
        basic_info = parse_key_value_table(basics_table)
        settings_info = parse_key_value_table(settings_table)

        # Put the policy name into basic_info if not already present.
        basic_info.setdefault("PolicyName", policy_name)

        policies.append({
            "basic_info": basic_info,
            "settings": settings_info,
        })

    return policies


def parse_key_value_table(table_tag):
    """Parse a 'table-settings' <table> into a ``{key: value}`` dict.

    Each data row is expected to look like
    ``<tr><td>Key</td><td>Value</td></tr>``. Rows carrying the class
    'table-header1' or 'category-level1' are skipped, as are rows with
    fewer than two ``<td>`` cells. If a key occurs more than once, the
    last occurrence wins.

    Args:
        table_tag: The BeautifulSoup tag of the table to parse.

    Returns:
        Dict mapping the stripped text of the first cell to the stripped
        text of the second cell, in row order.
    """
    data = {}
    for row in _iter_table_rows(table_tag):
        # Skip table header and category rows.
        row_classes = row.get("class", [])
        if "table-header1" in row_classes or "category-level1" in row_classes:
            continue

        cols = row.find_all("td", recursive=False)
        if len(cols) < 2:
            continue  # can't parse a key-value pair from this row

        key_text = cols[0].get_text(strip=True)
        val_text = cols[1].get_text(strip=True)
        data[key_text] = val_text

    return data


def write_single_csv(policies, output_csv="policies.csv"):
    """Write all policies into a single CSV file, one row per setting.

    Columns, in order:
      1) PolicyName
      2) Description
      3) SettingKey
      4) SettingValue
      5) Policy type (taken from the 'Profile type' basic-info entry)
      6) Platform supported
      7) Created
      8) Last modified

    A policy without any settings still produces one row, with empty
    SettingKey and SettingValue.

    Args:
        policies: List of dicts as returned by parse_tables_from_markdown.
        output_csv: Path of the CSV file to create or overwrite.
    """
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_COLUMNS)

        for policy in policies:
            basic_info = policy["basic_info"]
            settings = policy["settings"]

            policy_name = basic_info.get("PolicyName", "")
            description = basic_info.get("Description", "")
            # The CSV column is "Policy type", but the export calls it
            # "Profile type".
            policy_type = basic_info.get("Profile type", "")
            platform_supported = basic_info.get("Platform supported", "")
            created = basic_info.get("Created", "")
            last_modified = basic_info.get("Last modified", "")

            # A policy with no settings is written as one placeholder row.
            setting_items = list(settings.items()) if settings else [("", "")]

            for setting_key, setting_value in setting_items:
                writer.writerow([
                    policy_name,
                    description,
                    setting_key,
                    setting_value,
                    policy_type,
                    platform_supported,
                    created,
                    last_modified,
                ])


def main():
    """Command-line entry point: parse the input file and write the CSV."""
    parser = argparse.ArgumentParser(
        description=(
            "Parse an Intune Markdown/HTML export with <h3> headings and two "
            "'table-settings' Key/Value table sections (Basics + Settings) "
            "into a flat CSV."
        )
    )
    parser.add_argument(
        "input",
        nargs="?",
        help="Path to the Markdown/HTML file to parse (default: cqre.md)",
        default=None,
    )
    parser.add_argument(
        "-o",
        "--output",
        help=(
            "Path to output CSV file. If not provided, derives from input name "
            "(e.g., input.md -> input.csv). If no input is given, defaults to "
            "policies-cqre.csv."
        ),
        default=None,
    )
    args = parser.parse_args()

    # Determine input path (keeps previous default behavior if none provided).
    input_path = args.input or "cqre.md"

    # Determine output path: explicit option, else derived from the input
    # file name, else the historical default.
    if args.output:
        output_csv = args.output
    elif args.input:
        base = os.path.splitext(os.path.basename(input_path))[0]
        output_csv = f"{base}.csv"
    else:
        output_csv = "policies-cqre.csv"

    policies = parse_tables_from_markdown(input_path)

    # Rows that will be written: one per setting, or one placeholder row
    # for a policy without settings (header row not counted).
    row_count = sum(max(len(p.get("settings") or {}), 1) for p in policies)

    write_single_csv(policies, output_csv)
    print(
        f"Done! Parsed {len(policies)} policies and wrote {row_count} rows "
        f"to '{output_csv}'."
    )


if __name__ == "__main__":
    main()