import csv from bs4 import BeautifulSoup import argparse import os def parse_tables_from_markdown(md_file_path): """ Parses a Markdown/HTML file containing
def write_single_csv(policies, output_csv='policies.csv', dedupe=False, dedupe_scope="exact", lineterminator='\n'):
    """
    Write all policies to a single CSV file, one row per setting.

    Columns are written in this order:
      1) PolicyName
      2) Description
      3) SettingKey
      4) SettingValue
      5) Policy type (read from the 'Profile type' basic-info field)
      6) Platform supported
      7) Created
      8) Last modified

    A policy without settings still produces one row, with empty
    SettingKey/SettingValue, so the policy itself is not lost.

    Args:
        policies: iterable of dicts, each with a 'basic_info' dict and a
            'settings' dict. A missing or None value for either key is
            treated as empty.
        output_csv: path of the CSV file to create (overwritten if present).
        dedupe: when True, rows whose dedupe key was already written
            (across all policies) are skipped.
        dedupe_scope: how duplicates are identified when dedupe=True:
            - 'exact'  -> the full row (default)
            - 'policy' -> (PolicyName, SettingKey, SettingValue)
            - 'global' -> (SettingKey, SettingValue, Policy type,
                           Platform supported)
            Any other value falls back to 'exact'.
        lineterminator: line ending used by the CSV writer (default '\\n';
            pass '\\r\\n' for Windows-style line endings).

    Returns:
        The number of data rows written (the header row is not counted).
    """
    # The exact column order we want in the output file.
    columns = [
        "PolicyName",
        "Description",
        "SettingKey",
        "SettingValue",
        "Policy type",
        "Platform supported",
        "Created",
        "Last modified",
    ]

    # Row positions that make up the dedupe key for each non-exact scope.
    # Row layout: [PolicyName, Description, SettingKey, SettingValue,
    #              Policy type, Platform supported, Created, Last modified]
    scope_indexes = {
        "policy": (0, 2, 3),
        "global": (2, 3, 4, 5),
    }
    # None means "use the whole row" ('exact', or an unknown scope).
    key_indexes = scope_indexes.get(dedupe_scope)

    def make_key(row_list):
        if key_indexes is None:
            return tuple(row_list)
        return tuple(row_list[i] for i in key_indexes)

    # De-duplication state is shared across the entire file.
    seen_keys = set() if dedupe else None
    rows_written = 0

    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator=lineterminator)
        writer.writerow(columns)

        for policy in policies:
            basic_info = policy.get('basic_info') or {}
            settings = policy.get('settings') or {}

            policy_name = basic_info.get("PolicyName", "")
            description = basic_info.get("Description", "")
            # The CSV column is "Policy type", but the data calls it "Profile type".
            policy_type = basic_info.get("Profile type", "")
            platform_supported = basic_info.get("Platform supported", "")
            created = basic_info.get("Created", "")
            last_modified = basic_info.get("Last modified", "")

            # A policy with no settings gets a single placeholder row.
            setting_items = settings.items() if settings else [("", "")]

            for setting_key, setting_value in setting_items:
                row = [
                    policy_name,
                    description,
                    setting_key,
                    setting_value,
                    policy_type,
                    platform_supported,
                    created,
                    last_modified,
                ]
                if seen_keys is not None:
                    key = make_key(row)
                    if key in seen_keys:
                        continue
                    seen_keys.add(key)
                writer.writerow(row)
                rows_written += 1

    return rows_written