Allow specifying input and output files

This commit is contained in:
2025-09-22 16:24:13 +02:00
parent 5d7acda429
commit 35b297a91a

216
intune.py Normal file
View File

@@ -0,0 +1,216 @@
import csv
from bs4 import BeautifulSoup
import argparse
import os
def parse_tables_from_markdown(md_file_path):
    """
    Parse a Markdown/HTML file made of <h3> headings and 'table-settings' tables.

    Each policy is expected to:
      - start with an <h3> tag whose text is the policy name
      - be followed by two sibling <table class='table-settings'> elements:
          1) 'Basics'
          2) 'Settings'

    The search for a policy's tables stops at the next <h3> sibling, so a
    policy that is missing one of its tables is skipped instead of borrowing
    a table that belongs to the following policy.

    Args:
        md_file_path: Path to the file to parse; it is read as UTF-8.

    Returns:
        A list of dicts, one per policy for which both tables were found:
            {
                'basic_info': { key -> value, ... },
                'settings':   { key -> value, ... }
            }
        'basic_info' additionally carries a "PolicyName" entry (the heading
        text) unless the Basics table already supplies that key.
    """
    with open(md_file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'lxml')
    policies = []

    # Every <h3> is treated as the heading of one policy.
    for h3 in soup.find_all('h3'):
        policy_name = h3.get_text(strip=True)

        # Collect up to two 'table-settings' tables that follow this heading.
        policy_tables = []
        sibling = h3.next_sibling
        while sibling is not None and len(policy_tables) < 2:
            # Siblings may be text nodes rather than tags, so read the tag
            # name defensively instead of assuming the attribute exists.
            sibling_name = getattr(sibling, 'name', None)

            # The next heading starts another policy: its tables are not ours.
            if sibling_name == 'h3':
                break

            if (
                sibling_name == 'table'
                and 'table-settings' in (sibling.get('class') or [])
            ):
                policy_tables.append(sibling)

            sibling = sibling.next_sibling

        # A policy without both tables cannot be parsed; skip it.
        if len(policy_tables) < 2:
            continue

        basics_table, settings_table = policy_tables

        basic_info = parse_key_value_table(basics_table)
        settings_info = parse_key_value_table(settings_table)

        # Use the heading text as the policy name unless the table has one.
        basic_info.setdefault("PolicyName", policy_name)

        policies.append({
            'basic_info': basic_info,
            'settings': settings_info,
        })

    return policies
def parse_key_value_table(table_tag):
    """
    Turn a 'table-settings' <table> into a { key: value } dict.

    Each data row is expected to look like <tr><td>Key</td><td>Value</td></tr>.
    Rows are taken from the table's direct children and also from rows nested
    one level down inside <thead>/<tbody>/<tfoot> wrappers, so the result does
    not depend on whether the markup uses section wrappers.

    Rows are ignored when they:
      - carry the class 'table-header1' or 'category-level1', or
      - have fewer than two direct <td> cells.

    Args:
        table_tag: The parsed <table> element.

    Returns:
        A dict mapping the stripped text of the first cell to the stripped
        text of the second cell. If a key occurs in several rows, the last
        occurrence wins.
    """
    skipped_row_classes = {'table-header1', 'category-level1'}

    # Gather rows in document order, looking through section wrappers.
    rows = []
    for child in table_tag.find_all(
        ['tr', 'thead', 'tbody', 'tfoot'], recursive=False
    ):
        if child.name == 'tr':
            rows.append(child)
        else:
            rows.extend(child.find_all('tr', recursive=False))

    data = {}
    for row in rows:
        # Header and category rows are layout, not key/value data.
        row_classes = row.get('class') or []
        if skipped_row_classes.intersection(row_classes):
            continue

        cols = row.find_all('td', recursive=False)
        if len(cols) < 2:
            continue  # can't parse a key-value pair from this row

        key_text = cols[0].get_text(strip=True)
        val_text = cols[1].get_text(strip=True)
        data[key_text] = val_text

    return data
def write_single_csv(policies, output_csv='policies.csv'):
    """
    Flatten the parsed policies into one CSV file, one line per setting.

    Column order:
      1) PolicyName
      2) Description
      3) SettingKey
      4) SettingValue
      5) Policy type (read from the 'Profile type' basic-info entry)
      6) Platform supported
      7) Created
      8) Last modified

    A policy whose settings are empty still produces a single line, with
    SettingKey and SettingValue left blank. Missing basic-info entries are
    written as empty strings.
    """
    header = (
        "PolicyName",
        "Description",
        "SettingKey",
        "SettingValue",
        "Policy type",
        "Platform supported",
        "Created",
        "Last modified",
    )

    with open(output_csv, 'w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(header)

        for entry in policies:
            info = entry['basic_info']
            policy_settings = entry['settings']

            name = info.get("PolicyName", "")
            summary = info.get("Description", "")
            # The CSV column is "Policy type"; the source data calls it "Profile type".
            kind = info.get("Profile type", "")
            platform = info.get("Platform supported", "")
            created_on = info.get("Created", "")
            modified_on = info.get("Last modified", "")

            # No settings -> one placeholder pair so the policy still gets a line.
            if policy_settings:
                pairs = policy_settings.items()
            else:
                pairs = [("", "")]

            for key, value in pairs:
                csv_out.writerow([
                    name,
                    summary,
                    key,
                    value,
                    kind,
                    platform,
                    created_on,
                    modified_on,
                ])
def main():
    """
    Command-line entry point: parse the input file and write the flat CSV.

    The input path falls back to "cqre.md" when no positional argument is
    given. The output path is, in order of precedence: the -o/--output value,
    the input's base name with a ".csv" extension (when an input was given),
    or "policies-cqre.csv". A summary line is printed once the CSV is written.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Parse an Intune Markdown/HTML export with <h3> headings and two "
            "<table class='table-settings'> sections (Basics + Settings) into a flat CSV."
        )
    )
    cli.add_argument(
        "input",
        nargs="?",
        default=None,
        help="Path to the Markdown/HTML file to parse (default: cqre.md)",
    )
    cli.add_argument(
        "-o",
        "--output",
        default=None,
        help=(
            "Path to output CSV file. If not provided, derives from input name "
            "(e.g., input.md -> input.csv). If no input is given, defaults to policies-cqre.csv."
        ),
    )
    options = cli.parse_args()

    # Keep the historical default input when none is supplied.
    input_path = options.input if options.input else "cqre.md"

    if options.output:
        output_csv = options.output
    elif options.input:
        # Only the file name is kept, so the CSV lands in the working directory.
        stem, _ext = os.path.splitext(os.path.basename(input_path))
        output_csv = f"{stem}.csv"
    else:
        output_csv = "policies-cqre.csv"

    policies = parse_tables_from_markdown(input_path)

    # One line per setting, or a single line for a policy without settings.
    row_count = sum(
        max(len(policy.get("settings") or {}), 1) for policy in policies
    )

    write_single_csv(policies, output_csv)
    print(
        f"Done! Parsed {len(policies)} policies and wrote {row_count} rows to '{output_csv}'."
    )
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()