Multi formats input
This commit is contained in:
294
parseIntuneSettingsMarkdown.py
Normal file
294
parseIntuneSettingsMarkdown.py
Normal file
@@ -0,0 +1,294 @@
|
||||
import csv
|
||||
from bs4 import BeautifulSoup
|
||||
import argparse
|
||||
import os
|
||||
|
||||
def parse_tables_from_markdown(md_file_path):
    """
    Parse a Markdown/HTML file containing <h3> headings and 'table-settings' tables.

    Assumes each policy:
      - starts with an <h3> tag that has the policy name
      - is followed (as siblings of that heading) by two
        <table class='table-settings'> elements:
          1) 'Basics'
          2) 'Settings'

    The search for a policy's tables stops at the next <h3>, so a policy with
    fewer than two tables is skipped rather than picking up tables that
    belong to the following policy.

    Args:
        md_file_path: Path to the UTF-8 encoded Markdown/HTML file.

    Returns:
        A list of dicts, one per policy that has both tables, each with:
        {
            'basic_info': { key -> value, ... },
            'settings': { key -> value, ... }
        }
        'basic_info' always contains a "PolicyName" entry; the heading text
        is used when the Basics table does not supply one.
    """
    with open(md_file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'lxml')
    policies = []

    # Each <h3> is treated as a policy heading.
    for h3 in soup.find_all('h3'):
        policy_name = h3.get_text(strip=True)

        # Look for the next two 'table-settings' tables (Basics and Settings).
        policy_tables = []
        sibling = h3.next_sibling
        # Compare against None explicitly: an empty text node is falsy and
        # would otherwise end the walk before the tables are reached.
        while sibling is not None and len(policy_tables) < 2:
            # Text nodes carry no tag name, so this is None for them.
            tag_name = getattr(sibling, 'name', None)
            if tag_name == 'h3':
                # Next policy begins here; anything beyond is not ours.
                break
            if tag_name == 'table' and 'table-settings' in sibling.get('class', []):
                policy_tables.append(sibling)
            sibling = sibling.next_sibling

        # If fewer than 2 tables, skip this policy.
        if len(policy_tables) < 2:
            continue

        basics_table, settings_table = policy_tables

        # Parse out the Basic Info and Settings.
        basic_info = parse_key_value_table(basics_table)
        settings_info = parse_key_value_table(settings_table)

        # Put policy name into basic_info if not already present.
        basic_info.setdefault("PolicyName", policy_name)

        policies.append({
            'basic_info': basic_info,
            'settings': settings_info,
        })

    return policies
|
||||
|
||||
def parse_key_value_table(table_tag):
    """
    Parse a 'table-settings' <table> into a { key: value } dict.

    Each data row is expected to look like <td>Key</td><td>Value</td>.
    Rows are taken from the table itself and from any direct
    <thead>/<tbody>/<tfoot> wrapper, so explicitly sectioned tables are
    handled too; rows of nested tables are not visited.

    Skipped rows:
      - rows whose class list contains 'table-header1' or 'category-level1'
      - rows with fewer than two direct <td> cells

    Args:
        table_tag: The BeautifulSoup tag of the table to parse.

    Returns:
        A dict mapping the stripped text of the first cell to the stripped
        text of the second cell. If the same key appears more than once,
        the last occurrence wins.
    """
    data = {}

    # Gather rows in document order, looking one level into section wrappers.
    rows = []
    for child in table_tag.find_all(['tr', 'thead', 'tbody', 'tfoot'], recursive=False):
        if child.name == 'tr':
            rows.append(child)
        else:
            rows.extend(child.find_all('tr', recursive=False))

    for row in rows:
        # Skip table header and category rows.
        row_classes = row.get('class', [])
        if 'table-header1' in row_classes or 'category-level1' in row_classes:
            continue

        cols = row.find_all('td', recursive=False)
        if len(cols) < 2:
            continue  # can't parse a key-value from this row

        key_text = cols[0].get_text(strip=True)
        val_text = cols[1].get_text(strip=True)
        data[key_text] = val_text

    return data
|
||||
|
||||
def write_single_csv(policies, output_csv='policies.csv', dedupe=False, dedupe_scope="exact", lineterminator='\n'):
    """
    Write all policies to a single CSV, one row per setting.

    Columns, in this order:
      1) PolicyName
      2) Description
      3) SettingKey
      4) SettingValue
      5) Policy type (mapped from 'Profile type')
      6) Platform supported
      7) Created
      8) Last modified

    A policy with no settings still produces one row, with empty
    SettingKey/SettingValue.

    Args:
        policies: Iterable of dicts with 'basic_info' and 'settings' mappings.
        output_csv: Path of the CSV file to create (overwritten if present).
        dedupe: If true, rows whose duplicate key was already seen (across
            all policies) are skipped.
        dedupe_scope: How duplicates are identified when `dedupe` is set:
            - 'exact'  -> full row match (default)
            - 'policy' -> (PolicyName, SettingKey, SettingValue)
            - 'global' -> (SettingKey, SettingValue, Policy type, Platform supported)
            Any other value falls back to 'exact'.
        lineterminator: Line ending to use when writing the CSV
            (default `\\n`, use `\\r\\n` for Windows-style).

    Returns:
        The number of data rows written (the header row is not counted).
    """
    # The exact order we want:
    columns = [
        "PolicyName",
        "Description",
        "SettingKey",
        "SettingValue",
        "Policy type",
        "Platform supported",
        "Created",
        "Last modified",
    ]

    # Row positions forming the duplicate key for each scope.
    # Row layout: [PolicyName, Description, SettingKey, SettingValue,
    #              Policy type, Platform supported, Created, Last modified]
    key_columns_by_scope = {
        "policy": (0, 2, 3),
        "global": (2, 3, 4, 5),
    }
    # None means "use the whole row" ('exact' and any unknown scope).
    key_columns = key_columns_by_scope.get(dedupe_scope)

    def make_key(row_list):
        if key_columns is None:
            return tuple(row_list)
        return tuple(row_list[i] for i in key_columns)

    # De-duplication support (across the entire file).
    seen_keys = set()
    rows_written = 0

    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator=lineterminator)
        # Write header
        writer.writerow(columns)

        for policy in policies:
            basic_info = policy['basic_info']
            settings = policy['settings']

            # Extract the relevant basic info fields.
            policy_name = basic_info.get("PolicyName", "")
            description = basic_info.get("Description", "")
            # The CSV column is "Policy type", but the data calls it "Profile type".
            policy_type = basic_info.get("Profile type", "")
            platform_supported = basic_info.get("Platform supported", "")
            created = basic_info.get("Created", "")
            last_modified = basic_info.get("Last modified", "")

            # A policy without settings is represented by a single blank setting.
            setting_items = settings.items() if settings else [("", "")]

            for setting_key, setting_value in setting_items:
                row = [
                    policy_name,
                    description,
                    setting_key,
                    setting_value,
                    policy_type,
                    platform_supported,
                    created,
                    last_modified,
                ]
                if dedupe:
                    key = make_key(row)
                    if key in seen_keys:
                        continue
                    seen_keys.add(key)
                writer.writerow(row)
                rows_written += 1

    return rows_written
|
||||
|
||||
def main(argv=None):
    """
    Command-line entry point: parse an Intune export and write a flat CSV.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the process command line, which keeps `main()` working as before.

    Behavior:
      - Input path defaults to "cqre.md" when no positional argument is given.
      - Output path is `--output` if given; otherwise "<input base name>.csv"
        when an input was given, or "policies-cqre.csv" when it was not.
      - Exits through `parser.error` (status 2) if the input path does not exist.
      - Prints a one-line summary after the CSV has been written.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Parse an Intune Markdown/HTML export with <h3> headings and two "
            "<table class='table-settings'> sections (Basics + Settings) into a flat CSV."
        )
    )
    parser.add_argument(
        "input",
        nargs="?",
        help="Path to the Markdown/HTML file to parse (default: cqre.md)",
        default=None,
    )
    parser.add_argument(
        "-o", "--output",
        help=(
            "Path to output CSV file. If not provided, derives from input name "
            "(e.g., input.md -> input.csv). If no input is given, defaults to policies-cqre.csv."
        ),
        default=None,
    )
    parser.add_argument(
        "--dedupe",
        action="store_true",
        help=(
            "Drop exact duplicate rows in the output (by the full row: PolicyName, Description, "
            "SettingKey, SettingValue, Policy type, Platform supported, Created, Last modified)."
        ),
    )
    parser.add_argument(
        "--dedupe-scope",
        choices=["exact", "policy", "global"],
        default="exact",
        help=(
            "How to identify duplicates when --dedupe is set: 'exact' (full row), "
            "'policy' (PolicyName+SettingKey+SettingValue), or 'global' (SettingKey+SettingValue+Policy type+Platform)."
        ),
    )
    parser.add_argument(
        "--newline",
        choices=["lf", "crlf"],
        default="lf",
        help=(
            "Choose line endings for the output CSV: 'lf' (\\n, macOS/Linux) or 'crlf' (\\r\\n, Windows)."
        ),
    )

    args = parser.parse_args(argv)

    # Determine input path (keeps previous default behavior if none provided).
    input_path = args.input or "cqre.md"

    # Fail with a usage message instead of a traceback when the input is missing.
    if not os.path.exists(input_path):
        parser.error(f"input file not found: {input_path}")

    # Determine output path.
    if args.output:
        output_csv = args.output
    elif args.input:
        # Only the base name is used, so the CSV lands in the working directory.
        base = os.path.splitext(os.path.basename(input_path))[0]
        output_csv = f"{base}.csv"
    else:
        output_csv = "policies-cqre.csv"

    policies = parse_tables_from_markdown(input_path)

    lineterminator = "\n" if args.newline == "lf" else "\r\n"

    rows_written = write_single_csv(
        policies,
        output_csv,
        dedupe=args.dedupe,
        dedupe_scope=args.dedupe_scope,
        lineterminator=lineterminator,
    )

    msg = (
        f"Done! Parsed {len(policies)} policies and wrote {rows_written} rows to '{output_csv}'. "
        f"(newline={args.newline}"
    )
    if args.dedupe:
        msg += f", dedupe={args.dedupe_scope}"
    msg += ")"
    print(msg)
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user