"""Consolidate Microsoft Intune policy-settings CSV exports into a single file.

Originally: IntunePolicyParser/parseIntuneSettingsCSV.py (2025-09-22).
"""
import os
import io
import argparse
import pandas as pd
# --- Parsing helpers -------------------------------------------------------
def _read_csv_flexible(text: str) -> pd.DataFrame:
"""Try parsing CSV text with common delimiters and forgiving options.
Returns an empty DataFrame if nothing works.
"""
for sep in (';', ','):
try:
df = pd.read_csv(io.StringIO(text), sep=sep, dtype=str, engine='python', on_bad_lines='skip')
if not df.empty:
return df.fillna('')
except Exception:
continue
return pd.DataFrame()
def process_csv(file_path: str) -> pd.DataFrame:
    """Process a single CSV file into a normalized DataFrame.

    Supports two formats:

    1) "Older" sectioned exports that contain headers like `# Basic info` and `# Settings`.
       - We split by the `# Settings` marker and parse each block as CSV.
       - Adds a `Profile Name` column using the "Basics" table's Name/Value.
    2) "New" simple CSV exports (typically semicolon-delimited) with two columns (Name;Value).
       - We read the whole file and normalize to columns `Name`, `Value`.
       - Adds a `Profile Name` column derived from the filename.

    Returns an empty DataFrame when the file cannot be read or parsed,
    so callers can skip it without special-casing errors.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            raw_text = f.read()
    except Exception as e:
        # Unreadable file (permissions, races, ...): report and skip rather than abort the run.
        print(f"Skipping {file_path}: cannot read file ({e})")
        return pd.DataFrame()
    # Detect sectioned format by presence of the '# Settings' marker
    if '\n# Settings' in raw_text or raw_text.startswith('# Settings'):
        # --- Sectioned format ---
        basic_info_lines = []
        settings_lines = []
        settings_section = False
        for line in raw_text.splitlines(True):  # keep line endings so CSV rows stay intact
            if line.startswith('# Settings'):
                settings_section = True
                continue  # the marker line itself belongs to neither section
            if settings_section:
                settings_lines.append(line)
            else:
                basic_info_lines.append(line)
        # Parse basics
        basic_df = _read_csv_flexible(''.join(basic_info_lines))
        if basic_df.empty:
            print(f"Warning: could not parse Basics in {os.path.basename(file_path)}")
            # Fall back to the file name (sans extension) as the profile name.
            profile_name = os.path.splitext(os.path.basename(file_path))[0]
        else:
            # Try to locate Name/Value pair where Name == 'Name' and Category == 'Basics'
            cols = {c.lower().strip(): c for c in basic_df.columns}
            name_col = cols.get('name')
            value_col = cols.get('value')
            category_col = cols.get('category')
            # Default to the filename; overwritten below only if the Basics row is found.
            profile_name = os.path.splitext(os.path.basename(file_path))[0]
            try:
                if name_col and value_col:
                    basics = basic_df
                    if category_col:
                        basics = basics[ (basics[name_col] == 'Name') & (basics[category_col] == 'Basics') ]
                    else:
                        basics = basics[ basics[name_col] == 'Name' ]
                    if not basics.empty:
                        profile_name = basics.iloc[0][value_col]
            except Exception:
                # Best effort only: keep the filename-derived profile name on any lookup error.
                pass
        # Parse settings
        settings_df = _read_csv_flexible(''.join(settings_lines))
        if settings_df.empty:
            print(f"Warning: could not parse Settings in {os.path.basename(file_path)}")
            return pd.DataFrame()
        settings_df['Profile Name'] = profile_name
        return settings_df.fillna('')
    # --- New simple CSV format ---
    df = _read_csv_flexible(raw_text)
    if df.empty:
        print(f"Warning: empty or unrecognized CSV in {os.path.basename(file_path)}")
        return pd.DataFrame()
    # Normalize probable two-column shape to Name/Value
    # (strip whitespace and stray quotes from headers, compare case-insensitively).
    norm_map = {c: c.strip().strip('"').lower() for c in df.columns}
    df.columns = [norm_map[c] for c in df.columns]
    name_col = next((c for c in df.columns if c == 'name'), None)
    value_col = next((c for c in df.columns if c == 'value'), None)
    if name_col and value_col:
        tidy = df[[name_col, value_col]].copy()
        tidy.rename(columns={name_col: 'Name', value_col: 'Value'}, inplace=True)
    else:
        # If it doesn't look like the new two-column format, skip gracefully
        return pd.DataFrame()
    tidy['Profile Name'] = os.path.splitext(os.path.basename(file_path))[0]
    return tidy[['Profile Name', 'Name', 'Value']].fillna('')
# --- CLI & orchestration ---------------------------------------------------
def consolidate_folder(input_dir: str, output_file: str) -> int:
    """Consolidate every CSV in *input_dir* into a single output file.

    Each CSV is parsed by ``process_csv``; usable frames are concatenated,
    lightly deduplicated, and written as XLSX or CSV depending on
    *output_file*'s extension.

    Returns the number of rows written (0 if nothing was usable).
    """
    files = [
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if f.lower().endswith('.csv') and os.path.isfile(os.path.join(input_dir, f))
    ]
    files.sort()  # deterministic processing order regardless of OS listing order
    frames = []
    for path in files:
        part = process_csv(path)
        if not part.empty:
            # Normalize 'Name' to 'Setting' so all frames share column names.
            if 'Name' in part.columns and 'Value' in part.columns:
                part = part.rename(columns={'Name': 'Setting'})
            frames.append(part)
    if not frames:
        print("No usable CSVs found to consolidate.")
        return 0
    result = pd.concat(frames, ignore_index=True).fillna('')
    # Light dedupe within the consolidated output (by Profile/Setting/Value if present)
    key_cols = [c for c in ('Profile Name', 'Setting', 'Value') if c in result.columns]
    if key_cols:
        before = len(result)
        result = result.drop_duplicates(subset=key_cols)
        after = len(result)
        print(f"Deduplicated {before - after} rows (by {', '.join(key_cols)}).")
    # Choose writer based on extension
    ext = os.path.splitext(output_file)[1].lower()
    if ext in ('.xlsx', '.xlsm'):
        result.to_excel(output_file, index=False)
    else:
        # default to CSV with LF endings and UTF-8.
        # FIX: pandas renamed `line_terminator` -> `lineterminator` in 1.5 and
        # removed the old name in 2.0; the old spelling raised TypeError there.
        result.to_csv(output_file, index=False, encoding='utf-8', lineterminator='\n')
    print(f"Wrote {len(result)} rows from {len(frames)} files to {output_file}")
    return len(result)
def main():
    """CLI entry point: parse command-line arguments and run the consolidation."""
    arg_parser = argparse.ArgumentParser(
        description=(
            "Consolidate Intune settings from a folder of CSV exports (supports both sectioned "
            "and simple Name/Value formats)."
        )
    )
    arg_parser.add_argument(
        'input_dir', nargs='?', default='.',
        help='Directory containing CSV files to process (default: current directory)'
    )
    arg_parser.add_argument(
        '-o', '--output', default='consolidated_policies.xlsx',
        help='Output file path (CSV or XLSX). Default: consolidated_policies.xlsx'
    )
    opts = arg_parser.parse_args()
    consolidate_folder(opts.input_dir, opts.output)


if __name__ == '__main__':
    main()