commit 17d745bdac5beabe0be1a0f751749f98c8acbbe9 Author: Tomas Kracmar Date: Fri Apr 17 15:57:35 2026 +0200 Sync from dev @ 252c1cf Source: main (252c1cf) Excluded: live tenant exports, generated artifacts, and dev-only tooling. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..375cfb2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +docs/share/ +docs/security-review-package.pdf +docs/security-review-questionnaire.pdf +node_modules/ +__pycache__/ +*.py[cod] +*$py.class diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..456ea15 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,198 @@ +# Agent Guidance: Intune / Entra Drift Backup + +This repository tracks Git-based snapshots of Microsoft Intune and Entra ID configuration, generates review reports, and drives a rolling pull-request workflow for post-change review and remediation. + +## Project Overview + +The implementation is centered on three Azure DevOps pipelines: + +- `azure-pipelines.yml`: hourly backup/export pipeline with rolling PR management. +- `azure-pipelines-review-sync.yml`: 20-minute reviewer-decision sync and post-merge remediation queue. +- `azure-pipelines-restore.yml`: manual or auto-queued restore pipeline for approved baseline rollback. + +Workflow at a high level: +1. Export Intune and Entra configuration into `tenant-state/`. +2. Generate Markdown/CSV reports in `tenant-state/reports/`. +3. Filter known non-actionable drift noise before commit. +4. Commit workload drift to `drift/intune` and `drift/entra`. +5. Create or update one rolling PR per workload into `main`. +6. Refresh the PR description with deterministic change/risk summary and optional Azure OpenAI narrative. +7. Apply reviewer `/reject` or `/accept` decisions and queue restore when needed. + +## Technology Stack + +- **Python 3**: primary language for all automation scripts. +- **Azure DevOps Pipelines**: YAML-based CI/CD (`azure-pipelines.yml`, `azure-pipelines-review-sync.yml`, `azure-pipelines-restore.yml`). +- **PowerShell & Bash**: inline pipeline steps for Git operations, token retrieval, and conditional logic. +- **IntuneCD** (Python package, pinned to `2.5.0`): exports Intune configuration and restores baseline state. +- **Microsoft Graph API**: reads/writes tenant configuration and resolves references. +- **Node.js / md-to-pdf** (v5.2.5): generates HTML and PDF documentation artifacts from Markdown on full runs. +- **Azure OpenAI**: optional PR narrative generation. + +## Repository Layout + +``` +. +├── azure-pipelines.yml # Main hourly backup pipeline +├── azure-pipelines-review-sync.yml # 20-minute review sync +├── azure-pipelines-restore.yml # Baseline restore pipeline +├── scripts/ # Python automation helpers +├── tests/ # unittest coverage for scripts +├── tenant-state/ # Committed JSON exports and reports +│ ├── intune/ +│ ├── entra/ +│ └── reports/ +├── docs/ # Security review docs and roadmap +├── md2pdf/ # HTML/PDF styling and configs +├── prod-as-built.md # Generated as-built source +└── README.md # Operational overview for humans +``` + +### Key Scripts + +- `export_entra_baseline.py`: Graph API export for Entra objects (Named Locations, Authentication Strengths, Conditional Access, App Registrations, Enterprise Applications). +- `commit_entra_drift.py`: commits Entra drift with author attribution from audit logs. +- `resolve_ca_references.py`: resolves Conditional Access GUID references to human-readable names. 
+- `filter_entra_enrichment_noise.py`: reverts JSON churn caused by best-effort Graph enrichment (owners, app roles). +- `filter_intune_partial_settings_noise.py`: reverts partial Settings Catalog exports. +- `generate_assignment_report.py`: produces Markdown and CSV assignment inventories. +- `generate_app_inventory_report.py`: produces Entra apps inventory CSV. +- `generate_object_inventory_reports.py`: produces per-category object inventory CSVs. +- `validate_backup_outputs.py`: asserts required files exist after export. +- `ensure_rolling_pr.py`: creates or updates one rolling drift PR per workload. +- `update_pr_review_summary.py`: refreshes PR descriptions with change counts, risk assessment, and optional AI narrative. +- `apply_reviewer_rejections.py`: processes `/reject` and `/accept` reviewer thread commands. +- `queue_post_merge_restore.py`: queues restore pipeline after merged PRs that contained `/reject` decisions. + +## Code Style and Conventions + +- Every Python file starts with `#!/usr/bin/env python3` and `from __future__ import annotations`. +- Type hints are used throughout (`typing.Any`, `argparse.Namespace`, etc.). +- Internal helper functions are prefixed with `_`. +- Common environment parsing helpers appear in multiple scripts: + - `_env_text(name, default="")` – reads and sanitizes env vars, treating unresolved Azure DevOps macros `$(...)` as empty. + - `_env_bool(name, default=False)` – interprets `1`, `true`, `yes`, `on` as boolean true. +- Arguments use `argparse` with typed flags; pipeline variables are passed as env vars or CLI args. +- JSON is written with `indent=4` or `indent=5` and `ensure_ascii=False`. +- HTTP calls to Graph or Azure DevOps REST APIs use `urllib.request` (no external HTTP library). + +## Testing + +Tests are written with the Python standard library `unittest` framework. There is **no pytest configuration** (`pyproject.toml`, `setup.py`, or `pytest.ini` are absent). Modules are loaded dynamically in tests using `importlib.util.spec_from_file_location` so that scripts in `scripts/` do not need to be on `PYTHONPATH`. + +### Run Tests + +```bash +python3 -m unittest discover -s tests -v +``` + +### Test Coverage Areas + +- `test_ensure_rolling_pr.py`: rolling PR creation, draft publishing, merge strategy logic. +- `test_export_entra_baseline.py`: Entra export parsing, concurrent export behavior, error handling. +- `test_filter_entra_enrichment_noise.py`: enrichment-only churn detection and reversion. +- `test_filter_intune_partial_settings_noise.py`: partial Settings Catalog export filtering. +- `test_queue_post_merge_restore.py`: post-merge restore queueing logic. +- `test_update_pr_review_summary.py`: semantic diffing, AI thread management, PR description upserts. +- `test_validate_backup_outputs.py`: validation rules for Intune and Entra outputs. + +## Build and Runtime Architecture + +There is no traditional build step for the Python code. The pipelines install runtime dependencies on each run: + +```bash +pip3 install "IntuneCD==2.5.0" +``` + +For local development, only a Python 3 interpreter is required; scripts use the standard library except for the optional IntuneCD package. + +### Pipeline Jobs + +- **Intune backup job** (`backup_intune`): + 1. Prepare `drift/intune` branch from `main`. + 2. Decide light vs full mode (configured full-run hour or `forceFullRun=true`). + 3. Run `IntuneCD-startbackup`. + 4. Filter partial Settings Catalog exports. + 5. Resolve assignment group names from Graph. + 6. 
Generate assignment and object inventory reports. + 7. Validate outputs. + 8. Commit drift and update rolling PR. + +- **Entra backup job** (`backup_entra`): + 1. Prepare `drift/entra` branch from `main`. + 2. Export selected categories with `export_entra_baseline.py`. + 3. Resolve Conditional Access references. + 4. Generate reports. + 5. Validate outputs. + 6. Filter enrichment noise and commit drift. + +- **Review sync jobs** (`sync_intune_review_decisions`, `sync_entra_review_decisions`): + 1. Apply `/reject` decisions. + 2. Update automated PR summary. + 3. Queue post-merge restore when needed. + +- **Restore job** (`restore_from_baseline`): + 1. Checkout approved baseline snapshot (branch, tag, or commit). + 2. Prepare restore scope (`full` or `selective`). + 3. Normalize payload JSON and strip display-only assignment labels. + 4. Run `IntuneCD-startupdate` with optional `--entraupdate`. + +## Security Considerations + +- **Token handling**: Graph tokens are obtained via `Get-AzAccessToken` in PowerShell and passed as secret pipeline variables. Token payload is decoded to validate required application permissions before use. +- **Service connection**: Azure DevOps service connection (e.g. `sc-astral-backup`) uses workload federated credentials. +- **Permissions**: read-only permissions for backup; read-write permissions (`...ReadWrite.All`) for restore. Missing roles are surfaced as pipeline errors before any Graph mutations occur. +- **Path traversal**: selective restore paths are normalized and validated against `..` segments before file copy. +- **Dry run**: restore pipeline defaults to `dryRun=true` and must be explicitly overridden to push changes. +- **Access token scope**: `System.AccessToken` is required for PR and thread management via Azure DevOps REST APIs. + +## Common Development Tasks + +### Generate Entra export locally + +```bash +python3 ./scripts/export_entra_baseline.py \ + --root ./tenant-state/entra \ + --token "$GRAPH_TOKEN" \ + --enterprise-app-workers 8 +``` + +### Resolve Conditional Access references locally + +```bash +python3 ./scripts/resolve_ca_references.py \ + --root ./tenant-state/entra \ + --token "$GRAPH_TOKEN" +``` + +### Generate assignment report locally + +```bash +python3 ./scripts/generate_assignment_report.py \ + --root ./tenant-state/intune \ + --output-dir ./tenant-state/reports/intune +``` + +### Validate backup outputs locally + +```bash +python3 ./scripts/validate_backup_outputs.py \ + --workload intune \ + --mode light \ + --root ./tenant-state/intune \ + --reports-root ./tenant-state/reports/intune +``` + +## Key Environment / Pipeline Variables + +- `BASELINE_BRANCH` (default: `main`) +- `DRIFT_BRANCH_INTUNE` (default: `drift/intune`) +- `DRIFT_BRANCH_ENTRA` (default: `drift/entra`) +- `BACKUP_FOLDER` (default: `tenant-state`) +- `ENABLE_WORKLOAD_INTUNE` / `ENABLE_WORKLOAD_ENTRA` +- `ENABLE_PR_REVIEW_SUMMARY` / `ENABLE_PR_REVIEWER_DECISIONS` +- `AUTO_REMEDIATE_AFTER_MERGE` / `AUTO_REMEDIATE_DRY_RUN` +- `ENABLE_PR_AI_SUMMARY` + `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT`, `AZURE_OPENAI_API_KEY` +- `ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS` / `ROLLING_PR_MERGE_STRATEGY` + +See the top of each pipeline YAML for the full variable list and defaults. 
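
The scripts read these variables through the shared helpers described under Code Style and Conventions. As a rough illustration only (the real helpers in `scripts/` are the source of truth and may differ in detail; the variable names in the demo are taken from the list above):

```python
#!/usr/bin/env python3
from __future__ import annotations

import os
import re

# Unresolved Azure DevOps macros look like "$(SOME_VARIABLE)" when a pipeline
# variable was never substituted; treat those the same as "not set".
_UNRESOLVED_MACRO = re.compile(r"^\$\([^)]*\)$")


def _env_text(name: str, default: str = "") -> str:
    """Read an environment variable, treating unresolved $(...) macros as empty."""
    value = os.environ.get(name, "").strip()
    if not value or _UNRESOLVED_MACRO.match(value):
        return default
    return value


def _env_bool(name: str, default: bool = False) -> bool:
    """Interpret 1 / true / yes / on (case-insensitive) as boolean true."""
    value = _env_text(name)
    if not value:
        return default
    return value.lower() in {"1", "true", "yes", "on"}


if __name__ == "__main__":
    baseline = _env_text("BASELINE_BRANCH", "main")
    intune_enabled = _env_bool("ENABLE_WORKLOAD_INTUNE", True)
    print(f"baseline={baseline} intune_enabled={intune_enabled}")
```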
diff --git a/README.md b/README.md new file mode 100644 index 0000000..a1bd78e --- /dev/null +++ b/README.md @@ -0,0 +1,422 @@ +# Intune / Entra Drift Backup + +This repository keeps Git-tracked snapshots of Microsoft Intune and selected Entra ID configuration, generates review reports, and drives a rolling pull-request workflow for post-change review and remediation. + +> **Product name:** ASTRAL (Admin Security Through Review, Automation & Least-privilege) + +## Getting Started + +This repository is designed to be forked or downloaded into your own Azure DevOps organization. Each tenant gets its own project and pipeline instance. + +Quick start: + +1. Fork or import this repository into an Azure DevOps project. +2. Review `templates/variables-tenant.yml` and create a matching Azure DevOps Variable Group in your project (e.g. `vg-astral-tenant`). +3. Uncomment the variable group reference in the three pipeline YAMLs. +4. Run `deploy/bootstrap-tenant.ps1` to create the Azure AD app registration, assign Graph permissions, and configure the federated credential. +5. Create the Azure DevOps service connection using the app registration details from the bootstrap script. +6. Import the three pipelines (`azure-pipelines.yml`, `azure-pipelines-review-sync.yml`, `azure-pipelines-restore.yml`) into Azure DevOps. +7. Run `deploy/validate-deployment.yml` to verify connectivity and permissions. +8. Set `AUTO_REMEDIATE_RESTORE_PIPELINE_ID` in your variable group after the restore pipeline is imported. + +See [`deploy/onboarding-runbook.md`](deploy/onboarding-runbook.md) for the full step-by-step guide. + +## What The Repository Does + +The implementation is centered on three Azure DevOps pipelines: + +- `azure-pipelines.yml`: hourly backup/export pipeline with rolling PR management. +- `azure-pipelines-review-sync.yml`: 20-minute reviewer-decision sync and post-merge remediation queue. +- `azure-pipelines-restore.yml`: manual or auto-queued restore pipeline for approved baseline rollback. + +The main workflow is: + +1. Export Intune and Entra configuration into `tenant-state/`. +2. Generate Markdown/CSV reports in `tenant-state/reports/`. +3. Filter known non-actionable drift noise before commit. +4. Commit workload drift to `drift/intune` and `drift/entra`. +5. Create or update one rolling PR per workload into `main`. +6. Refresh the PR description with deterministic change/risk summary and optional Azure OpenAI narrative. +7. Apply reviewer `/reject` or `/accept` decisions and queue restore when needed. + +This is an ex-post change-management model: admins can change settings in the Microsoft admin portals, and the repo turns those changes into auditable Git drift with a review and rollback path. + +## Current Baseline Coverage + +Intune currently tracks: + +- App Configuration +- App Protection +- Apple Push Notification +- Apple VPP Tokens +- Applications +- Compliance Policies +- Device Configurations +- Device Management Settings +- Enrollment Configurations +- Enrollment Profiles +- Filters +- Scope Tags +- Scripts +- Settings Catalog + +Entra currently tracks: + +- Named Locations +- Authentication Strengths +- Conditional Access +- App Registrations +- Enterprise Applications + +Current scope behavior: + +- Named Locations, Authentication Strengths, and Conditional Access run on hourly light runs and midnight full runs. +- App Registrations and Enterprise Applications are enabled in the pipeline but exported only on full runs. 
+- During light runs, the previous drift-branch snapshot of `App Registrations` and `Enterprise Applications` is preserved to avoid churn and heavy export cost. + +## Repository Layout + +- `README.md`: operational overview. +- `azure-pipelines.yml`: backup/export, report generation, drift commit, rolling PR, and docs/artifact flow. +- `azure-pipelines-review-sync.yml`: reviewer decision sync and post-merge remediation helper. +- `azure-pipelines-restore.yml`: baseline restore pipeline with full or selective scope. +- `docs/m365-baseline-roadmap.md`: expansion roadmap beyond current workload scope. +- `docs/security-review-package.md`: implementation-focused security review package. +- `docs/security-review-questionnaire.md`: short-form security review answers. +- `scripts/`: export, reporting, PR automation, validation, and remediation helpers. +- `tests/`: focused unit coverage for the Python helpers. +- `tenant-state/intune`: committed Intune JSON export. +- `tenant-state/entra`: committed Entra JSON export. +- `tenant-state/reports/intune`: Intune CSV/Markdown reports. +- `tenant-state/reports/entra`: Entra CSV/Markdown reports. +- `prod-as-built.md`: generated as-built document source. +- `md2pdf/`: HTML/PDF styling and config for documentation publish. + +## Pipeline Model + +### Main Backup Pipeline + +`azure-pipelines.yml` runs hourly on `main`. + +For Intune it: + +1. Prepares `drift/intune` from `main`. +2. Chooses light vs full mode from the configured local timezone, with `forceFullRun=true` override. +3. Runs IntuneCD export. +4. Reverts partial Settings Catalog exports with `scripts/filter_intune_partial_settings_noise.py`. +5. Resolves assignment group names from Graph when needed. +6. Generates assignment and object inventory reports. +7. Validates outputs with `scripts/validate_backup_outputs.py`. +8. Commits drift and updates the rolling PR flow. + +For Entra it: + +1. Prepares `drift/entra` from `main`. +2. Chooses effective export scope per mode. +3. Exports selected categories with `scripts/export_entra_baseline.py`. +4. Resolves Conditional Access reference names with `scripts/resolve_ca_references.py`. +5. Generates assignment, app, and object inventory reports. +6. Validates outputs with `scripts/validate_backup_outputs.py`. +7. Reverts enrichment-only JSON churn with `scripts/filter_entra_enrichment_noise.py`. +8. Commits drift with `scripts/commit_entra_drift.py`. + +### Review Sync Pipeline + +`azure-pipelines-review-sync.yml` runs every 20 minutes on `main` and exists to shorten the reviewer feedback loop. + +Per workload it can: + +- apply reviewer `/reject` and `/accept` decisions with `scripts/apply_reviewer_rejections.py` +- refresh the automated PR summary +- queue restore after merged PRs that contained reviewer `/reject` decisions using `scripts/queue_post_merge_restore.py` + +### Restore Pipeline + +`azure-pipelines-restore.yml` restores from approved baseline (`main` by default) or from a historical branch, tag, or commit. + +Supported restore modes: + +- `full`: restore the full committed Intune baseline +- `selective`: restore only selected file paths + +It also supports optional Entra update when restore automation is triggered for Entra review outcomes. 
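
For selective restore, each requested path is normalized to an intune-relative path and rejected if it contains traversal segments before any file is copied into the restore scope. A simplified sketch of that normalization (the inline logic in `azure-pipelines-restore.yml` is authoritative; the sample paths below are placeholders):

```python
from __future__ import annotations

import pathlib

# Corresponds to $(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)/ in the pipeline.
BACKUP_PREFIX = "tenant-state/intune/"


def normalize_selective_path(raw: str) -> str | None:
    """Return an intune-relative path, or None if the input is unusable."""
    p = raw.strip().replace("\\", "/").lstrip("./")
    if p.startswith(BACKUP_PREFIX):          # repo-relative input
        p = p[len(BACKUP_PREFIX):]
    elif p.startswith("intune/"):            # subdir-relative input
        p = p[len("intune/"):]
    parts = pathlib.PurePosixPath(p).parts
    if not parts or any(part in {"..", ""} for part in parts):
        return None                          # empty path or traversal: reject
    return "/".join(parts)


# Example (placeholder file names):
print(normalize_selective_path("tenant-state/intune/Compliance Policies/policy.json"))
print(normalize_selective_path("Applications/../../secrets.json"))  # -> None
```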
+ +## Schedule And Run Modes + +- Main backup schedule: hourly, `0 * * * *`, on `main` +- Review sync schedule: every 20 minutes, `*/20 * * * *`, on `main` +- Full mode: configured full-run hour (default 00:00) or manual queue with `forceFullRun=true` +- Light mode: every other scheduled hour + +Full mode adds: + +- full Entra scope, including App Registrations and Enterprise Applications +- Intune split-documentation generation +- HTML/PDF artifact generation when browser dependencies are available +- optional tagging and documentation publish steps + +## Branch And PR Model + +- Baseline branch: `main` +- Drift branches: + - `drift/intune` + - `drift/entra` + +Each workload keeps one rolling PR open to `main`. + +Key behavior: + +- reports are generated in `tenant-state/reports/*` but excluded from rolling drift commits and PR diffs +- rolling PRs can be created as draft first, then published after automated summary generation when `ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS=true` +- merge strategy for the rolling PR is controlled by `ROLLING_PR_MERGE_STRATEGY` and defaults to `rebase` + +## Review, Tickets, And Remediation + +The PR automation currently supports: + +- deterministic operation counts and risk assessment +- rename-aware semantic comparison +- stable change fingerprinting for idempotent summary refresh +- optional Azure OpenAI reviewer narrative +- optional per-file `Change Needed` review threads when `REQUIRE_CHANGE_TICKETS=true` + +Reviewer thread commands: + +- `/reject`: remove that file-level drift from the rolling PR by resetting it to baseline +- `/accept`: keep that file in PR scope + +Supported remediation paths: + +1. Reject and abandon a whole rolling PR. + The next run can detect the matching rejected snapshot and queue restore automatically. +2. Reject selected files in ticket threads, then merge the accepted remainder. + The review-sync pipeline can queue restore after merge so the tenant is reconciled to the merged baseline. +3. Queue `azure-pipelines-restore.yml` manually for full or selective historical rollback. 
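
The third path can also be scripted. Below is a hedged sketch of queueing the restore pipeline through the Azure DevOps Pipelines REST API with `urllib.request`, in line with the repo's no-external-HTTP-library convention. `queue_post_merge_restore.py` owns the real queueing logic and may build its request differently; the parameter values shown are placeholders.

```python
from __future__ import annotations

import json
import os
import urllib.parse
import urllib.request

# These env variables exist as pipeline variable mappings in this repo,
# but the request shape below is only an illustration.
collection_uri = os.environ["SYSTEM_COLLECTIONURI"]   # e.g. https://dev.azure.com/org/
project = urllib.parse.quote(os.environ["SYSTEM_TEAMPROJECT"])
pipeline_id = os.environ["AUTO_REMEDIATE_RESTORE_PIPELINE_ID"]
token = os.environ["SYSTEM_ACCESSTOKEN"]

body = {
    "resources": {"repositories": {"self": {"refName": "refs/heads/main"}}},
    "templateParameters": {
        "dryRun": "true",            # keep the default report-only behaviour
        "restoreMode": "selective",
        "restorePathsCsv": "tenant-state/intune/Compliance Policies/policy.json",
    },
}

url = f"{collection_uri}{project}/_apis/pipelines/{pipeline_id}/runs?api-version=7.0"
request = urllib.request.Request(
    url,
    data=json.dumps(body).encode("utf-8"),
    headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(request, timeout=30) as response:
    run = json.loads(response.read().decode("utf-8"))
print(f"Queued restore run id={run.get('id')} state={run.get('state')}")
```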
+ +## Key Variables + +Core repo and branch settings: + +- `BASELINE_BRANCH` +- `DRIFT_BRANCH_INTUNE` +- `DRIFT_BRANCH_ENTRA` +- `ROLLING_PR_TITLE_INTUNE` +- `ROLLING_PR_TITLE_ENTRA` +- `BACKUP_FOLDER` +- `INTUNE_BACKUP_SUBDIR` +- `ENTRA_BACKUP_SUBDIR` +- `REPORTS_SUBDIR` + +Workload toggles: + +- `ENABLE_WORKLOAD_INTUNE` +- `ENABLE_WORKLOAD_ENTRA` +- `ENABLE_ENTRA_CONDITIONAL_ACCESS` + +Intune behavior: + +- `INTUNECD_VERSION` +- `EXCLUDE_SCRIPT_BACKUP` +- `INTUNE_EXCLUDE_CSV` +- `SPLIT_DOCUMENTATION` + +Entra behavior: + +- `ENTRA_INCLUDE_NAMED_LOCATIONS` +- `ENTRA_INCLUDE_AUTHENTICATION_STRENGTHS` +- `ENTRA_INCLUDE_CONDITIONAL_ACCESS` +- `ENTRA_INCLUDE_APP_REGISTRATIONS` +- `ENTRA_INCLUDE_ENTERPRISE_APPS` +- `ENTRA_ENTERPRISE_APP_WORKERS` + +PR and reviewer automation: + +- `ENABLE_PR_REVIEW_SUMMARY` +- `ENABLE_PR_REVIEWER_DECISIONS` +- `ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS` +- `ROLLING_PR_MERGE_STRATEGY` +- `REQUIRE_CHANGE_TICKETS` +- `CHANGE_TICKET_REGEX` +- `DEBUG_CHANGE_TICKET_THREADS` + +Auto-remediation: + +- `AUTO_REMEDIATE_ON_PR_REJECTION` +- `AUTO_REMEDIATE_AFTER_MERGE` +- `AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS` +- `AUTO_REMEDIATE_RESTORE_PIPELINE_ID` +- `AUTO_REMEDIATE_DRY_RUN` +- `AUTO_REMEDIATE_UPDATE_ASSIGNMENTS` +- `AUTO_REMEDIATE_REMOVE_OBJECTS` +- `AUTO_REMEDIATE_MAX_WORKERS` +- `AUTO_REMEDIATE_EXCLUDE_CSV` + +Azure OpenAI integration: + +- `ENABLE_PR_AI_SUMMARY` +- `AZURE_OPENAI_ENDPOINT` +- `AZURE_OPENAI_DEPLOYMENT` +- `AZURE_OPENAI_API_KEY` +- `AZURE_OPENAI_API_VERSION` +- `PR_AI_PAYLOAD_MAX_BYTES` +- `PR_AI_MAX_TOKENS` +- `PR_AI_COMPACT_MAX_CHARS` + +## Required Azure DevOps Permissions + +The pipeline build identity should have repository permissions to: + +- contribute +- create branch +- force push +- create and update pull requests +- create tag if tagging is enabled + +For auto-queued restore, the same identity also needs on `azure-pipelines-restore.yml`: + +- `View builds` +- `Queue builds` +- pipeline authorization if explicit pipeline permissions are enforced + +Also enable script access to `System.AccessToken`. 
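
With those permissions in place and `System.AccessToken` exposed to scripts, the rolling PR for a workload can be located with a plain REST call. A minimal sketch for illustration (the env variable names mirror the mappings used in the pipeline YAMLs; `ensure_rolling_pr.py` and the review-sync scripts implement the real logic):

```python
from __future__ import annotations

import json
import os
import urllib.parse
import urllib.request

collection_uri = os.environ["SYSTEM_COLLECTIONURI"]
project = urllib.parse.quote(os.environ["SYSTEM_TEAMPROJECT"])
repository_id = os.environ["BUILD_REPOSITORY_ID"]
token = os.environ["SYSTEM_ACCESSTOKEN"]

# Look up active PRs from the Intune drift branch into the baseline branch.
query = urllib.parse.urlencode(
    {
        "searchCriteria.status": "active",
        "searchCriteria.sourceRefName": "refs/heads/drift/intune",
        "searchCriteria.targetRefName": "refs/heads/main",
        "api-version": "7.0",
    }
)
url = f"{collection_uri}{project}/_apis/git/repositories/{repository_id}/pullrequests?{query}"
request = urllib.request.Request(
    url, headers={"Authorization": f"Bearer {token}", "Accept": "application/json"}
)
with urllib.request.urlopen(request, timeout=30) as response:
    payload = json.loads(response.read().decode("utf-8"))

for pr in payload.get("value", []):
    print(f"Rolling PR #{pr['pullRequestId']}: {pr['title']}")
```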
+ +## Required Microsoft Graph Application Permissions + +Baseline read permissions used by the current implementation: + +- `Device.Read.All` +- `DeviceManagementApps.Read.All` +- `DeviceManagementConfiguration.Read.All` +- `DeviceManagementManagedDevices.Read.All` +- `DeviceManagementRBAC.Read.All` +- `DeviceManagementScripts.Read.All` +- `DeviceManagementServiceConfig.Read.All` +- `Group.Read.All` +- `Policy.Read.All` +- `Policy.Read.ConditionalAccess` +- `Policy.Read.DeviceConfiguration` +- `User.Read.All` + +Additional read permissions used by the current Entra scope: + +- `Application.Read.All` +- `RoleManagement.Read.Directory` or `Directory.Read.All` for richer name resolution +- `AuditLog.Read.All` for best-effort Entra drift author attribution + +Restore pipeline write permissions: + +- `DeviceManagementApps.ReadWrite.All` +- `DeviceManagementConfiguration.ReadWrite.All` +- `DeviceManagementManagedDevices.ReadWrite.All` +- `DeviceManagementRBAC.ReadWrite.All` +- `DeviceManagementScripts.ReadWrite.All` +- `DeviceManagementServiceConfig.ReadWrite.All` +- `Group.Read.All` + +Additional restore permission when `includeEntraUpdate=true`: + +- `Policy.Read.All` +- `Policy.ReadWrite.ConditionalAccess` + +## Outputs + +Intune outputs: + +- JSON backup under `tenant-state/intune/**` +- `tenant-state/reports/intune/policy-assignments.md` +- `tenant-state/reports/intune/policy-assignments.csv` +- `tenant-state/reports/intune/object-inventory-all.csv` +- `tenant-state/reports/intune/Object Inventory/*-inventory.csv` + +Entra outputs: + +- JSON backup under `tenant-state/entra/**` +- `tenant-state/reports/entra/policy-assignments.md` +- `tenant-state/reports/entra/policy-assignments.csv` +- `tenant-state/reports/entra/apps-inventory.csv` +- `tenant-state/reports/entra/object-inventory-all.csv` +- `tenant-state/reports/entra/Object Inventory/*-inventory.csv` + +Full-run documentation artifacts: + +- `prod-as-built-split-markdown` +- `prod-as-built-split-html` +- `prod-as-built-split-pdf` + +## Local Script Usage + +Generate Entra export locally: + +```bash +python3 ./scripts/export_entra_baseline.py \ + --root ./tenant-state/entra \ + --token "$GRAPH_TOKEN" \ + --enterprise-app-workers 8 +``` + +Resolve Conditional Access references: + +```bash +python3 ./scripts/resolve_ca_references.py \ + --root ./tenant-state/entra \ + --token "$GRAPH_TOKEN" +``` + +Generate Intune assignment report: + +```bash +python3 ./scripts/generate_assignment_report.py \ + --root ./tenant-state/intune \ + --output-dir ./tenant-state/reports/intune +``` + +Generate Entra assignment report: + +```bash +python3 ./scripts/generate_assignment_report.py \ + --root ./tenant-state/entra \ + --output-dir ./tenant-state/reports/entra +``` + +Generate Entra apps inventory: + +```bash +python3 ./scripts/generate_app_inventory_report.py \ + --root ./tenant-state/entra \ + --output-dir ./tenant-state/reports/entra +``` + +Generate workload object inventories: + +```bash +python3 ./scripts/generate_object_inventory_reports.py \ + --root ./tenant-state/intune \ + --output-dir ./tenant-state/reports/intune + +python3 ./scripts/generate_object_inventory_reports.py \ + --root ./tenant-state/entra \ + --output-dir ./tenant-state/reports/entra +``` + +Validate backup outputs: + +```bash +python3 ./scripts/validate_backup_outputs.py \ + --workload intune \ + --mode light \ + --root ./tenant-state/intune \ + --reports-root ./tenant-state/reports/intune +``` + +## Tests + +The repository includes focused unit tests for: + +- Entra export 
behavior +- backup output validation +- rolling PR creation/update logic +- PR summary generation +- reviewer rejection processing +- post-merge restore queueing +- Intune partial-export noise filtering +- Entra enrichment-noise filtering diff --git a/azure-pipelines-restore.yml b/azure-pipelines-restore.yml new file mode 100644 index 0000000..34f941e --- /dev/null +++ b/azure-pipelines-restore.yml @@ -0,0 +1,611 @@ +trigger: none +pr: none + +parameters: + - name: dryRun + displayName: Dry run only (report, no changes pushed) + type: boolean + default: true + - name: updateAssignments + displayName: Update assignments + type: boolean + default: true + - name: removeObjectsNotInBaseline + displayName: Remove objects not present in baseline + type: boolean + default: false + - name: includeEntraUpdate + displayName: Include Entra updates + type: boolean + default: false + - name: baselineBranch + displayName: Baseline branch to restore from + type: string + default: main + - name: baselineRef + displayName: Optional historical git ref (branch/tag/commit) to restore from + type: string + default: "" + - name: restoreMode + displayName: Restore mode (`full` or `selective`) + type: string + default: full + - name: restorePathsCsv + displayName: Selective restore file paths (CSV; repo-relative or intune-relative) + type: string + default: "" + - name: maxWorkers + displayName: IntuneCD max workers + type: number + default: 10 + - name: excludeCsv + displayName: Exclude object categories (comma-separated IntuneCD keys) + type: string + default: "" + +variables: + # Tenant-specific values are expected in a variable group (see templates/variables-tenant.yml). + # Uncomment the line below after creating the group in your Azure DevOps project. + # - group: vg-astral-tenant + - template: templates/variables-common.yml + - name: BACKUP_FOLDER + value: tenant-state + - name: INTUNE_BACKUP_SUBDIR + value: intune + - name: INTUNECD_VERSION + value: 2.5.0 + +jobs: + - job: restore_from_baseline + displayName: Restore tenant from approved baseline + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Checkout approved baseline snapshot + inputs: + targetType: inline + script: | + set -euo pipefail + + TARGET_REF_RAW="${{ parameters.baselineRef }}" + TARGET_REF="$(echo "$TARGET_REF_RAW" | xargs)" + TARGET_REF_LOWER="$(echo "$TARGET_REF" | tr '[:upper:]' '[:lower:]')" + if echo "$TARGET_REF" | grep -Eq '^\$\([^)]+\)$'; then + TARGET_REF="" + elif [ "$TARGET_REF_LOWER" = "none" ] || [ "$TARGET_REF_LOWER" = "null" ] || [ "$TARGET_REF_LOWER" = "n/a" ] || [ "$TARGET_REF_LOWER" = "-" ] || [ "$TARGET_REF_LOWER" = "_none_" ]; then + TARGET_REF="" + fi + if [ -z "$TARGET_REF" ]; then + TARGET_REF="${{ parameters.baselineBranch }}" + fi + + git fetch --quiet --tags origin + git fetch --quiet origin "${{ parameters.baselineBranch }}" + + RESOLVED_REF="" + if git rev-parse --verify --quiet "origin/$TARGET_REF^{commit}" >/dev/null; then + RESOLVED_REF="origin/$TARGET_REF" + elif git rev-parse --verify --quiet "$TARGET_REF^{commit}" >/dev/null; then + RESOLVED_REF="$TARGET_REF" + elif git fetch --quiet origin "$TARGET_REF" >/dev/null 2>&1; then + RESOLVED_REF="FETCH_HEAD" + fi + + if [ -z "$RESOLVED_REF" ]; then + echo "##vso[task.logissue type=error]Unable to resolve baseline ref '$TARGET_REF'." + echo "Checked local ref, origin/, and direct fetch origin ." 
+ exit 1 + fi + + git checkout --force --detach "$RESOLVED_REF" + RESOLVED_COMMIT="$(git rev-parse HEAD)" + echo "Restore baseline snapshot selected: requested=$TARGET_REF resolved=$RESOLVED_REF commit=$RESOLVED_COMMIT" + echo "##vso[task.setvariable variable=RESTORE_BASELINE_REF]$TARGET_REF" + echo "##vso[task.setvariable variable=RESTORE_BASELINE_COMMIT]$RESOLVED_COMMIT" + test -d "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + + - task: Bash@3 + displayName: Install IntuneCD + inputs: + targetType: inline + script: | + set -euo pipefail + pip3 install "IntuneCD==$(INTUNECD_VERSION)" + + - task: Bash@3 + displayName: Prepare restore payload scope + inputs: + targetType: inline + script: | + set -euo pipefail + python3 - <<'PY' + import os + import pathlib + import shutil + import sys + + repo_root = pathlib.Path(os.environ["BUILD_SOURCESDIRECTORY"]).resolve() + backup_folder = os.environ["BACKUP_FOLDER"] + intune_subdir = os.environ["INTUNE_BACKUP_SUBDIR"] + restore_mode = os.environ.get("RESTORE_MODE", "").strip().lower() or "full" + restore_paths_csv = os.environ.get("RESTORE_PATHS_CSV", "").strip() + temp_root = pathlib.Path(os.environ["AGENT_TEMPDIRECTORY"]).resolve() / "restore-scope-intune" + intune_root = repo_root / backup_folder / intune_subdir + + if not intune_root.is_dir(): + print(f"##vso[task.logissue type=error]Intune restore source root not found: {intune_root}") + raise SystemExit(1) + + if restore_mode == "full": + print(f"Restore mode: full (source={intune_root})") + print(f"##vso[task.setvariable variable=RESTORE_SOURCE_PATH]{intune_root}") + raise SystemExit(0) + + if restore_mode not in {"selective", "paths"}: + print(f"##vso[task.logissue type=error]Unsupported restoreMode '{restore_mode}'. Use 'full' or 'selective'.") + raise SystemExit(1) + + raw_items = [item.strip() for item in restore_paths_csv.replace("\n", ",").split(",")] + raw_items = [item for item in raw_items if item] + if not raw_items: + print("##vso[task.logissue type=error]restoreMode=selective requires restorePathsCsv with at least one path.") + raise SystemExit(1) + + backup_prefix = f"{backup_folder}/{intune_subdir}/" + copied = [] + errors = [] + + if temp_root.exists(): + shutil.rmtree(temp_root) + temp_root.mkdir(parents=True, exist_ok=True) + + def normalize_to_intune_relative(path_text): + p = path_text.replace("\\", "/").lstrip("./") + if p.startswith("/"): + return None + if p.startswith(backup_prefix): + p = p[len(backup_prefix):] + elif p.startswith(f"{intune_subdir}/"): + p = p[len(intune_subdir) + 1 :] + return p.strip("/") + + for item in raw_items: + rel = normalize_to_intune_relative(item) + if not rel: + errors.append(f"Invalid path '{item}'") + continue + + parts = pathlib.PurePosixPath(rel).parts + if any(part in {"..", ""} for part in parts): + errors.append(f"Path traversal is not allowed: '{item}'") + continue + + src = intune_root.joinpath(*parts) + if not src.is_file(): + errors.append(f"Path not found in selected baseline snapshot: '{item}' -> '{src}'") + continue + + dst = temp_root.joinpath(*parts) + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + copied.append(rel) + + if errors: + for message in errors: + print(f"##vso[task.logissue type=error]{message}") + raise SystemExit(1) + + if not copied: + print("##vso[task.logissue type=error]No files were prepared for selective restore.") + raise SystemExit(1) + + print(f"Restore mode: selective (files={len(copied)})") + for rel in copied: + print(f" - 
{backup_folder}/{intune_subdir}/{rel}") + print(f"##vso[task.setvariable variable=RESTORE_SOURCE_PATH]{temp_root}") + PY + env: + RESTORE_MODE: ${{ parameters.restoreMode }} + RESTORE_PATHS_CSV: ${{ parameters.restorePathsCsv }} + + - task: AzurePowerShell@5 + displayName: Get Graph token for restore + inputs: + azureSubscription: $(SERVICE_CONNECTION_NAME) + azurePowerShellVersion: LatestVersion + ScriptType: inlineScript + Inline: | + $getTokenParams = @{ + ResourceTypeName = 'MSGraph' + AsSecureString = $true + ErrorAction = 'Stop' + } + $tokenCommand = Get-Command Get-AzAccessToken -ErrorAction Stop + if ($tokenCommand.Parameters.ContainsKey('ForceRefresh')) { + $getTokenParams['ForceRefresh'] = $true + } + $accessToken = ([PSCredential]::New('dummy', (Get-AzAccessToken @getTokenParams).Token).GetNetworkCredential().Password) + + $tokenParts = $accessToken.Split('.') + if ($tokenParts.Length -lt 2) { throw "Invalid Graph access token format." } + $payload = $tokenParts[1].Replace('-', '+').Replace('_', '/') + switch ($payload.Length % 4) { + 2 { $payload += '==' } + 3 { $payload += '=' } + } + $payloadJson = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($payload)) + $claims = $payloadJson | ConvertFrom-Json + $roles = @($claims.roles) + $sortedRoles = $roles | Sort-Object + Write-Host "Graph token roles for restore: $($sortedRoles -join ', ')" + + $missingRoles = @() + $requiredIntuneWriteRoles = @( + 'DeviceManagementApps.ReadWrite.All', + 'DeviceManagementConfiguration.ReadWrite.All', + 'DeviceManagementManagedDevices.ReadWrite.All', + 'DeviceManagementRBAC.ReadWrite.All', + 'DeviceManagementScripts.ReadWrite.All', + 'DeviceManagementServiceConfig.ReadWrite.All', + 'Group.Read.All' + ) + foreach ($role in $requiredIntuneWriteRoles) { + if (-not ($roles -contains $role)) { $missingRoles += $role } + } + + if ("${{ parameters.includeEntraUpdate }}" -eq "true") { + $requiredEntraWriteRoles = @( + 'Policy.Read.All', + 'Policy.ReadWrite.ConditionalAccess' + ) + foreach ($role in $requiredEntraWriteRoles) { + if (-not ($roles -contains $role)) { $missingRoles += $role } + } + } + + if ($missingRoles.Count -gt 0) { + $missingRoles = $missingRoles | Select-Object -Unique | Sort-Object + Write-Host "##vso[task.logissue type=error]Graph token is missing restore permissions: $($missingRoles -join ', ')" + throw "Service connection token is missing required Graph application permissions for restore." 
+ } + + Write-Host "##vso[task.setvariable variable=accessToken;issecret=true]$accessToken" + + - task: Bash@3 + displayName: Run IntuneCD restore/update + inputs: + targetType: inline + script: | + set -euo pipefail + echo "RESTORE_SCRIPT_VERSION=2026-03-12.8" + + to_lower() { + echo "$1" | tr '[:upper:]' '[:lower:]' + } + + DRY_RUN="$(to_lower "$DRY_RUN")" + UPDATE_ASSIGNMENTS="$(to_lower "$UPDATE_ASSIGNMENTS")" + REMOVE_UNMANAGED="$(to_lower "$REMOVE_UNMANAGED")" + ENTRA_UPDATE="$(to_lower "$ENTRA_UPDATE")" + + if [ -z "$(RESTORE_SOURCE_PATH)" ]; then + RESTORE_PATH="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + else + RESTORE_PATH="$(RESTORE_SOURCE_PATH)" + fi + export RESTORE_PATH_ENV="$RESTORE_PATH" + + python3 - <<'PY' + import base64 + import json + import os + import pathlib + import re + import urllib.parse + import urllib.request + + root = pathlib.Path(os.environ["RESTORE_PATH_ENV"]).resolve() + if not root.exists(): + print(f"Restore source folder not found; payload normalization skipped: {root}") + raise SystemExit(0) + + graph_token = os.environ.get("GRAPH_TOKEN", "").strip() + guid_re = re.compile( + r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$" + ) + group_id_cache = {} + + def is_guid(value): + return bool(guid_re.match(str(value or "").strip())) + + def resolve_group_id(group_name): + name = str(group_name or "").strip() + if not name or not graph_token: + return None + + cache_key = name.lower() + if cache_key in group_id_cache: + return group_id_cache[cache_key] + + filter_value = name.replace("'", "''") + query = urllib.parse.urlencode( + { + "$select": "id,displayName", + "$filter": f"displayName eq '{filter_value}'", + } + ) + url = f"https://graph.microsoft.com/v1.0/groups?{query}" + request = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {graph_token}", + "Accept": "application/json", + }, + method="GET", + ) + try: + with urllib.request.urlopen(request, timeout=30) as response: + payload = json.loads(response.read().decode("utf-8")) + except Exception: + group_id_cache[cache_key] = None + return None + + values = payload.get("value", []) if isinstance(payload, dict) else [] + exact = [ + item for item in values + if str(item.get("displayName", "")) == name and is_guid(item.get("id")) + ] + if len(exact) == 1: + resolved = exact[0]["id"] + group_id_cache[cache_key] = resolved + return resolved + + ids = [item.get("id") for item in values if is_guid(item.get("id"))] + resolved = ids[0] if len(ids) == 1 else None + group_id_cache[cache_key] = resolved + return resolved + + def strip_assignment_display_labels(node): + removed = 0 + allowed_assignment_target_keys = { + "@odata.type", + "groupId", + "collectionId", + "deviceAndAppManagementAssignmentFilterId", + "deviceAndAppManagementAssignmentFilterType", + } + if isinstance(node, dict): + odata_type = str(node.get("@odata.type", "") or "").lower() + is_assignment_target = "assignmenttarget" in odata_type + + if is_assignment_target: + group_name = ( + node.get("groupName") + or node.get("groupDisplayName") + or node.get("displayName") + ) + group_id = str(node.get("groupId", "") or "").strip() + is_group_target = "groupassignmenttarget" in odata_type + if is_group_target and not is_guid(group_id): + resolved_group_id = resolve_group_id(group_name) + if resolved_group_id: + node["groupId"] = resolved_group_id + group_id = resolved_group_id + + for key in list(node.keys()): + if key.startswith("@odata."): + continue + if 
key not in allowed_assignment_target_keys: + if key in node: + node.pop(key, None) + removed += 1 + + # Keep only valid group assignment targets for update payload. + if is_group_target and not is_guid(node.get("groupId")): + node["__drop_assignment_target__"] = True + elif "groupId" in node: + for key in ("groupDisplayName", "groupName", "displayName", "groupType"): + if key in node: + node.pop(key, None) + removed += 1 + + if "targetDisplayName" in node and isinstance(node.get("target"), dict): + node.pop("targetDisplayName", None) + removed += 1 + + for value in node.values(): + removed += strip_assignment_display_labels(value) + elif isinstance(node, list): + for item in node: + removed += strip_assignment_display_labels(item) + return removed + + def prune_invalid_assignment_targets(node): + removed = 0 + if isinstance(node, dict): + assignments = node.get("assignments") + if isinstance(assignments, list): + filtered = [] + for assignment in assignments: + target = assignment.get("target") if isinstance(assignment, dict) else None + if isinstance(target, dict) and target.get("__drop_assignment_target__") is True: + removed += 1 + continue + filtered.append(assignment) + if len(filtered) != len(assignments): + node["assignments"] = filtered + for value in node.values(): + removed += prune_invalid_assignment_targets(value) + elif isinstance(node, list): + for item in node: + removed += prune_invalid_assignment_targets(item) + return removed + + def remove_internal_markers(node): + removed = 0 + if isinstance(node, dict): + if "__drop_assignment_target__" in node: + node.pop("__drop_assignment_target__", None) + removed += 1 + for value in node.values(): + removed += remove_internal_markers(value) + elif isinstance(node, list): + for item in node: + removed += remove_internal_markers(item) + return removed + + normalized_payload_json = 0 + sanitized_assignment_labels = 0 + invalid_assignment_targets_removed = 0 + files_changed = 0 + + for path in sorted(root.rglob("*.json")): + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + + file_changed = False + + # IntuneCD expects payloadJson as base64 string; backup may store dict/list. + # Some exports can be list-root JSON, so only access payloadJson on dict roots. + if isinstance(data, dict): + payload = data.get("payloadJson") + if isinstance(payload, (dict, list)): + payload_json = json.dumps(payload, separators=(",", ":"), ensure_ascii=False).encode("utf-8") + data["payloadJson"] = base64.b64encode(payload_json).decode("ascii") + normalized_payload_json += 1 + file_changed = True + + removed = strip_assignment_display_labels(data) + if removed > 0: + sanitized_assignment_labels += removed + file_changed = True + + dropped = prune_invalid_assignment_targets(data) + if dropped > 0: + invalid_assignment_targets_removed += dropped + file_changed = True + + # Clean up internal markers used by prune flow. 
+ if remove_internal_markers(data) > 0: + file_changed = True + + if file_changed: + path.write_text(json.dumps(data, indent=4, ensure_ascii=False) + "\n", encoding="utf-8") + files_changed += 1 + + print( + "Restore payload normalization complete: " + f"filesChanged={files_changed}, " + f"appConfigPayloadJsonNormalized={normalized_payload_json}, " + f"assignmentDisplayLabelsRemoved={sanitized_assignment_labels}, " + f"invalidAssignmentTargetsRemoved={invalid_assignment_targets_removed}" + ) + PY + + cmd=( + IntuneCD-startupdate + --token "$(accessToken)" + --mode=1 + --path "$RESTORE_PATH" + --exit-on-error + ) + + if [ "$DRY_RUN" = "true" ]; then + cmd+=(--report) + fi + if [ "$UPDATE_ASSIGNMENTS" = "true" ]; then + cmd+=(--update-assignments) + fi + if [ "$REMOVE_UNMANAGED" = "true" ]; then + cmd+=(--remove) + fi + if [ "$ENTRA_UPDATE" = "true" ]; then + cmd+=(--entraupdate) + fi + + EXCLUDE_CSV_TRIMMED="$(echo "$EXCLUDE_CSV" | xargs)" + EXCLUDE_CSV_NORMALIZED="$(echo "$EXCLUDE_CSV_TRIMMED" | tr '[:upper:]' '[:lower:]')" + if [ "$EXCLUDE_CSV_NORMALIZED" = "none" ] || [ "$EXCLUDE_CSV_NORMALIZED" = "null" ] || [ "$EXCLUDE_CSV_NORMALIZED" = "n/a" ] || [ "$EXCLUDE_CSV_NORMALIZED" = "-" ] || [ "$EXCLUDE_CSV_NORMALIZED" = "_none_" ]; then + EXCLUDE_CSV_TRIMMED="" + fi + + exclude_items=() + if [ -n "$EXCLUDE_CSV_TRIMMED" ]; then + IFS=',' read -r -a raw_items <<< "$EXCLUDE_CSV_TRIMMED" + for item in "${raw_items[@]}"; do + trimmed="$(echo "$item" | xargs)" + if [ -n "$trimmed" ]; then + exclude_items+=("$trimmed") + fi + done + fi + + has_dms_exclude=0 + for item in "${exclude_items[@]}"; do + if [ "$(echo "$item" | tr '[:upper:]' '[:lower:]')" = "devicemanagementsettings" ]; then + has_dms_exclude=1 + break + fi + done + if [ "$has_dms_exclude" -eq 0 ]; then + exclude_items+=("DeviceManagementSettings") + echo "Auto-excluding DeviceManagementSettings (IntuneCD update requires interactive auth for this category)." + fi + + if [ "${#exclude_items[@]}" -gt 0 ]; then + cmd+=(--exclude) + cmd+=("${exclude_items[@]}") + fi + + echo "Restore command mode: dryRun=$DRY_RUN updateAssignments=$UPDATE_ASSIGNMENTS remove=$REMOVE_UNMANAGED entraupdate=$ENTRA_UPDATE maxWorkers=$MAX_WORKERS sourcePath=$RESTORE_PATH" + if [ "${#exclude_items[@]}" -gt 0 ]; then + joined_excludes="$(IFS=,; echo "${exclude_items[*]}")" + echo "Excluding categories: $joined_excludes" + fi + + intunecd_log="${AGENT_TEMPDIRECTORY:-/tmp}/intunecd-restore.log" + rm -f "$intunecd_log" + set +e + "${cmd[@]}" >"$intunecd_log" 2>&1 + intunecd_rc=$? + set -e + echo "IntuneCD exit code captured: $intunecd_rc" + + if [ "$intunecd_rc" -ne 0 ]; then + echo "IntuneCD restore/update failed with exit code: $intunecd_rc" + marker_pattern="error|\\[ERROR\\]|\\[CRITICAL\\]|request failed|failed with status|modelvalidationfailure|traceback|exception|error updating|failed after|unable to|forbidden|unauthorized" + marker_count="$(grep -Eic "$marker_pattern" "$intunecd_log" || true)" + echo "Detected error-marker lines: $marker_count" + echo "Relevant markers from full output (line:number:text):" + grep -Ein "$marker_pattern" "$intunecd_log" | tail -n 200 || true + echo "First 80 lines of IntuneCD output:" + head -n 80 "$intunecd_log" || true + echo "Last 120 lines of IntuneCD output:" + tail -n 120 "$intunecd_log" || true + if [ "${marker_count:-0}" -eq 0 ]; then + echo "##vso[task.logissue type=warning]IntuneCD returned non-zero without explicit error markers; treating as successful no-op." 
+ intunecd_rc=0 + fi + else + echo "Last 60 lines of IntuneCD output:" + tail -n 60 "$intunecd_log" || true + fi + + if [ "$intunecd_rc" -ne 0 ]; then + exit "$intunecd_rc" + fi + failOnStderr: false + env: + DRY_RUN: ${{ parameters.dryRun }} + UPDATE_ASSIGNMENTS: ${{ parameters.updateAssignments }} + REMOVE_UNMANAGED: ${{ parameters.removeObjectsNotInBaseline }} + ENTRA_UPDATE: ${{ parameters.includeEntraUpdate }} + MAX_WORKERS: ${{ parameters.maxWorkers }} + EXCLUDE_CSV: ${{ parameters.excludeCsv }} + GRAPH_TOKEN: $(accessToken) diff --git a/azure-pipelines-review-sync.yml b/azure-pipelines-review-sync.yml new file mode 100644 index 0000000..45640e4 --- /dev/null +++ b/azure-pipelines-review-sync.yml @@ -0,0 +1,194 @@ +trigger: none +pr: none + +schedules: + - cron: "*/20 * * * *" + displayName: "Review decision sync (every 20 minutes)" + branches: + include: + - main + always: true + batch: true + +variables: + # Tenant-specific values are expected in a variable group (see templates/variables-tenant.yml). + # Uncomment the line below after creating the group in your Azure DevOps project. + # - group: vg-astral-tenant + - template: templates/variables-common.yml + +jobs: + - job: sync_intune_review_decisions + displayName: Sync Intune reviewer decisions + condition: eq(variables['ENABLE_WORKLOAD_INTUNE'], 'true') + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Apply reviewer /reject decisions (Intune) + condition: eq(variables['ENABLE_PR_REVIEWER_DECISIONS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/apply_reviewer_rejections.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "intune" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + + - task: Bash@3 + displayName: Update automated reviewer summary (Intune) + condition: eq(variables['ENABLE_PR_REVIEW_SUMMARY'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/update_pr_review_summary.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "intune" \ + --backup-folder "$(BACKUP_FOLDER)" \ + --reports-subdir "$(REPORTS_SUBDIR)" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + REQUIRE_CHANGE_TICKETS: $(REQUIRE_CHANGE_TICKETS) + CHANGE_TICKET_REGEX: $(CHANGE_TICKET_REGEX) + DEBUG_CHANGE_TICKET_THREADS: $(DEBUG_CHANGE_TICKET_THREADS) + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + + - task: Bash@3 + displayName: Queue post-merge remediation from reviewer /reject (Intune) + condition: eq(variables['AUTO_REMEDIATE_AFTER_MERGE'], 'true') 
+ inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/queue_post_merge_restore.py" \ + --workload "intune" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + AUTO_REMEDIATE_AFTER_MERGE: $(AUTO_REMEDIATE_AFTER_MERGE) + AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: $(AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: false + + - job: sync_entra_review_decisions + displayName: Sync Entra reviewer decisions + condition: eq(variables['ENABLE_WORKLOAD_ENTRA'], 'true') + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Apply reviewer /reject decisions (Entra) + condition: eq(variables['ENABLE_PR_REVIEWER_DECISIONS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/apply_reviewer_rejections.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "entra" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + + - task: Bash@3 + displayName: Update automated reviewer summary (Entra) + condition: eq(variables['ENABLE_PR_REVIEW_SUMMARY'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/update_pr_review_summary.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "entra" \ + --backup-folder "$(BACKUP_FOLDER)" \ + --reports-subdir "$(REPORTS_SUBDIR)" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + REQUIRE_CHANGE_TICKETS: $(REQUIRE_CHANGE_TICKETS) + CHANGE_TICKET_REGEX: $(CHANGE_TICKET_REGEX) + DEBUG_CHANGE_TICKET_THREADS: $(DEBUG_CHANGE_TICKET_THREADS) + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + + - task: Bash@3 + displayName: Queue post-merge remediation from reviewer /reject (Entra) + condition: eq(variables['AUTO_REMEDIATE_AFTER_MERGE'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 
"$(Build.SourcesDirectory)/scripts/queue_post_merge_restore.py" \ + --workload "entra" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + AUTO_REMEDIATE_AFTER_MERGE: $(AUTO_REMEDIATE_AFTER_MERGE) + AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: $(AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: true diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 0000000..d1964ea --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,2147 @@ +trigger: none + +parameters: + - name: forceFullRun + type: boolean + default: false + +schedules: + - cron: "0 * * * *" + displayName: "Hourly backup (full run at configured timezone)" + branches: + include: + - main + always: true + batch: true + +variables: + # Tenant-specific values are expected in a variable group (see templates/variables-tenant.yml). + # Uncomment the line below after creating the group in your Azure DevOps project. + # - group: vg-astral-tenant + - template: templates/variables-common.yml + - name: ROLLING_PR_TITLE_INTUNE + value: "Intune drift review (rolling)" + - name: ROLLING_PR_TITLE_ENTRA + value: "Entra drift review (rolling)" + - name: INTUNE_BACKUP_SUBDIR + value: intune + - name: ENTRA_BACKUP_SUBDIR + value: entra + - name: INTUNECD_VERSION + value: 2.5.0 + - name: MD_TO_PDF_VERSION + value: 5.2.5 + - name: EXCLUDE_SCRIPT_BACKUP + value: false + - name: ENABLE_ENTRA_CONDITIONAL_ACCESS + value: true + - name: INTUNE_EXCLUDE_CSV + value: "" + - name: ENTRA_INCLUDE_NAMED_LOCATIONS + value: true + - name: ENTRA_INCLUDE_AUTHENTICATION_STRENGTHS + value: true + - name: ENTRA_INCLUDE_CONDITIONAL_ACCESS + value: true + - name: ENTRA_INCLUDE_ENTERPRISE_APPS + value: true + - name: ENTRA_ENTERPRISE_APP_WORKERS + value: 8 + - name: ENTRA_INCLUDE_APP_REGISTRATIONS + value: true + - name: SPLIT_DOCUMENTATION + value: true + - name: ENABLE_TAGGING + value: false + - name: ROLLING_PR_MERGE_STRATEGY + value: rebase + - name: AUTO_REMEDIATE_ON_PR_REJECTION + value: true + +jobs: + - job: test_python + displayName: Run Python unit tests + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + + - task: Bash@3 + displayName: unittest discover + inputs: + targetType: inline + script: | + set -euo pipefail + python3 -m unittest discover -s tests -v + workingDirectory: "$(Build.SourcesDirectory)" + + - job: backup_intune + displayName: Backup & commit Intune configuration + condition: eq(variables['ENABLE_WORKLOAD_INTUNE'], 'true') + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Snapshot validation helper script (Intune job) + inputs: + targetType: inline + script: | + set -euo pipefail + SCRIPT_ROOT="$(Agent.TempDirectory)/pipeline-scripts-intune" + rm -rf "$SCRIPT_ROOT" + mkdir -p "$SCRIPT_ROOT" + cp 
"$(Build.SourcesDirectory)/scripts/validate_backup_outputs.py" "$SCRIPT_ROOT/validate_backup_outputs.py" + chmod +x "$SCRIPT_ROOT/validate_backup_outputs.py" + echo "##vso[task.setvariable variable=PIPELINE_SCRIPT_ROOT]$SCRIPT_ROOT" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Prepare drift branch from baseline + inputs: + targetType: inline + script: | + set -euo pipefail + if git ls-remote --exit-code --heads origin "$(DRIFT_BRANCH_INTUNE)" >/dev/null 2>&1; then + git fetch --quiet origin "$(BASELINE_BRANCH)" "$(DRIFT_BRANCH_INTUNE)" + else + git fetch --quiet origin "$(BASELINE_BRANCH)" + fi + git checkout --force -B "$(DRIFT_BRANCH_INTUNE)" "origin/$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Determine run mode (light vs full) + name: setRunMode + inputs: + targetType: inline + script: | + set -euo pipefail + LOCAL_NOW="$(TZ=$(BACKUP_TIMEZONE) date '+%Y-%m-%d %H:%M:%S %Z')" + LOCAL_HOUR="$(TZ=$(BACKUP_TIMEZONE) date '+%H')" + FORCE_FULL_PARAM="$(echo '${{ parameters.forceFullRun }}' | tr '[:upper:]' '[:lower:]')" + if [ "$FORCE_FULL_PARAM" = "true" ]; then + FULL_RUN=1 + MODE="full" + REASON="forced by parameter forceFullRun=true" + elif [ "$PRAGUE_HOUR" = "00" ]; then + FULL_RUN=1 + MODE="full" + REASON="scheduled midnight full run" + else + FULL_RUN=0 + MODE="light" + REASON="default hourly light run" + fi + echo "Run mode decision: $MODE ($REASON; Prague local time: $PRAGUE_NOW)" + echo "##vso[task.setvariable variable=FULL_RUN]$FULL_RUN" + echo "##vso[task.setvariable variable=FULL_RUN;isOutput=true]$FULL_RUN" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Reset backup workspace for Intune workload + inputs: + targetType: inline + script: | + set -euo pipefail + INTUNE_ROOT="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + INTUNE_REPORTS_ROOT="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/intune" + rm -rfv "$INTUNE_ROOT" + rm -rfv "$INTUNE_REPORTS_ROOT" + mkdir -p "$INTUNE_ROOT" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Install IntuneCD + inputs: + targetType: inline + script: | + set -euo pipefail + pip3 install "IntuneCD==$(INTUNECD_VERSION)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: AzurePowerShell@5 + displayName: Get Graph Token for Workload Federated Credential + inputs: + azureSubscription: $(SERVICE_CONNECTION_NAME) + azurePowerShellVersion: LatestVersion + ScriptType: inlineScript + Inline: | + $getTokenParams = @{ + ResourceTypeName = 'MSGraph' + AsSecureString = $true + ErrorAction = 'Stop' + } + $tokenCommand = Get-Command Get-AzAccessToken -ErrorAction Stop + if ($tokenCommand.Parameters.ContainsKey('ForceRefresh')) { + $getTokenParams['ForceRefresh'] = $true + Write-Host "Requesting Graph token with ForceRefresh=true" + } else { + Write-Host "Get-AzAccessToken does not support ForceRefresh in this Az.Accounts version" + } + $accessToken = ([PSCredential]::New('dummy', (Get-AzAccessToken @getTokenParams).Token).GetNetworkCredential().Password) + + $tokenParts = $accessToken.Split('.') + if ($tokenParts.Length -lt 2) { throw "Invalid Graph access token format." 
} + $payload = $tokenParts[1].Replace('-', '+').Replace('_', '/') + switch ($payload.Length % 4) { + 2 { $payload += '==' } + 3 { $payload += '=' } + } + $payloadJson = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($payload)) + $claims = $payloadJson | ConvertFrom-Json + $roles = @($claims.roles) + + $appId = [string]$claims.appid + $appIdFingerprint = if ($appId.Length -ge 16) { "$($appId.Substring(0,8))...$($appId.Substring($appId.Length-8,8))" } else { $appId } + $oid = [string]$claims.oid + $sortedRoles = $roles | Sort-Object + + $issuedAt = if ($claims.iat) { [DateTimeOffset]::FromUnixTimeSeconds([int64]$claims.iat).UtcDateTime.ToString("yyyy-MM-ddTHH:mm:ssZ") } else { "n/a" } + $expiresAt = if ($claims.exp) { [DateTimeOffset]::FromUnixTimeSeconds([int64]$claims.exp).UtcDateTime.ToString("yyyy-MM-ddTHH:mm:ssZ") } else { "n/a" } + Write-Host "Graph token claims: appid=$appId appid(fingerprint)=$appIdFingerprint oid=$oid tid=$($claims.tid) aud=$($claims.aud) iat=$issuedAt exp=$expiresAt" + Write-Host "Graph token roles: $($sortedRoles -join ', ')" + + $requiredRoles = @('DeviceManagementScripts.Read.All', 'DeviceManagementScripts.ReadWrite.All') + if (-not ($roles | Where-Object { $requiredRoles -contains $_ })) { + if ("$(EXCLUDE_SCRIPT_BACKUP)" -eq "true") { + Write-Host "##vso[task.logissue type=warning]Graph token does not contain DeviceManagementScripts.Read.All or DeviceManagementScripts.ReadWrite.All." + Write-Host "##vso[task.logissue type=warning]Continuing because EXCLUDE_SCRIPT_BACKUP=true and script categories are excluded from backup." + } else { + Write-Host "##vso[task.logissue type=error]Graph token does not contain DeviceManagementScripts.Read.All or DeviceManagementScripts.ReadWrite.All." + throw "Service connection token is missing required script permissions." + } + } + + if ("$(ENABLE_ENTRA_CONDITIONAL_ACCESS)" -ne "true") { + $missingConditionalAccessRoles = @() + if (-not ($roles -contains 'Policy.Read.All')) { $missingConditionalAccessRoles += 'Policy.Read.All' } + if (-not ($roles -contains 'Policy.Read.ConditionalAccess')) { $missingConditionalAccessRoles += 'Policy.Read.ConditionalAccess' } + if ($missingConditionalAccessRoles.Count -gt 0) { + Write-Host "##vso[task.logissue type=error]Graph token is missing Conditional Access roles: $($missingConditionalAccessRoles -join ', ')" + throw "Service connection token is missing required Conditional Access permissions." + } + } + + if (-not ($roles -contains 'Group.Read.All')) { + Write-Host "##vso[task.logissue type=warning]Graph token does not contain Group.Read.All." + if ($roles -contains 'GroupSettings.Read.All') { + Write-Host "##vso[task.logissue type=warning]GroupSettings.Read.All is present but it does not replace Group.Read.All for assignment target group resolution." + } + Write-Host "##vso[task.logissue type=warning]Group-assigned targets may be exported without groupId/groupDisplayName." 
+ } + + Write-Host "##vso[task.setvariable variable=accessToken;issecret=true]$accessToken" + + - task: Bash@3 + displayName: Create Intune backup + inputs: + targetType: inline + script: | + set -euo pipefail + INTUNE_ROOT="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + mkdir -p "$INTUNE_ROOT" + + BACKUP_START=$(date +%Y.%m.%d:%H.%M.%S) + echo "##vso[task.setVariable variable=BACKUP_START]$BACKUP_START" + + backup_log="$(mktemp)" + trap 'rm -f "$backup_log"' EXIT + + EXCLUDE_ARGS=( + CompliancePartnerHeartbeat + ManagedGooglePlay + VPPusedLicenseCount + ) + if [ "$(ENABLE_ENTRA_CONDITIONAL_ACCESS)" = "true" ]; then + EXCLUDE_ARGS+=(ConditionalAccess) + fi + if [ "$(EXCLUDE_SCRIPT_BACKUP)" = "true" ]; then + EXCLUDE_ARGS+=( + ComplianceScripts + CustomAttributes + ProactiveRemediation + PowershellScripts + ShellScripts + ) + fi + if [ -n "$(INTUNE_EXCLUDE_CSV)" ]; then + IFS=',' read -r -a raw_items <<< "$(INTUNE_EXCLUDE_CSV)" + for item in "${raw_items[@]}"; do + trimmed="$(echo "$item" | xargs)" + if [ -n "$trimmed" ]; then + EXCLUDE_ARGS+=("$trimmed") + fi + done + fi + + set +e + IntuneCD-startbackup \ + --token "$(accessToken)" \ + --mode=1 \ + --output=json \ + --path="$INTUNE_ROOT" \ + --exclude "${EXCLUDE_ARGS[@]}" \ + --append-id \ + --ignore-omasettings \ + --enrich-documentation \ + 2>&1 | tee "$backup_log" + intunecd_exit="${PIPESTATUS[0]}" + set -e + + handled_403=0 + has_auth_403=0 + if grep -Eiq "Application is not authorized to perform this operation|one of the following scopes|Request failed with status 403" "$backup_log"; then + has_auth_403=1 + if [ "$(EXCLUDE_SCRIPT_BACKUP)" = "true" ] && grep -Eiq "DeviceManagementScripts\\.Read\\.All|DeviceManagementScripts\\.ReadWrite\\.All" "$backup_log"; then + echo "##vso[task.logissue type=warning]Ignoring script-related 403 because EXCLUDE_SCRIPT_BACKUP=true." + handled_403=1 + fi + if [ "$(ENABLE_ENTRA_CONDITIONAL_ACCESS)" = "true" ] && grep -Eiq "Policy\\.Read\\.ConditionalAccess|conditionalAccess/policies" "$backup_log"; then + echo "##vso[task.logissue type=warning]Ignoring Conditional Access-related 403 in Intune workload because ENABLE_ENTRA_CONDITIONAL_ACCESS=true." + handled_403=1 + fi + if [ "$handled_403" -ne 1 ]; then + echo "##vso[task.logissue type=error]Intune backup has Microsoft Graph authorization failures (HTTP 403)." + echo "##vso[task.logissue type=error]Grant required Graph application permissions and admin consent." + exit 1 + fi + fi + + non_403_statuses="$(grep -Eo "status[[:space:]]+[0-9]{3}" "$backup_log" | grep -Eo "[0-9]{3}" | sort -u | grep -Ev "^403$" || true)" + if [ -n "$non_403_statuses" ]; then + echo "##vso[task.logissue type=error]Intune backup log contains non-403 HTTP failures: $non_403_statuses" + exit 1 + fi + + if [ "$intunecd_exit" -ne 0 ]; then + if [ "$has_auth_403" -eq 1 ] && [ "$handled_403" -eq 1 ]; then + echo "##vso[task.logissue type=warning]IntuneCD exited non-zero, but only allowed 403 scopes were detected for this mode. Continuing." + else + echo "##vso[task.logissue type=error]IntuneCD backup command failed with exit code $intunecd_exit." 
+ exit "$intunecd_exit" + fi + fi + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Revert partial Intune Settings Catalog exports + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/filter_intune_partial_settings_noise.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --backup-root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --baseline-ref "origin/$(BASELINE_BRANCH)" \ + --fail-on-unresolved-partial-exports true + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Resolve assignment group names + inputs: + targetType: inline + script: | + set -euo pipefail + python3 - <<'PY' + import json + import os + import pathlib + import re + import urllib.error + import urllib.parse + import urllib.request + + root = pathlib.Path(os.environ["BUILD_SOURCESDIRECTORY"]) / os.environ["BACKUP_FOLDER"] + token = os.environ.get("ACCESS_TOKEN", "").strip() + if not token: + print("No Graph token available. Skipping assignment group name enrichment.") + raise SystemExit(0) + + if not root.exists(): + print(f"Backup folder not found: {root}. Skipping assignment group name enrichment.") + raise SystemExit(0) + + group_target_type = "#microsoft.graph.groupAssignmentTarget" + guid_pattern = re.compile( + r"([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})" + ) + + cache = {} + assignment_cache = {} + looked_up = 0 + updated_files = 0 + group_targets_seen = 0 + group_targets_without_id_before = 0 + group_targets_without_id_after = 0 + files_with_missing_group_targets = 0 + files_with_restored_group_ids = 0 + restored_group_ids = 0 + assignment_endpoint_hits = 0 + + assignment_endpoint_templates = [ + "https://graph.microsoft.com/beta/deviceManagement/deviceConfigurations/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/configurationPolicies/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/groupPolicyConfigurations/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceCompliancePolicies/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceHealthScripts/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceManagementScripts/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceShellScripts/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceCustomAttributeShellScripts/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/deviceEnrollmentConfigurations/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceManagement/windowsAutopilotDeploymentProfiles/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceAppManagement/mobileApps/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceAppManagement/mobileAppConfigurations/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceAppManagement/targetedManagedAppConfigurations/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceAppManagement/androidManagedAppProtections/{object_id}/assignments", + "https://graph.microsoft.com/beta/deviceAppManagement/iosManagedAppProtections/{object_id}/assignments", + ] + + def object_id_from_filename(file_path: pathlib.Path): + match = guid_pattern.search(file_path.name) + if not match: + return None + return 
match.group(1) + + def resolve_group_name(group_id: str): + nonlocal_vars["looked_up"] += 1 + if group_id in cache: + return cache[group_id] + url = ( + "https://graph.microsoft.com/v1.0/groups/" + + urllib.parse.quote(group_id) + + "?$select=displayName" + ) + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/json", + }, + method="GET", + ) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + payload = json.loads(resp.read().decode("utf-8")) + name = payload.get("displayName") + except urllib.error.HTTPError as exc: + print(f"Warning: unable to resolve group {group_id} (HTTP {exc.code})") + name = None + except Exception as exc: + print(f"Warning: unable to resolve group {group_id} ({exc})") + name = None + cache[group_id] = name + return name + + def assignment_signature(assignment): + if not isinstance(assignment, dict): + return (None, None, None, None, None) + target = assignment.get("target") + if not isinstance(target, dict): + return (assignment.get("intent"), assignment.get("source"), None, None, None) + return ( + assignment.get("intent"), + assignment.get("source"), + target.get("@odata.type"), + target.get("deviceAndAppManagementAssignmentFilterId"), + target.get("deviceAndAppManagementAssignmentFilterType"), + ) + + def fetch_assignments(object_id: str): + if object_id in assignment_cache: + return assignment_cache[object_id] + safe_id = urllib.parse.quote(object_id) + for endpoint_template in assignment_endpoint_templates: + url = endpoint_template.format(object_id=safe_id) + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/json", + }, + method="GET", + ) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + payload = json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + if exc.code in (400, 404): + continue + print(f"Warning: assignment query failed for object {object_id} at {url} (HTTP {exc.code})") + continue + except Exception as exc: + print(f"Warning: assignment query failed for object {object_id} at {url} ({exc})") + continue + + value = payload.get("value") + if isinstance(value, list): + assignment_cache[object_id] = (value, url) + return value, url + + assignment_cache[object_id] = ([], None) + return assignment_cache[object_id] + + def collect_group_assignments(node): + found = [] + if isinstance(node, dict): + target = node.get("target") + if isinstance(target, dict) and target.get("@odata.type") == group_target_type: + found.append(node) + for value in node.values(): + found.extend(collect_group_assignments(value)) + elif isinstance(node, list): + for item in node: + found.extend(collect_group_assignments(item)) + return found + + nonlocal_vars = {"looked_up": 0} + json_files = sorted(root.rglob("*.json")) + for file_path in json_files: + try: + content = json.loads(file_path.read_text(encoding="utf-8")) + except Exception: + continue + + file_changed = False + local_group_assignments = collect_group_assignments(content) + if not local_group_assignments: + continue + + file_missing_before = 0 + file_missing_assignments = [] + for assignment in local_group_assignments: + target = assignment.get("target", {}) + group_targets_seen += 1 + group_id = target.get("groupId") + if isinstance(group_id, str) and group_id: + group_name = resolve_group_name(group_id) + if group_name and target.get("groupDisplayName") != group_name: + target["groupDisplayName"] = group_name + file_changed = True + else: 
+ group_targets_without_id_before += 1 + file_missing_before += 1 + file_missing_assignments.append(assignment) + + if file_missing_before > 0: + files_with_missing_group_targets += 1 + object_id = object_id_from_filename(file_path) + if object_id: + remote_assignments, matched_endpoint = fetch_assignments(object_id) + if matched_endpoint: + assignment_endpoint_hits += 1 + remote_group_assignments = [] + for remote_assignment in remote_assignments: + if not isinstance(remote_assignment, dict): + continue + remote_target = remote_assignment.get("target") + if not isinstance(remote_target, dict): + continue + if remote_target.get("@odata.type") != group_target_type: + continue + remote_group_id = remote_target.get("groupId") + if isinstance(remote_group_id, str) and remote_group_id: + remote_group_assignments.append(remote_assignment) + + if remote_group_assignments: + by_signature = {} + for item in remote_group_assignments: + by_signature.setdefault(assignment_signature(item), []).append(item) + used_remote = set() + file_restored_count = 0 + + for local_assignment in file_missing_assignments: + selected_remote = None + sig = assignment_signature(local_assignment) + candidates = by_signature.get(sig, []) + for candidate in candidates: + candidate_id = id(candidate) + if candidate_id in used_remote: + continue + selected_remote = candidate + used_remote.add(candidate_id) + break + if selected_remote is None: + for candidate in remote_group_assignments: + candidate_id = id(candidate) + if candidate_id in used_remote: + continue + selected_remote = candidate + used_remote.add(candidate_id) + break + if selected_remote is None: + continue + + remote_target = selected_remote.get("target", {}) + remote_group_id = remote_target.get("groupId") + if not (isinstance(remote_group_id, str) and remote_group_id): + continue + + local_target = local_assignment.get("target", {}) + local_target["groupId"] = remote_group_id + remote_group_name = remote_target.get("groupDisplayName") or remote_target.get("groupName") + if isinstance(remote_group_name, str) and remote_group_name: + local_target["groupDisplayName"] = remote_group_name + else: + group_name = resolve_group_name(remote_group_id) + if group_name: + local_target["groupDisplayName"] = group_name + file_changed = True + file_restored_count += 1 + restored_group_ids += 1 + + if file_restored_count > 0: + files_with_restored_group_ids += 1 + + file_missing_after = 0 + for assignment in local_group_assignments: + target = assignment.get("target", {}) + group_id = target.get("groupId") + if not (isinstance(group_id, str) and group_id): + file_missing_after += 1 + group_targets_without_id_after += file_missing_after + + if file_changed: + file_path.write_text(json.dumps(content, indent=5, ensure_ascii=False) + "\n", encoding="utf-8") + updated_files += 1 + + looked_up = nonlocal_vars["looked_up"] + print( + "Assignment group name enrichment complete. " + + f"Files updated: {updated_files}. " + + f"Group lookups performed: {looked_up}. " + + f"Assignment endpoints matched: {assignment_endpoint_hits}. " + + f"Group IDs restored from Graph assignments: {restored_group_ids}." + ) + if files_with_missing_group_targets > 0: + print( + "Assignment targets missing groupId before fallback: " + + str(group_targets_without_id_before) + + " across " + + str(files_with_missing_group_targets) + + " files." + ) + if files_with_restored_group_ids > 0: + print( + "Assignment targets restored from endpoint fallback in " + + str(files_with_restored_group_ids) + + " files." 
+ ) + if group_targets_seen > 0 and group_targets_without_id_after > 0: + print( + "Warning: " + + str(group_targets_without_id_after) + + " of " + + str(group_targets_seen) + + " group assignment targets still do not include groupId after fallback." + ) + print("Warning: when groupId is unavailable from source APIs, groupDisplayName cannot be resolved.") + PY + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + BACKUP_FOLDER: $(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR) + ACCESS_TOKEN: $(accessToken) + BUILD_SOURCESDIRECTORY: $(Build.SourcesDirectory) + + - task: Bash@3 + displayName: Generate policy assignment report + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_assignment_report.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/intune" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Generate apps inventory report + condition: and(eq(variables['ENABLE_WORKLOAD_ENTRA'], 'false'), or(eq(variables['ENTRA_INCLUDE_APP_REGISTRATIONS'], 'true'), eq(variables['ENTRA_INCLUDE_ENTERPRISE_APPS'], 'true'))) + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_app_inventory_report.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Generate object inventory reports + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_object_inventory_reports.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/intune" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Validate Intune backup outputs + inputs: + targetType: inline + script: | + set -euo pipefail + MODE="light" + if [ "$(FULL_RUN)" = "1" ]; then + MODE="full" + fi + python3 "$(PIPELINE_SCRIPT_ROOT)/validate_backup_outputs.py" \ + --workload intune \ + --mode "$MODE" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --reports-root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/intune" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: PowerShell@2 + displayName: Find change author & commit the backup + name: commitAndSetVariable + inputs: + targetType: inline + script: | + $root = "$(Build.SourcesDirectory)" + Set-Location $root + + $serviceCommitUserName = "$(USER_NAME)" + if ([string]::IsNullOrWhiteSpace($serviceCommitUserName)) { $serviceCommitUserName = "unknown" } + $serviceCommitUserEmail = "$(USER_EMAIL)" + if ([string]::IsNullOrWhiteSpace($serviceCommitUserEmail)) { $serviceCommitUserEmail = "unknown@unknown.com" } + + $fallbackCommitUserName = $serviceCommitUserName + $fallbackCommitUserEmail = $serviceCommitUserEmail + + $buildReason = "$(Build.Reason)" + $requestedForName = "$(Build.RequestedFor)" + $requestedForEmail = "$(Build.RequestedForEmail)" + if ( + $buildReason -ne "Schedule" -and + -not [string]::IsNullOrWhiteSpace($requestedForEmail) -and + $requestedForEmail -like "*@*" + ) { + 
$fallbackCommitUserEmail = $requestedForEmail + if (-not [string]::IsNullOrWhiteSpace($requestedForName)) { + $fallbackCommitUserName = $requestedForName + } else { + $fallbackCommitUserName = ($requestedForEmail -split "@")[0] + } + Write-Host "Fallback commit identity: manual requester '$fallbackCommitUserName <$fallbackCommitUserEmail>'" + } else { + Write-Host "Fallback commit identity: service '$fallbackCommitUserName <$fallbackCommitUserEmail>'" + } + + git config user.name $fallbackCommitUserName + git config user.email $fallbackCommitUserEmail + git config core.longpaths true + git config core.quotepath off + git config core.eol lf + git config core.autocrlf false + + $untrackedFile = git ls-files --others --exclude-standard --full-name + $trackedFile = git ls-files --modified --full-name + $generatedSplitMarkdownPattern = '^' + [Regex]::Escape("$(BACKUP_FOLDER)") + '/.*\.md$' + $generatedReportPattern = '^' + [Regex]::Escape("$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/") + $workloadConfigPattern = '^' + [Regex]::Escape("$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)/") + $changedFile = $untrackedFile, $trackedFile | % { $_ } | ? { + $_ -and + $_ -match $workloadConfigPattern -and + $_ -notmatch $generatedSplitMarkdownPattern -and + $_ -notmatch $generatedReportPattern -and + $_ -notlike "*/Assignment Report/*" + } + + if ($changedFile) { + git show-ref --verify --quiet "refs/remotes/origin/$(DRIFT_BRANCH_INTUNE)" + $hasRemoteDrift = $LASTEXITCODE -eq 0 + if ($hasRemoteDrift) { + git diff --quiet "origin/$(DRIFT_BRANCH_INTUNE)" -- "$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + if ($LASTEXITCODE -eq 0) { + "No Intune change detected (snapshot identical to existing drift branch)" + echo "##vso[task.setVariable variable=CHANGE_DETECTED]0" + echo "##vso[task.setVariable variable=CHANGE_DETECTED;isOutput=true]0" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED]1" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED;isOutput=true]1" + exit 0 + } + } + + if (!(Get-Module "Microsoft.Graph.DeviceManagement.Administration" -ListAvailable)) { + Install-Module Microsoft.Graph.DeviceManagement.Administration -AllowClobber -Force -AcceptLicense + } + + Write-Host "Authenticating to Graph API" + $secureToken = ConvertTo-SecureString -String "$(accessToken)" -AsPlainText -Force + Connect-MgGraph -AccessToken $secureToken -NoWelcome + + function _startProcess { + [CmdletBinding()] + param ( + [string] $filePath = '', + [string] $argumentList = '', + [string] $workingDirectory = (Get-Location), + [switch] $dontWait, + [switch] $outputErr2Std + ) + + $p = New-Object System.Diagnostics.Process + $p.StartInfo.UseShellExecute = $false + $p.StartInfo.RedirectStandardOutput = $true + $p.StartInfo.RedirectStandardError = $true + $p.StartInfo.WorkingDirectory = $workingDirectory + $p.StartInfo.FileName = $filePath + $p.StartInfo.Arguments = $argumentList + [void]$p.Start() + if (!$dontWait) { $p.WaitForExit() } + + $result = $p.StandardOutput.ReadToEnd() + if ($result) { $result } + + if ($outputErr2Std) { + $p.StandardError.ReadToEnd() + } else { + if ($err = $p.StandardError.ReadToEnd()) { Write-Error $err } + } + } + + function _getFirstNonEmptyString { + [CmdletBinding()] + param ([object] $value) + + if ($value -is [string]) { + if (-not [string]::IsNullOrWhiteSpace($value)) { + return $value.Trim() + } + return $null + } + + if ($value -is [System.Collections.IEnumerable]) { + foreach ($item in $value) { + $resolved = _getFirstNonEmptyString $item + if ($resolved) { + return $resolved + } 
+ } + } + + if ($null -ne $value) { + $stringValue = [string] $value + if (-not [string]::IsNullOrWhiteSpace($stringValue)) { + return $stringValue.Trim() + } + } + + return $null + } + + function _getGraphPropertyValue { + [CmdletBinding()] + param ( + [object] $object, + [string[]] $candidateNames + ) + + if ($null -eq $object) { + return $null + } + + foreach ($candidateName in $candidateNames) { + $property = $object.PSObject.Properties | ? { $_.Name -eq $candidateName } | select -First 1 + if ($property) { + $resolved = _getFirstNonEmptyString $property.Value + if ($resolved) { + return $resolved + } + } + } + + $additionalProperty = $object.PSObject.Properties | ? { $_.Name -eq 'AdditionalProperties' } | select -First 1 + if ($additionalProperty -and $additionalProperty.Value -is [System.Collections.IDictionary]) { + $additionalPropertyKeys = @($additionalProperty.Value.Keys) + foreach ($candidateName in $candidateNames) { + $candidateKeys = @($candidateName) + if ($candidateName.Length -gt 0) { + $candidateKeys += ($candidateName.Substring(0, 1).ToLowerInvariant() + $candidateName.Substring(1)) + } + foreach ($candidateKey in ($candidateKeys | Select-Object -Unique)) { + if ($additionalPropertyKeys -contains $candidateKey) { + $resolved = _getFirstNonEmptyString $additionalProperty.Value[$candidateKey] + if ($resolved) { + return $resolved + } + } + } + } + } + + return $null + } + + function _getActorIdentity { + [CmdletBinding()] + param ([object] $actor) + + # Graph module updates can move actor fields between typed properties and AdditionalProperties. + $userPrincipalName = _getGraphPropertyValue $actor @('UserPrincipalName', 'EmailAddress', 'Email') + $userDisplayName = _getGraphPropertyValue $actor @('UserDisplayName', 'DisplayName') + if ($userPrincipalName) { + $actorName = $userDisplayName + if (-not $actorName) { + $actorName = ($userPrincipalName -split '@')[0] + } + return [PSCustomObject]@{ + Key = "user:$userPrincipalName" + Value = $userPrincipalName + Name = $actorName + } + } + + $applicationDisplayName = _getGraphPropertyValue $actor @('ApplicationDisplayName', 'AppDisplayName', 'ApplicationName') + if ($applicationDisplayName) { + return [PSCustomObject]@{ + Key = "sp:$applicationDisplayName" + Value = ($applicationDisplayName + " (SP)") + Name = $applicationDisplayName + } + } + + if ($userDisplayName) { + return [PSCustomObject]@{ + Key = "display:$userDisplayName" + Value = $userDisplayName + Name = $userDisplayName + } + } + + return $null + } + + function _getResourceId { + [CmdletBinding()] + param ([string] $filePath) + + $fileName = [System.IO.Path]::GetFileNameWithoutExtension($filePath) + + if ($filePath -like "*Device Configurations/mobileconfig/*") { + $parentFolderPath = Split-Path (Split-Path $filePath -Parent) -Parent + $fileName = Get-ChildItem $parentFolderPath -File | ? { + (ConvertFrom-Json -InputObject (Get-Content $_.FullName -Raw)).payloadFileName -eq [System.IO.Path]::GetFileName($filePath) + } | select -expand BaseName + if (!$fileName) { + Write-Warning "Unable to find 'parent' config file for $filePath" + return + } + } elseif ($filePath -like "*/Managed Google Play/*") { + return ($modificationEvent | ? 
{ $_.Category -eq 'Enrollment' -and $_.ActivityType -eq "Patch AndroidForWorkSettings" }).Resources.ResourceId + } + + $delimiter = "__" + if ($fileName -like "*$delimiter*") { + $resourceId = ($fileName -split $delimiter)[-1] + $resourceId = $resourceId -replace "^_*" + } else { + $resourceId = $null + } + + return $resourceId + } + + $gitCommitDepth = 30 + git fetch --depth=$gitCommitDepth + $commitList = _startProcess git "--no-pager log --no-show-signature -$gitCommitDepth --format=%s%%%%%%%cI" -outputErr2Std -dontWait + $lastCommitDate = $commitList -split "`n" | ? { $_ } | % { + $commitName, $commitDate = $_ -split "%%%" + if ($commitName -match "^\d{4}\.\d{2}\.\d{2}_\d{2}\.\d{2} -- ") { $commitDate } + } + + if ($lastCommitDate) { + $lastCommitDate = Get-Date @($lastCommitDate)[0] + } else { + Write-Warning "Unable to obtain date of the last backup config commit. ALL Intune audit events will be gathered." + } + + $modificationData = New-Object System.Collections.ArrayList + + $filter = @("activityResult eq 'Success'", "ActivityOperationType ne 'Get'") + + if ($lastCommitDate) { + $lastCommitDate = $lastCommitDate.ToUniversalTime() + $filterDateTimeFrom = Get-Date -Date $lastCommitDate -Format "yyyy-MM-ddTHH:mm:ss" + $filter += "ActivityDateTime ge $filterDateTimeFrom`Z" + } + + $backupStart = [DateTime]::ParseExact("$(BACKUP_START)", "yyyy.MM.dd:HH.mm.ss", $null).ToUniversalTime() + $filterDateTimeTo = Get-Date -Date $backupStart -Format "yyyy-MM-ddTHH:mm:ss" + $filter += "ActivityDateTime le $filterDateTimeTo`Z" + + $eventFilter = $filter -join " and " + + "`nGetting Intune event logs" + "`t- from: '$lastCommitDate' (UTC) to: '$backupStart' (UTC)" + "`t- filter: $eventFilter" + $modificationEvent = Get-MgDeviceManagementAuditEvent -Filter $eventFilter -All + + $changedFileCount = @($changedFile).Count + $showPerFileLog = $changedFileCount -le 100 + $unresolvedAuthorCount = 0 + $nonResourceFileCount = 0 + + if ($showPerFileLog) { + "`nProcessing changed files" + } else { + "`nProcessing changed files ($changedFileCount total, condensed logging enabled)" + } + foreach ($file in $changedFile) { + $resourceId = _getResourceId $file + + if ($resourceId) { + if ($showPerFileLog) { + "`t- $resourceId ($file)" + } + $resourceModificationEvent = $modificationEvent | ? { $_.Resources.ResourceId -eq $resourceId } + + $modificationAuthor = @() + $resourceModificationEvent.Actor | % { + $actorIdentity = _getActorIdentity $_ + if ($actorIdentity) { + $modificationAuthor += $actorIdentity + } + } + $modificationAuthor = $modificationAuthor | Sort-Object Key -Unique + } else { + $isNonResourceFile = ( + $file -like "*/reports/*" -or + $file -like "*/Assignment Report/*" -or + $file -like "*/Managed Google Play/*" -or + $file -like "*Device Management Settings/settings.json" -or + $file -like "*/Apple Push Notification/*" -or + $file -like "*Device Configurations/mobileconfig/*" -or + $file -like "*.md" -or + $file -like "*.gitkeep" + ) + + if ($isNonResourceFile) { + $nonResourceFileCount++ + } else { + throw "Unable to find resourceId in '$file' file name. Pipeline code modification needed." 
+ } + $modificationAuthor = $null + } + + if ($modificationAuthor) { + if ($showPerFileLog) { + "`t`t- changed by: $($modificationAuthor.Name -join ', ')" + } + } else { + if ($resourceId) { + $unresolvedAuthorCount++ + } + if ($showPerFileLog) { + "`t`t- unable to find out who made the change" + } + $modificationAuthor = @([PSCustomObject]@{ + Key = "fallback:$fallbackCommitUserEmail" + Value = $fallbackCommitUserEmail + Name = $fallbackCommitUserName + }) + } + + $null = $modificationData.Add([PSCustomObject]@{ + resourceId = $resourceId + file = Join-Path $root $file + modificationAuthorKey = ($modificationAuthor.Key -join '&') + modificationAuthorValue = $modificationAuthor.Value + modificationAuthorName = $modificationAuthor.Name + }) + } + + if ($nonResourceFileCount -gt 0) { + Write-Host "Skipped resourceId lookup for $nonResourceFileCount non-resource files." + } + if ($unresolvedAuthorCount -gt 0) { + Write-Warning "Unable to resolve author from Intune audit logs for $unresolvedAuthorCount of $changedFileCount changed files. Fallback identity used." + } + + "`nCommit changes" + $modificationData | Group-Object modificationAuthorKey | % { + $modificationAuthorValue = @($_.Group | % { $_.modificationAuthorValue } | ? { $_ } | Select-Object -Unique) + $modificationAuthorName = @($_.Group | % { $_.modificationAuthorName } | ? { $_ } | Select-Object -Unique) + $modifiedFile = $_.Group.File + + $modifiedFile | % { + "`t- Adding $_" + $gitResult = _startProcess git -ArgumentList "add `"$_`"" -dontWait -outputErr2Std + if ($gitResult -match "^fatal:") { throw $gitResult } + } + + "`t- Setting commit author(s): $($modificationAuthorName -join ', ')" + git config user.name ($modificationAuthorName -join ', ') + git config user.email ($modificationAuthorValue -join ', ') + + $DATEF = (Get-Date $backupStart -f "yyyy.MM.dd_HH.mm") + $commitName = "$DATEF` -- $($modificationAuthorName -join ', ')" + + "`t- Creating commit '$commitName'" + $null = _startProcess git -ArgumentList "commit -m `"$commitName`"" -dontWait + + $unpushedCommit = _startProcess git -ArgumentList "cherry -v origin/$(BASELINE_BRANCH)" + if ([string]::IsNullOrEmpty($unpushedCommit)) { + Write-Warning "Nothing to commit?! This shouldn't happen." 
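+ # git cherry found no commits ahead of origin/$(BASELINE_BRANCH), so report this run as no-change.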
+ echo "##vso[task.setVariable variable=CHANGE_DETECTED]0" + echo "##vso[task.setVariable variable=CHANGE_DETECTED;isOutput=true]0" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED]0" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED;isOutput=true]0" + } else { + echo "##vso[task.setVariable variable=COMMIT_DATE;isOutput=true]$DATEF" + echo "##vso[task.setVariable variable=MODIFICATION_AUTHOR;isOutput=true]$(($modificationData.modificationAuthorValue | select -Unique | Sort-Object) -join ', ')" + } + } + + "`nPush changes to drift branch" + git push --force-with-lease origin "HEAD:$(DRIFT_BRANCH_INTUNE)" + if ($LASTEXITCODE -ne 0) { throw "Failed to push backup commits to origin/$(DRIFT_BRANCH_INTUNE)" } + $commitSha = (git rev-parse HEAD).Trim() + echo "##vso[task.setVariable variable=CHANGE_DETECTED]1" + echo "##vso[task.setVariable variable=CHANGE_DETECTED;isOutput=true]1" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED]1" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED;isOutput=true]1" + echo "##vso[task.setVariable variable=COMMIT_SHA;isOutput=true]$commitSha" + } else { + "No change detected" + echo "##vso[task.setVariable variable=CHANGE_DETECTED]0" + echo "##vso[task.setVariable variable=CHANGE_DETECTED;isOutput=true]0" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED]0" + echo "##vso[task.setVariable variable=ROLLING_PR_SYNC_REQUIRED;isOutput=true]0" + } + + # Create markdown documentation (non-drift output) + - task: Bash@3 + displayName: Generate markdown document + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1')) + inputs: + targetType: inline + script: | + set -euo pipefail + + INTRO="Intune backup and documentation generated at $(Build.Repository.Uri) " + + if [ "$(SPLIT_DOCUMENTATION)" = "true" ]; then + IntuneCD-startdocumentation \ + --path="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --tenantname="$TENANT_NAME" \ + --intro="$INTRO" \ + --split \ + --enrich-documentation + sed "s#](\\./#](./$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)/#g" \ + "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)/index.md" \ + > "$(Build.SourcesDirectory)/prod-as-built.md" + else + IntuneCD-startdocumentation \ + --path="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" \ + --outpath="$(Build.SourcesDirectory)/prod-as-built.md" \ + --tenantname="$TENANT_NAME" \ + --intro="$INTRO" \ + --enrich-documentation + fi + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + TENANT_NAME: $(TENANT_NAME) + + - job: create_or_update_intune_pr + displayName: Create or update rolling Intune drift PR + dependsOn: backup_intune + condition: and(eq(variables['ENABLE_WORKLOAD_INTUNE'], 'true'), succeeded(), or(eq(dependencies.backup_intune.outputs['commitAndSetVariable.CHANGE_DETECTED'], '1'), eq(dependencies.backup_intune.outputs['commitAndSetVariable.ROLLING_PR_SYNC_REQUIRED'], '1'))) + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Ensure rolling PR exists + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/ensure_rolling_pr.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "intune" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" \ + --pr-title "$(ROLLING_PR_TITLE_INTUNE)" + failOnStderr: true + env: + SYSTEM_ACCESSTOKEN: 
$(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + BUILD_BUILDNUMBER: $(Build.BuildNumber) + BUILD_BUILDID: $(Build.BuildId) + AUTO_REMEDIATE_ON_PR_REJECTION: $(AUTO_REMEDIATE_ON_PR_REJECTION) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: false + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + ROLLING_PR_MERGE_STRATEGY: $(ROLLING_PR_MERGE_STRATEGY) + + - job: update_intune_pr_summary + displayName: Update rolling Intune PR summary + dependsOn: + - backup_intune + - create_or_update_intune_pr + condition: and(eq(variables['ENABLE_WORKLOAD_INTUNE'], 'true'), eq(variables['ENABLE_PR_REVIEW_SUMMARY'], 'true'), or(eq(dependencies.backup_intune.outputs['commitAndSetVariable.CHANGE_DETECTED'], '1'), eq(dependencies.backup_intune.outputs['commitAndSetVariable.ROLLING_PR_SYNC_REQUIRED'], '1')), in(dependencies.backup_intune.result, 'Succeeded', 'SucceededWithIssues'), in(dependencies.create_or_update_intune_pr.result, 'Succeeded', 'SucceededWithIssues', 'Skipped')) + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Validate Azure OpenAI availability (Intune) + condition: eq(variables['ENABLE_PR_AI_SUMMARY'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/diagnostics/precheck_azure_openai_availability.py" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + REQUIRE_CHANGE_TICKETS: $(REQUIRE_CHANGE_TICKETS) + CHANGE_TICKET_REGEX: $(CHANGE_TICKET_REGEX) + DEBUG_CHANGE_TICKET_THREADS: $(DEBUG_CHANGE_TICKET_THREADS) + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + + - task: Bash@3 + displayName: Debug change-ticket gate vars (Intune) + inputs: + targetType: inline + script: | + set -euo pipefail + echo "ENABLE_PR_REVIEW_SUMMARY='$(ENABLE_PR_REVIEW_SUMMARY)'" + echo "REQUIRE_CHANGE_TICKETS='$(REQUIRE_CHANGE_TICKETS)'" + echo "CHANGE_TICKET_REGEX='$(CHANGE_TICKET_REGEX)'" + echo "DEBUG_CHANGE_TICKET_THREADS='$(DEBUG_CHANGE_TICKET_THREADS)'" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Update automated reviewer summary (Intune) + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/update_pr_review_summary.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "intune" \ + --backup-folder "$(BACKUP_FOLDER)" \ + --reports-subdir "$(REPORTS_SUBDIR)" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: 
$(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + REQUIRE_CHANGE_TICKETS: $(REQUIRE_CHANGE_TICKETS) + CHANGE_TICKET_REGEX: $(CHANGE_TICKET_REGEX) + DEBUG_CHANGE_TICKET_THREADS: $(DEBUG_CHANGE_TICKET_THREADS) + + - task: Bash@3 + displayName: Apply reviewer /reject decisions (Intune) + condition: eq(variables['ENABLE_PR_REVIEWER_DECISIONS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/apply_reviewer_rejections.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "intune" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + + - task: Bash@3 + displayName: Queue post-merge remediation from reviewer /reject (Intune) + condition: eq(variables['AUTO_REMEDIATE_AFTER_MERGE'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/queue_post_merge_restore.py" \ + --workload "intune" \ + --drift-branch "$(DRIFT_BRANCH_INTUNE)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + AUTO_REMEDIATE_AFTER_MERGE: $(AUTO_REMEDIATE_AFTER_MERGE) + AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: $(AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: false + + - job: backup_entra + displayName: Backup & commit Entra configuration + condition: eq(variables['ENABLE_WORKLOAD_ENTRA'], 'true') + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Snapshot export/validation helper scripts (Entra job) + inputs: + targetType: inline + script: | + set -euo pipefail + SCRIPT_ROOT="$(Agent.TempDirectory)/pipeline-scripts-entra" + rm -rf "$SCRIPT_ROOT" + mkdir -p "$SCRIPT_ROOT" + cp "$(Build.SourcesDirectory)/scripts/export_entra_baseline.py" "$SCRIPT_ROOT/export_entra_baseline.py" + cp "$(Build.SourcesDirectory)/scripts/validate_backup_outputs.py" "$SCRIPT_ROOT/validate_backup_outputs.py" + chmod +x "$SCRIPT_ROOT/export_entra_baseline.py" "$SCRIPT_ROOT/validate_backup_outputs.py" + echo "##vso[task.setvariable variable=PIPELINE_SCRIPT_ROOT]$SCRIPT_ROOT" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Prepare Entra drift branch from baseline + inputs: + targetType: inline + 
script: |
+ set -euo pipefail
+ if git ls-remote --exit-code --heads origin "$(DRIFT_BRANCH_ENTRA)" >/dev/null 2>&1; then
+ git fetch --quiet origin "$(BASELINE_BRANCH)" "$(DRIFT_BRANCH_ENTRA)"
+ else
+ git fetch --quiet origin "$(BASELINE_BRANCH)"
+ fi
+ git checkout --force -B "$(DRIFT_BRANCH_ENTRA)" "origin/$(BASELINE_BRANCH)"
+ workingDirectory: "$(Build.SourcesDirectory)"
+ failOnStderr: false
+
+ - task: Bash@3
+ displayName: Determine Entra export scope (light vs full)
+ inputs:
+ targetType: inline
+ script: |
+ set -euo pipefail
+ LOCAL_NOW="$(TZ=$(BACKUP_TIMEZONE) date '+%Y-%m-%d %H:%M:%S %Z')"
+ LOCAL_HOUR="$(TZ=$(BACKUP_TIMEZONE) date '+%H')"
+
+ FORCE_FULL_PARAM="$(echo '${{ parameters.forceFullRun }}' | tr '[:upper:]' '[:lower:]')"
+ if [ "$FORCE_FULL_PARAM" = "true" ]; then
+ MODE="full"
+ FULL_RUN=1
+ MODE_REASON="forced by parameter forceFullRun=true"
+ elif [ "$LOCAL_HOUR" = "00" ]; then
+ MODE="full"
+ FULL_RUN=1
+ MODE_REASON="scheduled midnight full run"
+ else
+ MODE="light"
+ FULL_RUN=0
+ MODE_REASON="default hourly light run"
+ fi
+
+ if [ "$(ENTRA_INCLUDE_ENTERPRISE_APPS)" = "true" ] && [ "$FULL_RUN" = "1" ]; then
+ ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE="true"
+ ENTERPRISE_SCOPE_REASON="enabled (full run)"
+ elif [ "$(ENTRA_INCLUDE_ENTERPRISE_APPS)" = "true" ] && [ "$FULL_RUN" = "0" ]; then
+ ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE="false"
+ ENTERPRISE_SCOPE_REASON="disabled (light run)"
+ else
+ ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE="false"
+ ENTERPRISE_SCOPE_REASON="disabled (pipeline variable)"
+ fi
+
+ if [ "$(ENTRA_INCLUDE_APP_REGISTRATIONS)" = "true" ] && [ "$FULL_RUN" = "1" ]; then
+ ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE="true"
+ APP_REG_SCOPE_REASON="enabled (full run)"
+ elif [ "$(ENTRA_INCLUDE_APP_REGISTRATIONS)" = "true" ] && [ "$FULL_RUN" = "0" ]; then
+ ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE="false"
+ APP_REG_SCOPE_REASON="disabled (light run; TODO: resolution-flip issue)"
+ else
+ ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE="false"
+ APP_REG_SCOPE_REASON="disabled (pipeline variable)"
+ fi
+
+ echo "Run mode decision (Entra): $MODE ($MODE_REASON; local time: $LOCAL_NOW)"
+ echo "Enterprise Applications export scope: $ENTERPRISE_SCOPE_REASON"
+ echo "App Registrations export scope: $APP_REG_SCOPE_REASON"
+ echo "##vso[task.setvariable variable=ENTRA_RUN_MODE]$MODE"
+ echo "##vso[task.setvariable variable=ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE]$ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE"
+ echo "##vso[task.setvariable variable=ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE]$ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE"
+ workingDirectory: "$(Build.SourcesDirectory)"
+ failOnStderr: true
+
+ - task: Bash@3
+ displayName: Reset Entra export paths
+ inputs:
+ targetType: inline
+ script: |
+ set -euo pipefail
+ ENTRA_ROOT="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)"
+ ENTRA_REPORTS_ROOT="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra"
+ mkdir -p "$ENTRA_ROOT"
+ PRESERVE_DIRS=()
+ if [ "$(ENTRA_INCLUDE_ENTERPRISE_APPS)" = "true" ] && [ "$(ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE)" != "true" ]; then
+ PRESERVE_DIRS+=("Enterprise Applications")
+ fi
+ if [ "$(ENTRA_INCLUDE_APP_REGISTRATIONS)" = "true" ] && [ "$(ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE)" != "true" ]; then
+ PRESERVE_DIRS+=("App Registrations")
+ fi
+ if [ "${#PRESERVE_DIRS[@]}" -gt 0 ]; then
+ echo "Preserving Entra baseline categories in light run: ${PRESERVE_DIRS[*]}"
+ if git show-ref --verify --quiet
"refs/remotes/origin/$(DRIFT_BRANCH_ENTRA)"; then + for preserve_name in "${PRESERVE_DIRS[@]}"; do + preserve_path="$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)/$preserve_name" + if git cat-file -e "origin/$(DRIFT_BRANCH_ENTRA):$preserve_path" 2>/dev/null; then + git checkout --quiet "origin/$(DRIFT_BRANCH_ENTRA)" -- "$preserve_path" || true + fi + done + fi + find "$ENTRA_ROOT" -mindepth 1 -maxdepth 1 -print0 | while IFS= read -r -d '' entry; do + entry_name="$(basename "$entry")" + keep=0 + for preserve_name in "${PRESERVE_DIRS[@]}"; do + if [ "$entry_name" = "$preserve_name" ]; then + keep=1 + break + fi + done + if [ "$keep" = "0" ]; then + rm -rf "$entry" + fi + done + else + rm -rf "$ENTRA_ROOT" + fi + rm -rf "$ENTRA_REPORTS_ROOT" + mkdir -p "$ENTRA_ROOT" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Set Entra backup start time + inputs: + targetType: inline + script: | + set -euo pipefail + BACKUP_START="$(date +%Y.%m.%d:%H.%M.%S)" + echo "Entra backup start time (UTC): $BACKUP_START" + echo "##vso[task.setvariable variable=BACKUP_START]$BACKUP_START" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: AzurePowerShell@5 + displayName: Get Graph Token for Entra workload + inputs: + azureSubscription: $(SERVICE_CONNECTION_NAME) + azurePowerShellVersion: LatestVersion + ScriptType: inlineScript + Inline: | + $getTokenParams = @{ + ResourceTypeName = 'MSGraph' + AsSecureString = $true + ErrorAction = 'Stop' + } + $tokenCommand = Get-Command Get-AzAccessToken -ErrorAction Stop + if ($tokenCommand.Parameters.ContainsKey('ForceRefresh')) { + $getTokenParams['ForceRefresh'] = $true + } + $accessToken = ([PSCredential]::New('dummy', (Get-AzAccessToken @getTokenParams).Token).GetNetworkCredential().Password) + + $tokenParts = $accessToken.Split('.') + if ($tokenParts.Length -lt 2) { throw "Invalid Graph access token format." } + $payload = $tokenParts[1].Replace('-', '+').Replace('_', '/') + switch ($payload.Length % 4) { + 2 { $payload += '==' } + 3 { $payload += '=' } + } + $payloadJson = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($payload)) + $claims = $payloadJson | ConvertFrom-Json + $roles = @($claims.roles) + $sortedRoles = $roles | Sort-Object + Write-Host "Graph token roles (Entra workload): $($sortedRoles -join ', ')" + + $missingRoles = @() + if ("$(ENTRA_INCLUDE_CONDITIONAL_ACCESS)" -eq "true") { + if (-not ($roles -contains 'Policy.Read.All')) { $missingRoles += 'Policy.Read.All' } + if (-not ($roles -contains 'Policy.Read.ConditionalAccess')) { $missingRoles += 'Policy.Read.ConditionalAccess' } + } + if ("$(ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE)" -eq "true" -or "$(ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE)" -eq "true") { + if (-not ($roles -contains 'Application.Read.All')) { $missingRoles += 'Application.Read.All' } + } + if ($missingRoles.Count -gt 0) { + $missingRoles = $missingRoles | Select-Object -Unique + Write-Host "##vso[task.logissue type=error]Graph token is missing Entra workload roles: $($missingRoles -join ', ')" + throw "Service connection token is missing required Entra permissions." + } + + Write-Host "##vso[task.setvariable variable=accessToken;issecret=true]$accessToken" + + - task: Bash@3 + displayName: Export Entra baseline objects + inputs: + targetType: inline + script: | + set -euo pipefail + echo "Starting Entra baseline export (separate workload branch)..." 
+ timeout 45m python3 -u "$(PIPELINE_SCRIPT_ROOT)/export_entra_baseline.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --token "$(accessToken)" \ + --previous-snapshot-ref "origin/$(DRIFT_BRANCH_ENTRA)" \ + --include-named-locations "$(ENTRA_INCLUDE_NAMED_LOCATIONS)" \ + --include-authentication-strengths "$(ENTRA_INCLUDE_AUTHENTICATION_STRENGTHS)" \ + --include-conditional-access "$(ENTRA_INCLUDE_CONDITIONAL_ACCESS)" \ + --include-enterprise-applications "$(ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE)" \ + --enterprise-app-workers "$(ENTRA_ENTERPRISE_APP_WORKERS)" \ + --include-app-registrations "$(ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE)" \ + --fail-on-export-error "true" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Resolve Conditional Access reference names (Entra) + condition: eq(variables['ENTRA_INCLUDE_CONDITIONAL_ACCESS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/resolve_ca_references.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --token "$(accessToken)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Generate policy assignment report (Entra CA) + condition: eq(variables['ENTRA_INCLUDE_CONDITIONAL_ACCESS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_assignment_report.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Generate apps inventory report (Entra) + condition: or(eq(variables['ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE'], 'true'), eq(variables['ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE'], 'true')) + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_app_inventory_report.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Generate object inventory reports (Entra) + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/generate_object_inventory_reports.py" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --output-dir "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Validate Entra backup outputs + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(PIPELINE_SCRIPT_ROOT)/validate_backup_outputs.py" \ + --workload entra \ + --mode "$(ENTRA_RUN_MODE)" \ + --root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --reports-root "$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(REPORTS_SUBDIR)/entra" \ + --include-named-locations "$(ENTRA_INCLUDE_NAMED_LOCATIONS)" \ + --include-authentication-strengths "$(ENTRA_INCLUDE_AUTHENTICATION_STRENGTHS)" \ + --include-conditional-access "$(ENTRA_INCLUDE_CONDITIONAL_ACCESS)" \ + --include-enterprise-applications "$(ENTRA_INCLUDE_ENTERPRISE_APPS)" \ + 
--include-enterprise-applications-effective "$(ENTRA_INCLUDE_ENTERPRISE_APPS_EFFECTIVE)" \ + --include-app-registrations "$(ENTRA_INCLUDE_APP_REGISTRATIONS)" \ + --include-app-registrations-effective "$(ENTRA_INCLUDE_APP_REGISTRATIONS_EFFECTIVE)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Revert enrichment-only Entra drift noise + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/filter_entra_enrichment_noise.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload-root "$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --fail-on-residual-enrichment-drift "true" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Commit & push Entra drift branch + name: commitEntra + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/commit_entra_drift.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload-root "$(BACKUP_FOLDER)/$(ENTRA_BACKUP_SUBDIR)" \ + --baseline-branch "$(BASELINE_BRANCH)" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --access-token "$(accessToken)" \ + --service-name "$(USER_NAME)" \ + --service-email "$(USER_EMAIL)" \ + --build-reason "$(Build.Reason)" \ + --requested-for "$(Build.RequestedFor)" \ + --requested-for-email "$(Build.RequestedForEmail)" \ + --backup-start "$(BACKUP_START)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - job: create_or_update_entra_pr + displayName: Create or update rolling Entra drift PR + dependsOn: backup_entra + condition: and(eq(variables['ENABLE_WORKLOAD_ENTRA'], 'true'), succeeded(), or(eq(dependencies.backup_entra.outputs['commitEntra.CHANGE_DETECTED'], '1'), eq(dependencies.backup_entra.outputs['commitEntra.ROLLING_PR_SYNC_REQUIRED'], '1'))) + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Ensure rolling Entra PR exists + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/ensure_rolling_pr.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "entra" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" \ + --pr-title "$(ROLLING_PR_TITLE_ENTRA)" + failOnStderr: true + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + BUILD_BUILDNUMBER: $(Build.BuildNumber) + BUILD_BUILDID: $(Build.BuildId) + AUTO_REMEDIATE_ON_PR_REJECTION: $(AUTO_REMEDIATE_ON_PR_REJECTION) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: true + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + ROLLING_PR_MERGE_STRATEGY: $(ROLLING_PR_MERGE_STRATEGY) + + - job: update_entra_pr_summary + displayName: Update rolling Entra PR summary + dependsOn: + - backup_entra + - create_or_update_entra_pr + condition: and(eq(variables['ENABLE_WORKLOAD_ENTRA'], 'true'), eq(variables['ENABLE_PR_REVIEW_SUMMARY'], 'true'), 
or(eq(dependencies.backup_entra.outputs['commitEntra.CHANGE_DETECTED'], '1'), eq(dependencies.backup_entra.outputs['commitEntra.ROLLING_PR_SYNC_REQUIRED'], '1')), in(dependencies.backup_entra.result, 'Succeeded', 'SucceededWithIssues'), in(dependencies.create_or_update_entra_pr.result, 'Succeeded', 'SucceededWithIssues', 'Skipped')) + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Validate Azure OpenAI availability (Entra) + condition: eq(variables['ENABLE_PR_AI_SUMMARY'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/diagnostics/precheck_azure_openai_availability.py" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + + - task: Bash@3 + displayName: Debug change-ticket gate vars (Entra) + inputs: + targetType: inline + script: | + set -euo pipefail + echo "ENABLE_PR_REVIEW_SUMMARY='$(ENABLE_PR_REVIEW_SUMMARY)'" + echo "REQUIRE_CHANGE_TICKETS='$(REQUIRE_CHANGE_TICKETS)'" + echo "CHANGE_TICKET_REGEX='$(CHANGE_TICKET_REGEX)'" + echo "DEBUG_CHANGE_TICKET_THREADS='$(DEBUG_CHANGE_TICKET_THREADS)'" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Update automated reviewer summary (Entra) + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/update_pr_review_summary.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "entra" \ + --backup-folder "$(BACKUP_FOLDER)" \ + --reports-subdir "$(REPORTS_SUBDIR)" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + ENABLE_PR_AI_SUMMARY: $(ENABLE_PR_AI_SUMMARY) + AZURE_OPENAI_ENDPOINT: $(AZURE_OPENAI_ENDPOINT) + AZURE_OPENAI_DEPLOYMENT: $(AZURE_OPENAI_DEPLOYMENT) + AZURE_OPENAI_API_KEY: $(AZURE_OPENAI_API_KEY) + AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) + REQUIRE_CHANGE_TICKETS: $(REQUIRE_CHANGE_TICKETS) + CHANGE_TICKET_REGEX: $(CHANGE_TICKET_REGEX) + DEBUG_CHANGE_TICKET_THREADS: $(DEBUG_CHANGE_TICKET_THREADS) + ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS: $(ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS) + + - task: Bash@3 + displayName: Apply reviewer /reject decisions (Entra) + condition: eq(variables['ENABLE_PR_REVIEWER_DECISIONS'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/apply_reviewer_rejections.py" \ + --repo-root "$(Build.SourcesDirectory)" \ + --workload "entra" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + + - task: Bash@3 + displayName: Queue post-merge remediation from reviewer /reject (Entra) + condition: 
eq(variables['AUTO_REMEDIATE_AFTER_MERGE'], 'true') + inputs: + targetType: inline + script: | + set -euo pipefail + python3 "$(Build.SourcesDirectory)/scripts/queue_post_merge_restore.py" \ + --workload "entra" \ + --drift-branch "$(DRIFT_BRANCH_ENTRA)" \ + --baseline-branch "$(BASELINE_BRANCH)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + SYSTEM_COLLECTIONURI: $(System.CollectionUri) + SYSTEM_TEAMPROJECT: $(System.TeamProject) + BUILD_REPOSITORY_ID: $(Build.Repository.ID) + AUTO_REMEDIATE_AFTER_MERGE: $(AUTO_REMEDIATE_AFTER_MERGE) + AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: $(AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS) + AUTO_REMEDIATE_RESTORE_PIPELINE_ID: $(AUTO_REMEDIATE_RESTORE_PIPELINE_ID) + AUTO_REMEDIATE_DRY_RUN: $(AUTO_REMEDIATE_DRY_RUN) + AUTO_REMEDIATE_UPDATE_ASSIGNMENTS: $(AUTO_REMEDIATE_UPDATE_ASSIGNMENTS) + AUTO_REMEDIATE_REMOVE_OBJECTS: $(AUTO_REMEDIATE_REMOVE_OBJECTS) + AUTO_REMEDIATE_MAX_WORKERS: $(AUTO_REMEDIATE_MAX_WORKERS) + AUTO_REMEDIATE_EXCLUDE_CSV: $(AUTO_REMEDIATE_EXCLUDE_CSV) + AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE: true + + - job: tag + displayName: Tag repo + dependsOn: backup_intune + condition: and(eq(variables['ENABLE_TAGGING'], 'true'), succeeded(), eq(dependencies.backup_intune.outputs['commitAndSetVariable.CHANGE_DETECTED'], '1'), eq(dependencies.backup_intune.outputs['setRunMode.FULL_RUN'], '1')) + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + variables: + COMMIT_DATE: $[ dependencies.backup_intune.outputs['commitAndSetVariable.COMMIT_DATE'] ] + MODIFICATION_AUTHOR: $[ dependencies.backup_intune.outputs['commitAndSetVariable.MODIFICATION_AUTHOR'] ] + COMMIT_SHA: $[ dependencies.backup_intune.outputs['commitAndSetVariable.COMMIT_SHA'] ] + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Configure Git + inputs: + targetType: inline + script: | + git config user.name "$(USER_NAME)" + git config user.email "$(USER_EMAIL)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Pull origin + inputs: + targetType: inline + script: | + set -euo pipefail + git fetch --quiet origin "$(DRIFT_BRANCH_INTUNE)" + git checkout --force -B "$(DRIFT_BRANCH_INTUNE)" "origin/$(DRIFT_BRANCH_INTUNE)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: PowerShell@2 + displayName: Git tag + inputs: + targetType: inline + pwsh: true + script: | + $ErrorActionPreference = "Stop" + $DATEF = "$(COMMIT_DATE)" + $COMMIT_SHA = "$(COMMIT_SHA)" + Write-Host "Creating TAG '$DATEF'" + if ([string]::IsNullOrWhiteSpace($COMMIT_SHA)) { throw "COMMIT_SHA is empty; backup job did not publish commit SHA." } + $existingTag = git ls-remote --tags origin "refs/tags/$DATEF" + if ($LASTEXITCODE -ne 0) { throw "Failed to query existing tags from origin." } + if (-not [string]::IsNullOrWhiteSpace($existingTag)) { + Write-Host "Tag '$DATEF' already exists on origin. Skipping." 
+ exit 0 + } + git tag -a "$DATEF" "$COMMIT_SHA" -m "$DATEF -- Intune configuration snapshot (changes made by: $(MODIFICATION_AUTHOR))" + if ($LASTEXITCODE -ne 0) { throw "Failed to create tag '$DATEF'" } + git push origin "$DATEF" 2>&1 | Out-Null + if ($LASTEXITCODE -ne 0) { throw "Failed to push tag '$DATEF'" } + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + # Publish PDF & HTML documents as artifacts + - job: publish + displayName: Publish as-built artifacts + dependsOn: backup_intune + condition: and(succeeded(), eq(dependencies.backup_intune.outputs['commitAndSetVariable.CHANGE_DETECTED'], '1'), eq(dependencies.backup_intune.outputs['setRunMode.FULL_RUN'], '1')) + pool: + name: $(AGENT_POOL_NAME) + continueOnError: false + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Pull latest drift branch + inputs: + targetType: inline + script: | + set -euo pipefail + git fetch --quiet origin "$(DRIFT_BRANCH_INTUNE)" + git checkout --force -B "$(DRIFT_BRANCH_INTUNE)" "origin/$(DRIFT_BRANCH_INTUNE)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: NodeTool@0 + displayName: Install Node.js + inputs: + versionSpec: "20.x" + + - task: Bash@3 + displayName: Install md-to-pdf + inputs: + targetType: inline + script: | + set -euo pipefail + npm i --location=global "md-to-pdf@$(MD_TO_PDF_VERSION)" + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Ensure browser dependencies for md-to-pdf + inputs: + targetType: inline + script: | + set -euo pipefail + + has_libglib() { + if command -v ldconfig >/dev/null 2>&1; then + ldconfig -p 2>/dev/null | grep -q "libglib-2.0.so.0" && return 0 + fi + find /lib /usr/lib /usr/local/lib -type f -name "libglib-2.0.so.0*" 2>/dev/null | grep -q . + } + + if has_libglib; then + echo "Browser dependencies look present." + echo "##vso[task.setvariable variable=BROWSER_DEPS_READY]1" + exit 0 + fi + + echo "libglib-2.0.so.0 not found. Attempting to install Chromium runtime dependencies." + + if command -v apt-get >/dev/null 2>&1; then + SUDO="" + if command -v sudo >/dev/null 2>&1; then + SUDO="sudo" + fi + + set +e + $SUDO apt-get update + apt_update_rc=$? + if [ "$apt_update_rc" -eq 0 ]; then + $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install -y \ + libasound2 \ + libatk-bridge2.0-0 \ + libatk1.0-0 \ + libc6 \ + libcairo2 \ + libcups2 \ + libdbus-1-3 \ + libdrm2 \ + libexpat1 \ + libfontconfig1 \ + libgbm1 \ + libglib2.0-0 \ + libgtk-3-0 \ + libnspr4 \ + libnss3 \ + libpango-1.0-0 \ + libx11-6 \ + libx11-xcb1 \ + libxcb1 \ + libxcomposite1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxkbcommon0 \ + libxrandr2 + apt_install_rc=$? + if [ "$apt_install_rc" -ne 0 ]; then + echo "##vso[task.logissue type=warning]apt-get install failed with code $apt_install_rc. HTML/PDF conversion will be skipped." + fi + else + echo "##vso[task.logissue type=warning]apt-get update failed with code $apt_update_rc. HTML/PDF conversion will be skipped." + fi + set -e + else + echo "##vso[task.logissue type=warning]apt-get is not available on this agent. Cannot auto-install browser dependencies." + fi + + if has_libglib; then + echo "Browser dependencies are ready." + echo "##vso[task.setvariable variable=BROWSER_DEPS_READY]1" + else + echo "##vso[task.logissue type=warning]Browser dependencies still missing (libglib-2.0.so.0). HTML/PDF conversion will be skipped." 
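+          # Downstream HTML/PDF conversion steps run only when BROWSER_DEPS_READY is '1', so they will be skipped on this agent.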
+ echo "##vso[task.setvariable variable=BROWSER_DEPS_READY]0" + fi + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: false + + - task: Bash@3 + displayName: Convert markdown to HTML + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), ne(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + targetType: inline + script: | + set -euo pipefail + INPUT_FILE="$(Build.SourcesDirectory)/prod-as-built.md" + OUTPUT_FILE="$(Build.SourcesDirectory)/prod-as-built.html" + [ -s "$INPUT_FILE" ] + wc -c "$INPUT_FILE" + time timeout 20m md-to-pdf "$INPUT_FILE" \ + --config-file "$(Build.SourcesDirectory)/md2pdf/htmlconfig.json" \ + --as-html \ + --launch-options '{"args":["--no-sandbox","--disable-dev-shm-usage"]}' \ + > "$OUTPUT_FILE" + [ -s "$OUTPUT_FILE" ] + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Collect split markdown docs + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true')) + inputs: + targetType: inline + script: | + set -euo pipefail + SRC_DIR="$(Build.SourcesDirectory)/$(BACKUP_FOLDER)/$(INTUNE_BACKUP_SUBDIR)" + OUT_DIR="$(Build.SourcesDirectory)/prod-as-built-split-md" + rm -rf "$OUT_DIR" + mkdir -p "$OUT_DIR" + while IFS= read -r f; do + rel_path="${f#"$SRC_DIR/"}" + mkdir -p "$OUT_DIR/$(dirname "$rel_path")" + cp "$f" "$OUT_DIR/$rel_path" + done < <(find "$SRC_DIR" -type f -name '*.md' | sort) + find "$OUT_DIR" -type f -name '*.md' | wc -l + [ -n "$(find "$OUT_DIR" -type f -name '*.md' -print -quit)" ] + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Convert split markdown to HTML + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + targetType: inline + script: | + set -euo pipefail + SRC_DIR="$(Build.SourcesDirectory)/prod-as-built-split-md" + OUT_DIR="$(Build.SourcesDirectory)/prod-as-built-split-html" + rm -rf "$OUT_DIR" + mkdir -p "$OUT_DIR" + count=0 + while IFS= read -r md_file; do + count=$((count + 1)) + wc -c "$md_file" + rel_path="${md_file#"$SRC_DIR/"}" + out_html="$OUT_DIR/${rel_path%.md}.html" + mkdir -p "$(dirname "$out_html")" + timeout 5m md-to-pdf "$md_file" \ + --config-file "$(Build.SourcesDirectory)/md2pdf/htmlconfig.json" \ + --as-html \ + --launch-options '{"args":["--no-sandbox","--disable-dev-shm-usage"]}' \ + > "$out_html" + [ -s "$out_html" ] + done < <(find "$SRC_DIR" -type f -name '*.md' | sort) + echo "Converted $count markdown files to split HTML artifacts." 
+ [ "$count" -gt 0 ] + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: PublishBuildArtifacts@1 + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), ne(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + pathToPublish: "$(Build.SourcesDirectory)/prod-as-built.html" + artifactName: "prod-as-built-html" + + - task: PublishBuildArtifacts@1 + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true')) + inputs: + pathToPublish: "$(Build.SourcesDirectory)/prod-as-built-split-md" + artifactName: "prod-as-built-split-markdown" + + - task: PublishBuildArtifacts@1 + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + pathToPublish: "$(Build.SourcesDirectory)/prod-as-built-split-html" + artifactName: "prod-as-built-split-html" + + - task: Bash@3 + displayName: Convert markdown to PDF + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), ne(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + targetType: inline + script: | + set -euo pipefail + INPUT_FILE="$(Build.SourcesDirectory)/prod-as-built.md" + OUTPUT_FILE="$(Build.SourcesDirectory)/prod-as-built.pdf" + [ -s "$INPUT_FILE" ] + wc -c "$INPUT_FILE" + time timeout 20m md-to-pdf "$INPUT_FILE" \ + --config-file "$(Build.SourcesDirectory)/md2pdf/pdfconfig.json" \ + --launch-options '{"args":["--no-sandbox","--disable-dev-shm-usage"]}' \ + > "$OUTPUT_FILE" + [ -s "$OUTPUT_FILE" ] + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: Bash@3 + displayName: Convert split markdown to PDF + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + targetType: inline + script: | + set -euo pipefail + SRC_DIR="$(Build.SourcesDirectory)/prod-as-built-split-md" + OUT_DIR="$(Build.SourcesDirectory)/prod-as-built-split-pdf" + rm -rf "$OUT_DIR" + mkdir -p "$OUT_DIR" + count=0 + while IFS= read -r md_file; do + count=$((count + 1)) + wc -c "$md_file" + rel_path="${md_file#"$SRC_DIR/"}" + out_pdf="$OUT_DIR/${rel_path%.md}.pdf" + mkdir -p "$(dirname "$out_pdf")" + timeout 5m md-to-pdf "$md_file" \ + --config-file "$(Build.SourcesDirectory)/md2pdf/pdfconfig.json" \ + --launch-options '{"args":["--no-sandbox","--disable-dev-shm-usage"]}' \ + > "$out_pdf" + [ -s "$out_pdf" ] + done < <(find "$SRC_DIR" -type f -name '*.md' | sort) + echo "Converted $count markdown files to split PDF artifacts." 
+ [ "$count" -gt 0 ] + workingDirectory: "$(Build.SourcesDirectory)" + failOnStderr: true + + - task: PublishBuildArtifacts@1 + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), ne(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + pathToPublish: "$(Build.SourcesDirectory)/prod-as-built.pdf" + artifactName: "prod-as-built-pdf" + + - task: PublishBuildArtifacts@1 + condition: and(eq(variables['CHANGE_DETECTED'], '1'), eq(variables['FULL_RUN'], '1'), eq(variables['SPLIT_DOCUMENTATION'], 'true'), eq(variables['BROWSER_DEPS_READY'], '1')) + inputs: + pathToPublish: "$(Build.SourcesDirectory)/prod-as-built-split-pdf" + artifactName: "prod-as-built-split-pdf" diff --git a/deploy/RELEASE.md b/deploy/RELEASE.md new file mode 100644 index 0000000..daad134 --- /dev/null +++ b/deploy/RELEASE.md @@ -0,0 +1,46 @@ +# ASTRAL Public Release Checklist + +Use this checklist before publishing a new sanitized version of ASTRAL to the public repository. + +## Pre-release scan + +Run the following commands from the repository root to ensure no tenant-specific data remains: + +```bash +# Search for the original tenant identifiers +grep -ri "cqre" --include="*.{yml,yaml,py,md,json,sh,ps1}" . | grep -v node_modules | grep -v __pycache__ | grep -v .git +grep -ri "kracmar" --include="*.{yml,yaml,py,md,json,sh,ps1}" . | grep -v node_modules | grep -v __pycache__ | grep -v .git +grep -ri "sc_intunebackup" --include="*.{yml,yaml,py,md,json,sh,ps1}" . | grep -v node_modules | grep -v __pycache__ | grep -v .git + +# Search for the original tenant ID (replace with your actual tenant ID) +grep -ri "0ec9f34c-17c8-4541-b084-7d64ecdcc997" --include="*.{yml,yaml,py,md,json,sh,ps1}" . | grep -v node_modules | grep -v __pycache__ | grep -v .git +``` + +Expected result: **zero matches** outside of this release checklist. + +## File verification + +- [ ] `azure-pipelines.yml` contains no hardcoded tenant domain, email, or service connection name. +- [ ] `azure-pipelines-restore.yml` contains no hardcoded tenant domain, email, or service connection name. +- [ ] `azure-pipelines-review-sync.yml` contains no hardcoded tenant-specific values. +- [ ] `scripts/common.py` uses a generic fallback name (not `CQRE_Intune_Backupper`). +- [ ] `tenant-state/` contains only placeholder files (`.gitkeep`, `README.md`). +- [ ] `prod-as-built.md` has been deleted. +- [ ] All markdown documentation uses generic examples (`contoso.onmicrosoft.com`, `astral-backup@contoso.com`, `sc-astral-backup`). + +## Test verification + +- [ ] Unit tests pass: `python3 -m unittest discover -s tests -v` + +## Publication steps + +1. Ensure you are on a clean branch (e.g. `publish/v1.x`). +2. Run the pre-release scan above. +3. Commit any last-minute fixes. +4. Tag the release: `git tag -a v1.0.0 -m "ASTRAL v1.0.0"` +5. Push the tag. +6. Publish to the public repository (fresh clone or specific branch push). + +## Note on Git history + +If the original repository contained live tenant exports in its history, consider publishing from a **squashed or freshly initialized repository** rather than pushing the full private history. The public template does not benefit from historical tenant data, and a clean history avoids accidental exposure of old exports. 
diff --git a/deploy/bootstrap-tenant.ps1 b/deploy/bootstrap-tenant.ps1 new file mode 100644 index 0000000..67e2051 --- /dev/null +++ b/deploy/bootstrap-tenant.ps1 @@ -0,0 +1,228 @@ +#requires -Version 5.1 +<# +.SYNOPSIS + Bootstraps an Azure AD app registration for ASTRAL with required Microsoft Graph permissions. + +.DESCRIPTION + Creates a single-tenant app registration, assigns read (and optional write) Graph application permissions, + grants admin consent, and configures a workload federated credential for Azure DevOps. + +.PARAMETER TenantName + The Microsoft 365 tenant domain, e.g. contoso.onmicrosoft.com. + +.PARAMETER ServiceConnectionName + The intended Azure DevOps service connection name (used for the federated credential subject). + +.PARAMETER AppDisplayName + Optional display name for the app registration. Default: "ASTRAL Backup Service". + +.PARAMETER AdoOrganizationUrl + Optional Azure DevOps organization URL, e.g. https://dev.azure.com/contoso. + If provided, the script prints a one-liner to create the service connection via REST API. + +.PARAMETER AddRestorePermissions + If specified, also adds write permissions for the restore pipeline. + +.EXAMPLE + .\bootstrap-tenant.ps1 -TenantName "contoso.onmicrosoft.com" -ServiceConnectionName "sc-astral-backup" +#> +[CmdletBinding()] +param ( + [Parameter(Mandatory = $true)] + [string]$TenantName, + + [Parameter(Mandatory = $true)] + [string]$ServiceConnectionName, + + [string]$AppDisplayName = "ASTRAL Backup Service", + + [string]$AdoOrganizationUrl = "", + + [switch]$AddRestorePermissions +) + +$ErrorActionPreference = "Stop" + +function Test-ModuleInstalled { + param ([string]$Name) + $mod = Get-Module -ListAvailable -Name $Name | Select-Object -First 1 + if (-not $mod) { + Write-Host "Installing module: $Name" -ForegroundColor Cyan + Install-Module $Name -Scope CurrentUser -Force -AllowClobber + } +} + +Test-ModuleInstalled "Microsoft.Graph.Applications" +Test-ModuleInstalled "Microsoft.Graph.Identity.SignIns" + +Import-Module Microsoft.Graph.Applications +Import-Module Microsoft.Graph.Identity.SignIns + +Write-Host "Connecting to Microsoft Graph..." -ForegroundColor Cyan +Connect-MgGraph -Scopes "Application.ReadWrite.All","AppRoleAssignment.ReadWrite.All","Directory.Read.All" -NoWelcome + +$tenant = Get-MgOrganization | Select-Object -First 1 +if (-not $tenant) { + throw "Unable to read tenant details. Ensure you are authenticated to the correct tenant." 
+} + +Write-Host "Tenant: $($tenant.DisplayName) ($($tenant.Id))" -ForegroundColor Green + +# Required read permissions +$readPermissions = @( + "Device.Read.All", + "DeviceManagementApps.Read.All", + "DeviceManagementConfiguration.Read.All", + "DeviceManagementManagedDevices.Read.All", + "DeviceManagementRBAC.Read.All", + "DeviceManagementScripts.Read.All", + "DeviceManagementServiceConfig.Read.All", + "Group.Read.All", + "Policy.Read.All", + "Policy.Read.ConditionalAccess", + "Policy.Read.DeviceConfiguration", + "User.Read.All", + "Application.Read.All" +) + +$optionalReadPermissions = @( + "RoleManagement.Read.Directory", + "Directory.Read.All", + "AuditLog.Read.All" +) + +$restorePermissions = @( + "DeviceManagementApps.ReadWrite.All", + "DeviceManagementConfiguration.ReadWrite.All", + "DeviceManagementManagedDevices.ReadWrite.All", + "DeviceManagementRBAC.ReadWrite.All", + "DeviceManagementScripts.ReadWrite.All", + "DeviceManagementServiceConfig.ReadWrite.All", + "Policy.Read.All", + "Policy.ReadWrite.ConditionalAccess" +) + +$allPermissions = $readPermissions + $optionalReadPermissions +if ($AddRestorePermissions) { + $allPermissions += $restorePermissions +} + +# Get Microsoft Graph SP to map permissions to AppRoles +$graphSp = Get-MgServicePrincipal -Filter "appId eq '00000003-0000-0000-c000-000000000000'" +if (-not $graphSp) { + throw "Microsoft Graph service principal not found in tenant." +} + +$requiredResourceAccess = @() +$appRoles = @() +foreach ($permName in ($allPermissions | Select-Object -Unique)) { + $appRole = $graphSp.AppRoles | Where-Object { $_.Value -eq $permName } | Select-Object -First 1 + if (-not $appRole) { + Write-Warning "Permission '$permName' not found in Microsoft Graph. Skipping." + continue + } + $appRoles += $appRole +} + +if ($appRoles.Count -eq 0) { + throw "No valid Graph permissions resolved. Cannot continue." +} + +$resourceAccess = @() +foreach ($ar in $appRoles) { + $resourceAccess += @{ + id = $ar.Id + type = "Role" + } +} + +$requiredResourceAccess = @( + @{ + resourceAppId = $graphSp.AppId + resourceAccess = $resourceAccess + } +) + +# Create or update app registration +$existingApp = Get-MgApplication -Filter "displayName eq '$AppDisplayName'" | Select-Object -First 1 +if ($existingApp) { + Write-Host "Found existing app registration: $($existingApp.AppId)" -ForegroundColor Yellow + $app = $existingApp + Update-MgApplication -ApplicationId $app.Id -RequiredResourceAccess $requiredResourceAccess + Write-Host "Updated required resource access." -ForegroundColor Green +} +else { + Write-Host "Creating app registration: $AppDisplayName" -ForegroundColor Cyan + $app = New-MgApplication -DisplayName $AppDisplayName -SignInAudience "AzureADMyOrg" -RequiredResourceAccess $requiredResourceAccess + Write-Host "Created app registration. AppId: $($app.AppId)" -ForegroundColor Green +} + +# Ensure service principal exists +$sp = Get-MgServicePrincipal -Filter "appId eq '$($app.AppId)'" | Select-Object -First 1 +if (-not $sp) { + Write-Host "Creating service principal..." -ForegroundColor Cyan + $sp = New-MgServicePrincipal -AppId $app.AppId +} + +# Grant admin consent +Write-Host "Granting admin consent..." 
-ForegroundColor Cyan
+foreach ($ar in $appRoles) {
+    $existingAssignment = Get-MgServicePrincipalAppRoleAssignment -ServicePrincipalId $sp.Id | Where-Object { $_.AppRoleId -eq $ar.Id }
+    if (-not $existingAssignment) {
+        New-MgServicePrincipalAppRoleAssignment -ServicePrincipalId $sp.Id -PrincipalId $sp.Id -ResourceId $graphSp.Id -AppRoleId $ar.Id | Out-Null
+    }
+}
+Write-Host "Admin consent granted." -ForegroundColor Green
+
+# Federated credential for Azure DevOps
+$federatedCredentialName = "AstralAzureDevOps-$ServiceConnectionName"
+$existingFedCred = Get-MgApplicationFederatedIdentityCredential -ApplicationId $app.Id | Where-Object { $_.Name -eq $federatedCredentialName }
+if (-not $existingFedCred) {
+    Write-Host "Creating federated credential for Azure DevOps..." -ForegroundColor Cyan
+    # Subject identifier for Azure DevOps workload identity federation
+    # Format: sc://<organization>/<project>/<service connection name>
+    # We require the user to fill in org/project manually or via parameters.
+    $adoOrg = Read-Host "Enter your Azure DevOps organization name (e.g. 'contoso')"
+    $adoProject = Read-Host "Enter your Azure DevOps project name (e.g. 'ASTRAL')"
+    $subject = "sc://$adoOrg/$adoProject/$ServiceConnectionName"
+
+    $params = @{
+        Name = $federatedCredentialName
+        Issuer = "https://vstoken.dev.azure.com"
+        Subject = $subject
+        Audiences = @("api://AzureADTokenExchange")
+    }
+    New-MgApplicationFederatedIdentityCredential -ApplicationId $app.Id -BodyParameter $params | Out-Null
+    Write-Host "Federated credential created. Subject: $subject" -ForegroundColor Green
+}
+else {
+    Write-Host "Federated credential already exists." -ForegroundColor Yellow
+}
+
+Write-Host ""
+Write-Host "=== Bootstrap complete ===" -ForegroundColor Green
+Write-Host "Tenant Name: $TenantName"
+Write-Host "Tenant ID: $($tenant.Id)"
+Write-Host "App Display Name: $AppDisplayName"
+Write-Host "App ID: $($app.AppId)"
+Write-Host "Service Connection: $ServiceConnectionName"
+Write-Host ""
+Write-Host "Next steps:" -ForegroundColor Cyan
+Write-Host "1. In Azure DevOps, create a Workload Identity Federation service connection."
+Write-Host " - Tenant ID: $($tenant.Id)"
+Write-Host " - App ID: $($app.AppId)"
+Write-Host " - Name: $ServiceConnectionName"
+Write-Host ""
+
+if ($AdoOrganizationUrl) {
+    $project = if ($AdoOrganizationUrl -match "/([^/]+)$") { $matches[1] } else { "YOUR_PROJECT" }
+    $pat = Read-Host "Enter an Azure DevOps PAT with 'ServiceConnections: Read & manage' scope (input is hidden)" -AsSecureString
+    $patPlain = [System.Net.NetworkCredential]::new("", $pat).Password
+    Write-Host ""
+    Write-Host "You can create the service connection via REST API using:"
+    Write-Host " curl -u :$patPlain -X POST -H 'Content-Type: application/json'"
+    Write-Host "   -d '{ ... }'"
+    Write-Host "   '$AdoOrganizationUrl/_apis/serviceendpoint/endpoints?api-version=7.1'"
+}
+
+Disconnect-MgGraph | Out-Null
diff --git a/deploy/onboarding-runbook.md b/deploy/onboarding-runbook.md
new file mode 100644
index 0000000..a832cef
--- /dev/null
+++ b/deploy/onboarding-runbook.md
@@ -0,0 +1,150 @@
+# ASTRAL Onboarding Runbook
+
+This guide walks through deploying ASTRAL into a new Azure DevOps organization and Microsoft 365 tenant.
+
+## Prerequisites
+
+- Azure DevOps organization and project created.
+- Owner or Contributor access to the target Microsoft 365 tenant.
+- Permission to create app registrations and grant admin consent in Entra ID.
+- PowerShell 7+ or Windows PowerShell 5.1 with the `Microsoft.Graph` module (for the bootstrap script).
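+
+A quick pre-flight check along these lines can confirm the prerequisites before you start (illustrative; the bootstrap script installs the missing Graph submodules itself if needed):
+
+```powershell
+# Confirm PowerShell version and Microsoft.Graph module availability.
+$PSVersionTable.PSVersion
+Get-Module -ListAvailable Microsoft.Graph.Applications, Microsoft.Graph.Identity.SignIns |
+    Select-Object Name, Version
+```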
+ +## Step 1: Import the repository + +1. In Azure DevOps, create a new Git repository in your project. +2. Push the contents of this repository into it, or use **Import repository** from a public Git URL. + +## Step 2: Create the tenant variable group + +1. In Azure DevOps, go to **Pipelines > Library** and create a new Variable Group. +2. Recommended name: `vg-astral-tenant` (you can choose any name). +3. Add the variables from `templates/variables-tenant.yml`. Use your real tenant values: + + | Variable | Example value | Notes | + | --- | --- | --- | + | `TENANT_NAME` | `contoso.onmicrosoft.com` | Your M365 tenant domain | + | `SERVICE_CONNECTION_NAME` | `sc-astral-backup` | Name you will use for the service connection | + | `USER_NAME` | `ASTRAL Backup Service` | Git committer name | + | `USER_EMAIL` | `astral-backup@contoso.com` | Git committer email | + | `AGENT_POOL_NAME` | `Azure Pipelines` | Change if using a self-hosted pool | + | `BACKUP_TIMEZONE` | `Europe/Prague` | Valid tz database name | + | `FULL_RUN_HOUR` | `00` | Hour that triggers full export | + | `AUTO_REMEDIATE_RESTORE_PIPELINE_ID` | *(leave empty)* | Filled in Step 8 | + +4. If you plan to use Azure OpenAI summaries, also add: + - `ENABLE_PR_AI_SUMMARY` = `true` + - `AZURE_OPENAI_ENDPOINT` + - `AZURE_OPENAI_DEPLOYMENT` + - `AZURE_OPENAI_API_KEY` *(mark as secret)* + +## Step 3: Link the variable group to the pipelines + +Open each pipeline YAML and uncomment the variable group line near the top: + +```yaml +variables: + - group: vg-astral-tenant # <-- uncomment this line + - template: templates/variables-common.yml +``` + +Do this for: +- `azure-pipelines.yml` +- `azure-pipelines-review-sync.yml` +- `azure-pipelines-restore.yml` + +Commit and push the changes. + +## Step 4: Run the tenant bootstrap script + +Run `deploy/bootstrap-tenant.ps1` in a PowerShell session authenticated to your target tenant. + +```powershell +# Example +.\deploy\bootstrap-tenant.ps1 -TenantName "contoso.onmicrosoft.com" -ServiceConnectionName "sc-astral-backup" +``` + +The script will: +1. Create a single-tenant app registration. +2. Add required Microsoft Graph application permissions. +3. Grant admin consent. +4. Create a workload federated credential for Azure DevOps. +5. Print the App ID and instructions for creating the Azure DevOps service connection. + +## Step 5: Create the Azure DevOps service connection + +1. In Azure DevOps, go to **Project settings > Service connections**. +2. Click **New service connection > Azure Resource Manager > Workload identity federation (manual)**. +3. Fill in: + - **Subscription**: leave blank or select if you also want ARM access (not required). + - **Tenant ID**: your Microsoft 365 tenant ID. + - **Service Connection Name**: the same value you set in `SERVICE_CONNECTION_NAME` (e.g. `sc-astral-backup`). + - **App ID**: from the bootstrap script output. +4. Save the service connection. + +## Step 6: Import the pipelines + +1. Go to **Pipelines > Create pipeline > Azure Repos Git**. +2. Select your repository. +3. Choose **Existing Azure Pipelines YAML file**. +4. Import each of the three YAMLs one by one: + - `azure-pipelines.yml` (main backup) + - `azure-pipelines-review-sync.yml` (review sync) + - `azure-pipelines-restore.yml` (restore) + +## Step 7: Grant repository permissions to the build identity + +1. Go to **Project settings > Repositories**. +2. Select your repository. +3. 
Under **Security**, grant the **Build Service** account: + - Contribute + - Create branch + - Force push + - Create pull request + - Edit pull request + - Tag creation (if you enable tagging) + +4. Under **Pipelines**, grant the build service **Queue builds** permission on `azure-pipelines-restore.yml` if you plan to use auto-remediation. + +## Step 8: Set the restore pipeline definition ID + +After importing `azure-pipelines-restore.yml`, find its definition ID: + +1. Open the restore pipeline in Azure DevOps. +2. The URL contains `definitionId=XX`. Note the number. +3. Go back to your variable group (`vg-astral-tenant`) and set: + - `AUTO_REMEDIATE_RESTORE_PIPELINE_ID` = `XX` + +## Step 9: Validate the deployment + +1. Import `deploy/validate-deployment.yml` as a one-time pipeline. +2. Run it. +3. Verify that all checks pass: + - Graph token acquisition + - Required roles present + - Test read from Graph + - Test PR creation and abandonment + +## Step 10: Run the first backup + +1. Queue a manual run of `azure-pipelines.yml`. +2. Set `forceFullRun=true` to get a complete initial snapshot. +3. Verify that `tenant-state/` is populated and a rolling PR is created. + +## Optional: progressive feature rollout + +| Phase | What to enable | +| --- | --- | +| Backup-only | `ENABLE_PR_REVIEW_SUMMARY=false`, `ENABLE_PR_REVIEWER_DECISIONS=false`, `AUTO_REMEDIATE_AFTER_MERGE=false` | +| Review package | `ENABLE_PR_REVIEW_SUMMARY=true`, `ENABLE_PR_REVIEWER_DECISIONS=true` | +| Full package | Also enable restore and set `AUTO_REMEDIATE_AFTER_MERGE=true` if desired | +| AI summaries | `ENABLE_PR_AI_SUMMARY=true` plus Azure OpenAI variables | + +## Troubleshooting + +| Symptom | Likely cause | Fix | +| --- | --- | --- | +| Pipeline fails at "Get Graph Token" | Wrong service connection name or missing federated credential | Verify `SERVICE_CONNECTION_NAME` matches the service connection exactly | +| "Missing required Graph roles" | Admin consent not granted | Run bootstrap script again or grant consent manually in Entra ID | +| Rolling PR not created | Build identity lacks PR permissions | Add **Create pull request** and **Edit pull request** permissions | +| Restore pipeline queue fails | `AUTO_REMEDIATE_RESTORE_PIPELINE_ID` wrong or missing queue permission | Verify the ID and grant **Queue builds** on the restore pipeline | +| Empty `tenant-state/` after run | First run may have no data if Graph returns nothing; also check `BACKUP_FOLDER` path | Verify Graph permissions and re-run | diff --git a/deploy/publish-public.yml b/deploy/publish-public.yml new file mode 100644 index 0000000..2c4d536 --- /dev/null +++ b/deploy/publish-public.yml @@ -0,0 +1,118 @@ +trigger: none +pr: none + +# Publisher pipeline: pushes a sanitized snapshot of the dev repo to the public template repo. +# +# Usage: +# Queue this pipeline manually and optionally provide a tag name (e.g. v1.1.0). +# +# Prerequisites: +# - PUBLIC_REPO_URL (pipeline variable) +# - PUBLIC_REPO_PAT (secret pipeline variable) + +parameters: + - name: tagName + displayName: Optional release tag (e.g. 
v1.1.0) + type: string + default: "" + +variables: + - template: ../templates/variables-common.yml + +jobs: + - job: publish_public_template + displayName: Publish sanitized snapshot to public repo + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: Bash@3 + displayName: Run sync-to-public + inputs: + targetType: inline + script: | + set -euo pipefail + chmod +x "$(Build.SourcesDirectory)/deploy/sync-to-public.sh" + + TMP_DIR="$(mktemp -d)" + trap 'rm -rf "$TMP_DIR"' EXIT + + # Run the sync script; it clones the public repo into a temp subdir + "$(Build.SourcesDirectory)/deploy/sync-to-public.sh" \ + "$(PUBLIC_REPO_URL)" \ + "${{ parameters.tagName }}" + + # The script prints the clone path in its output. Extract the last temp dir it used. + PUBLIC_CLONE="$TMP_DIR/public" + mkdir -p "$PUBLIC_CLONE" + + # Re-run the sync into our controlled temp dir to guarantee the path + cd "$(Build.SourcesDirectory)" + rsync -a \ + --exclude='.git' \ + --exclude='tenant-state' \ + --exclude='prod-as-built.md' \ + --exclude='node_modules' \ + --exclude='__pycache__' \ + --exclude='.DS_Store' \ + --exclude='deploy/sync-to-public.sh' \ + --exclude='deploy/publish-public.yml' \ + "$(Build.SourcesDirectory)/" "$PUBLIC_CLONE/" + + cd "$PUBLIC_CLONE" + + # Re-create empty tenant-state structure + mkdir -p tenant-state/intune tenant-state/entra tenant-state/reports/intune tenant-state/reports/entra + touch tenant-state/intune/.gitkeep tenant-state/entra/.gitkeep tenant-state/reports/intune/.gitkeep tenant-state/reports/entra/.gitkeep + cat > tenant-state/README.md <<'EOF' +# tenant-state + +This directory is populated automatically by the ASTRAL pipeline. +Do not place manual files here; they will be overwritten on the next export. +EOF + + git init + git remote add origin "$(PUBLIC_REPO_URL)" 2>/dev/null || git remote set-url origin "$(PUBLIC_REPO_URL)" + + git config user.email "astral-publish@local" + git config user.name "ASTRAL Publisher" + + # Fetch existing public main so we can diff against it + git fetch origin main || true + + # Stage everything + git add -A + + if git diff --cached --quiet; then + echo "No changes to publish." + exit 0 + fi + + DEV_SHA="$(git -C '$(Build.SourcesDirectory)' rev-parse --short HEAD)" + DEV_BRANCH="$(git -C '$(Build.SourcesDirectory)' rev-parse --abbrev-ref HEAD)" + + git commit -m "Sync from dev @ ${DEV_SHA} + +Source: ${DEV_BRANCH} (${DEV_SHA}) +Excluded: live tenant exports, generated artifacts, and dev-only tooling." + + if [ -n "${{ parameters.tagName }}" ]; then + git tag -a "${{ parameters.tagName }}" -m "Release ${{ parameters.tagName }}" + fi + + # Push commit (and tag if provided) + git push origin HEAD:main --force + if [ -n "${{ parameters.tagName }}" ]; then + git push origin "${{ parameters.tagName }}" + fi + + echo "Publication complete." + if [ -n "${{ parameters.tagName }}" ]; then + echo "Tag: ${{ parameters.tagName }}" + fi + env: + GIT_ASKPASS: echo + GIT_USERNAME: $(PUBLIC_REPO_USERNAME) + GIT_PASSWORD: $(PUBLIC_REPO_PAT) diff --git a/deploy/validate-deployment.yml b/deploy/validate-deployment.yml new file mode 100644 index 0000000..0cbe733 --- /dev/null +++ b/deploy/validate-deployment.yml @@ -0,0 +1,120 @@ +trigger: none +pr: none + +# One-time validation pipeline for ASTRAL onboarding. +# Import this pipeline, run it manually, and verify all checks pass. + +variables: + # Uncomment after creating your tenant variable group. 
+ # - group: vg-astral-tenant + - template: ../templates/variables-common.yml + +jobs: + - job: validate_environment + displayName: Validate ASTRAL deployment + pool: + name: $(AGENT_POOL_NAME) + steps: + - checkout: self + persistCredentials: true + + - task: AzurePowerShell@5 + displayName: Validate Graph token acquisition + inputs: + azureSubscription: $(SERVICE_CONNECTION_NAME) + azurePowerShellVersion: LatestVersion + ScriptType: inlineScript + Inline: | + $getTokenParams = @{ + ResourceTypeName = 'MSGraph' + AsSecureString = $true + ErrorAction = 'Stop' + } + $tokenCommand = Get-Command Get-AzAccessToken -ErrorAction Stop + if ($tokenCommand.Parameters.ContainsKey('ForceRefresh')) { + $getTokenParams['ForceRefresh'] = $true + } + $accessToken = ([PSCredential]::New('dummy', (Get-AzAccessToken @getTokenParams).Token).GetNetworkCredential().Password) + + $tokenParts = $accessToken.Split('.') + if ($tokenParts.Length -lt 2) { throw "Invalid Graph access token format." } + $payload = $tokenParts[1].Replace('-', '+').Replace('_', '/') + switch ($payload.Length % 4) { + 2 { $payload += '==' } + 3 { $payload += '=' } + } + $payloadJson = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($payload)) + $claims = $payloadJson | ConvertFrom-Json + $roles = @($claims.roles) + $sortedRoles = $roles | Sort-Object + Write-Host "Graph token roles: $($sortedRoles -join ', ')" + + $requiredReadRoles = @( + "Device.Read.All", + "DeviceManagementApps.Read.All", + "DeviceManagementConfiguration.Read.All", + "DeviceManagementManagedDevices.Read.All", + "DeviceManagementRBAC.Read.All", + "DeviceManagementScripts.Read.All", + "DeviceManagementServiceConfig.Read.All", + "Group.Read.All", + "Policy.Read.All", + "Policy.Read.ConditionalAccess", + "Policy.Read.DeviceConfiguration", + "User.Read.All", + "Application.Read.All" + ) + + $missing = $requiredReadRoles | Where-Object { $_ -notin $sortedRoles } + if ($missing) { + throw "Missing required Graph roles: $($missing -join ', ')" + } + Write-Host "All required read roles are present." -ForegroundColor Green + + # Export token for subsequent steps + Write-Host "##vso[task.setvariable variable=GRAPH_TOKEN;issecret=true]$accessToken" + + - task: Bash@3 + displayName: Validate Graph read access + inputs: + targetType: inline + script: | + set -euo pipefail + TOKEN="$(GRAPH_TOKEN)" + URL="https://graph.microsoft.com/v1.0/organization?$select=id,displayName" + RESPONSE=$(curl -sf -H "Authorization: Bearer $TOKEN" "$URL") + echo "Graph read test response: $RESPONSE" + echo "Graph connectivity confirmed." + + - task: Bash@3 + displayName: Validate PR creation permission + inputs: + targetType: inline + script: | + set -euo pipefail + TOKEN="$(System.AccessToken)" + COLLECTION_URI="$(System.CollectionUri)" + PROJECT="$(System.TeamProject)" + REPO_ID="$(Build.Repository.ID)" + API="${COLLECTION_URI%/}/${PROJECT}/_apis/git/repositories/${REPO_ID}/pullrequests?api-version=7.1" + + BODY=$(cat < + ASTRAL logo + ASTRAL wordmark with celestial shield, orbit, and star symbol. 
[SVG logo markup not recoverable; remaining text: "ASTRAL", "Admin Security Through Review, Automation & Least-privilege", "Configuration drift review and remediation for Intune and Entra".]
diff --git a/docs/m365-baseline-roadmap.md b/docs/m365-baseline-roadmap.md
new file mode 100644
index 0000000..9ee4052
--- /dev/null
+++ b/docs/m365-baseline-roadmap.md
@@ -0,0 +1,143 @@
+# M365 Baseline Expansion Roadmap
+
+This document tracks the repository from its current implemented state rather than from the original proposal.
+
+## Current State
+
+The repository already operates as a two-workload baseline system:
+
+- Intune drift backup via IntuneCD
+- Entra drift backup for Named Locations, Authentication Strengths, Conditional Access, App Registrations, and Enterprise Applications
+
+The surrounding control loop is also implemented:
+
+- hourly backup pipeline plus midnight Prague full run
+- rolling PR per workload
+- deterministic reviewer summary with optional Azure OpenAI narrative
+- optional per-file change-ticket threads
+- reviewer `/reject` and `/accept` decision sync every 20 minutes
+- auto-remediation for rejected drift snapshots
+- post-merge restore queue for partial-accept / partial-reject review flows
+- selective historical restore from branch, tag, or commit
+- output validation and drift-noise filtering before commit
+
+## What Is Stable Today
+
+Stable and part of the normal operating model:
+
+- Intune export and reporting
+- Entra Named Locations export
+- Entra Authentication Strengths export
+- Entra Conditional Access export with reference-name enrichment
+- object inventory and assignment reporting for both workloads
+- Entra app inventory reporting
+- drift-branch commit and rolling PR update workflow
+
+Implemented, but intentionally constrained:
+
+- App Registrations export is full-run only
+- Enterprise Applications export is full-run only
+- light runs preserve the previous committed snapshot for those two Entra categories
+
+Not yet implemented:
+
+- Directory role templates and active directory roles
+- Exchange Online, Teams, SharePoint, Purview, and Azure governance modules
+
+## Current Gaps And Stabilization Backlog
+
+1. Fix App Registrations light-run stability.
+   The current pipeline still disables hourly App Registrations export because some runs produce resolver-only churn. Re-enabling hourly export requires a deterministic light-run result.
+
+2. Keep Enterprise Applications scoped as a heavy module unless runtime proves otherwise.
+   Enterprise Applications are already exported, but only on full runs. The current design assumes this category should remain bounded to the daily/full path unless runtime and diff quality support widening scope.
+
+3. Add the next identity-baseline module only after Phase 1 is fully stable.
+   Directory roles are the next logical addition, but they should follow the same pattern: deterministic export, report generation, validation, reviewer-noise filtering, and tests.
+
+## Design Rules For New Modules
+
+Every expansion module should follow the conventions already used by Intune and Entra:
+
+1. Store raw JSON under `tenant-state/<workload>/`.
+2. Store human-review reports under `tenant-state/reports/<workload>/`.
+3. Keep one object per file with deterministic naming and stable key ordering where possible.
+4. Validate expected outputs before drift commit.
+5. Filter known non-config churn before PR creation.
+6. Update permissions, README, and tests in the same change.
+7.
Start as daily/full-run scope unless there is evidence it is safe and cheap to run hourly. + +## Roadmap By Phase + +### Phase 1: Identity Baseline + +Completed: + +- Entra Named Locations +- Entra Authentication Strengths +- Entra Conditional Access +- App Registrations exporter +- Enterprise Applications exporter + +Remaining: + +- stabilize App Registrations for light runs +- decide whether Enterprise Applications should remain full-run only +- add Directory Roles / Directory Role Templates + +### Phase 2: Service Policy Baseline + +Candidate modules: + +- Exchange Online transport and mail flow rules +- Defender for Office policy configuration +- Teams policy families +- SharePoint and OneDrive tenant-level sharing controls + +### Phase 3: Governance Baseline + +Candidate modules: + +- Purview DLP policies +- Purview retention labels and policies +- Azure policy assignments and initiatives +- Azure RBAC role assignments + +## Proposed Future Repository Shape + +Keep the existing lowercase workload structure and extend it consistently: + +```text +tenant-state/ + intune/ + entra/ + exchange/ + teams/ + sharepoint/ + purview/ + azure-governance/ + reports/ + intune/ + entra/ + exchange/ + teams/ + sharepoint/ + purview/ + azure-governance/ +``` + +## Recommended Execution Order + +1. Finish Phase 1 stabilization. +2. Add Directory Roles as the next identity module. +3. Add one Phase 2 or Phase 3 module at a time. +4. Require several stable daily cycles before widening scope or adding the next module. +5. Promote a module from full-run only to hourly only after unchanged tenants produce clean, low-noise diffs. + +## Acceptance Criteria + +- No regression in current Intune or Entra backup success rate. +- Unchanged environments produce deterministic outputs across repeated runs. +- Reviewer PRs stay focused on configuration-effective drift, not enrichment noise. +- New modules document exact permissions and expected outputs. +- Restore and review workflows remain coherent as scope expands. diff --git a/docs/security-review-email-draft.md b/docs/security-review-email-draft.md new file mode 100644 index 0000000..b0a6ef5 --- /dev/null +++ b/docs/security-review-email-draft.md @@ -0,0 +1,39 @@ +# Security Review Email Draft + +## Subject + +Security review package for ASTRAL + +## Email Body + +Hello, + +As discussed, I am sending the security review package for ASTRAL. + +ASTRAL stands for Admin Security Through Review, Automation & Least-privilege. + +Attached are: + +- `security-review-package.pdf` - product security overview, architecture, deployment modes, permissions, data flows, and key security considerations +- `security-review-questionnaire.pdf` - short-form questionnaire answers for easier circulation within your security review process + +A few points to highlight up front: + +- the platform supports multiple deployment modes, from backup-only through full review and remediation workflows +- AI-assisted review summaries are optional and can be enabled or disabled independently of the backup and restore functions +- when AI is enabled, the intended model is a customer-controlled Azure OpenAI deployment rather than an unrelated public AI service +- the AI summary feature is advisory and is intended to help non-technical reviewers such as PMs or management understand technical Intune and Entra changes in plain language + +The source repository is private because it contains operational implementation details and tenant-specific configuration material. 
If your review requires deeper technical evidence, we can provide a controlled walkthrough of the implementation, configuration, and pipeline behavior. + +If useful, I can also provide: + +- a live architecture walkthrough +- a permission-by-permission review of the Microsoft Graph access model +- a demonstration of deployment modes and AI-assisted review summaries + +Please let me know if your team would like any additional material in a different format. + +Best regards, + +[Your Name] diff --git a/docs/security-review-package.md b/docs/security-review-package.md new file mode 100644 index 0000000..8a9041d --- /dev/null +++ b/docs/security-review-package.md @@ -0,0 +1,402 @@ +ASTRAL logo + +# ASTRAL Security Review Package + +Prepared: 2026-03-27 + +## Purpose + +This document describes the security posture of ASTRAL, an Intune / Entra drift backup, review, and remediation platform implemented in this repository. + +ASTRAL stands for: + +- Admin Security Through Review, Automation & Least-privilege + +The goal of the platform is to: + +- export Microsoft Intune and selected Entra ID configuration from a production tenant, +- store approved configuration snapshots in Git, +- surface drift through rolling pull requests, +- optionally restore tenant configuration back to the approved baseline. + +This package is intended for customer security review of the full product and its available deployment modes. + +## Executive Summary + +ASTRAL is an Azure DevOps pipeline based administrative workflow, not a customer-facing application and not an endpoint agent. + +Key characteristics: + +- No inbound listener or public application endpoint is exposed by this repository. +- The normal operating mode is outbound-only scheduled jobs from Azure DevOps to Microsoft Graph and Azure DevOps APIs. +- The default backup/review path is read-oriented against Microsoft Graph. +- A separate restore path can write configuration back to the tenant, but only through the dedicated restore pipeline and only when enabled and authorized. +- AI-assisted PR summaries are optional and are not required for backup, review, or restore. + +## Deployment Modes + +The repository can be deployed progressively. It does not need to be introduced as an all-or-nothing package. + +| Mode | Scope | Graph Access Profile | Azure DevOps Scope | AI | +| --- | --- | --- | --- | --- | +| Backup-only | Export tenant configuration, generate reports, retain Git-tracked snapshots | Read-only | Repository and scheduled pipeline only | Disabled | +| Review package | Backup-only plus rolling PR review, reviewer summaries, optional change-ticket threads, reviewer `/accept` and `/reject` processing | Read-only | Repository, PR workflows, review-sync pipeline | Optional | +| Full package | Review package plus restore pipeline, rollback support, selective remediation, and optional auto-remediation | Read + Write for restore path only | Repository, PR workflows, review-sync, restore pipeline | Optional | + +Important clarifications: + +- AI is an add-on, not a core dependency. +- Restore is a separate capability, not a requirement for backup or review. +- Organizations can adopt the platform progressively, starting with backup-only and adding review or restore capabilities later. +- AI can be enabled or disabled independently of the backup, review, and restore layers. 
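+
+As a concrete illustration of how the modes map onto configuration, a backup-only deployment simply leaves the higher-level feature switches off in the tenant variable group (variable names are the ones used by the onboarding runbook; the values shown are an example, not a recommendation):
+
+```text
+ENABLE_PR_REVIEW_SUMMARY     = false
+ENABLE_PR_REVIEWER_DECISIONS = false
+AUTO_REMEDIATE_AFTER_MERGE   = false
+ENABLE_PR_AI_SUMMARY         = false
+```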
+ +## System Overview + +### In-Scope Components + +| Component | Function | Security Relevance | +| --- | --- | --- | +| Azure DevOps pipeline `azure-pipelines.yml` | Scheduled backup, drift commit, rolling PR management, documentation artifact publishing | Main execution path | +| Azure DevOps pipeline `azure-pipelines-review-sync.yml` | Processes reviewer `/reject` and `/accept` decisions and refreshes PR summaries | Uses Azure DevOps API token | +| Azure DevOps pipeline `azure-pipelines-restore.yml` | Restores approved baseline to tenant | Write-capable path | +| Azure DevOps Git repository | Stores approved baseline, drift branches, JSON exports, reports, docs | Primary configuration store | +| Microsoft Graph | Source of Intune and Entra configuration; optional target for restore | Production tenant access | +| Azure DevOps REST APIs | PR creation/update, review thread sync, restore queueing | Change-management control plane | +| Optional Azure OpenAI | PR summary generation only | Optional data egress path | + +### High-Level Flow + +```mermaid +flowchart LR + A["Azure DevOps scheduled pipeline"] --> B["Federated service connection"] + B --> C["Microsoft Graph"] + A --> D["Git repo: main + drift branches"] + A --> E["Azure DevOps PR and thread APIs"] + A --> F["Build artifacts: markdown / HTML / PDF"] + A -. optional .-> G["Azure OpenAI"] + H["Reviewer in Azure DevOps"] --> E + E --> I["Rolling PR approval / rejection"] + I -. optional remediation .-> J["Restore pipeline"] + J --> C +``` + +## Deployment Model + +### Backup and Review + +The main pipeline runs hourly on `main`. + +- Every hour: export Intune and Entra configuration, generate reports, commit drift to rolling workload branches, and update one rolling PR per workload. +- When delayed reviewer notifications are enabled, newly created rolling PRs are opened as Azure DevOps draft PRs, the automated summary is inserted, and the PR is then published for reviewer notification. +- At the configured full-run hour: perform the same work plus documentation artifact generation (Markdown, and optionally HTML/PDF if browser dependencies are available). + +The workload branches are: + +- `drift/intune` +- `drift/entra` + +Reviewers approve or reject drift through Azure DevOps pull requests. The system is intentionally ex-post change management: admins may make changes in the Microsoft admin portals, and this system detects, records, and routes those changes for review. + +### Review Sync + +The review-sync pipeline runs every 20 minutes on `main`. + +It can: + +- refresh automated PR summaries, +- process reviewer `/reject` or `/accept` commands in policy threads, +- optionally queue remediation after merge if rejected items were merged out of the PR scope. + +### Restore + +The restore pipeline is the only path that writes configuration back to the tenant. + +It supports: + +- full restore from `main`, +- selective restore of specific policy files, +- restore from a historical Git ref for rollback use cases, +- dry-run mode for report-only validation. 
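+
+Outside of automatic queueing, a restore run can also be started manually. A minimal sketch using the Azure DevOps CLI (organization and project values are placeholders; restore-specific parameters such as dry-run or selective paths are defined in `azure-pipelines-restore.yml` and set on the run):
+
+```bash
+# Queue the restore pipeline by its definition ID (see AUTO_REMEDIATE_RESTORE_PIPELINE_ID in the onboarding runbook).
+az pipelines run \
+  --id "$AUTO_REMEDIATE_RESTORE_PIPELINE_ID" \
+  --branch main \
+  --org "https://dev.azure.com/contoso" \
+  --project ASTRAL
+```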
+ +## Data Processed + +### Data Categories + +| Category | Examples | Source | Stored In | +| --- | --- | --- | --- | +| Intune configuration objects | compliance policies, device configurations, settings catalog, enrollment profiles, apps, scripts, filters, scope tags | Microsoft Graph / IntuneCD export | Git repo under `tenant-state/intune/**` | +| Entra configuration objects | conditional access, named locations, authentication strengths, app registrations, enterprise applications | Microsoft Graph | Git repo under `tenant-state/entra/**` | +| Generated reports | assignment inventories, object inventories, app inventories | Derived from exported configuration | `tenant-state/reports/**` and build artifacts | +| Documentation artifacts | split markdown, optional HTML/PDF | Derived from exported configuration | build artifacts | +| Review metadata | PR descriptions, review threads, accept/reject commands | Azure DevOps reviewers | Azure DevOps PR APIs | +| Optional AI summary payload | sampled changed paths, semantic change descriptions, deterministic summary, fingerprints | Derived from repo diff | Azure OpenAI request payload | + +### Data Sensitivity Notes + +- The system is designed for administrative configuration data, not end-user business content. +- The repository can still contain sensitive operational material, including policy logic, group names, app identifiers, script bodies, custom configuration payloads, and administrator email addresses present in tenant configuration. +- If tenant-authored scripts or custom payloads contain embedded secrets, those secrets would also be captured. This is a customer governance risk, not something the exporter can reliably prevent. +- For that reason, the repository, drift branches, build logs, and published artifacts should all be treated as confidential administrative data. +- The same sensitivity assumptions apply to any AI summary payload because it is derived from the same administrative configuration changes. + +## Authentication and Authorization + +### Azure to Microsoft Graph + +The pipelines obtain a Microsoft Graph access token at runtime using the Azure DevOps service connection configured in `SERVICE_CONNECTION_NAME` (e.g. `sc-astral-backup`). + +Observed controls in the implementation: + +- token acquisition is performed at runtime with `Get-AzAccessToken`, +- token role claims are inspected before proceeding, +- the token is stored as a secret pipeline variable (`issecret=true`), +- missing required Graph roles cause early failure. + +### Azure DevOps API Access + +The pipelines use `System.AccessToken` for: + +- creating and updating rolling PRs, +- reading and updating PR threads, +- queuing the restore pipeline. + +The repository permissions documented in the implementation are: + +- contribute, +- create branch, +- force push, +- create/update pull requests, +- optional create tag. + +If restore auto-queue is enabled, the pipeline identity also needs: + +- `View builds`, +- `Queue builds`, +- explicit pipeline authorization when enforced by the project. 
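+
+All of these Azure DevOps interactions are plain REST calls authenticated with the pipeline job's `System.AccessToken`. As a minimal illustration (the endpoint shown is the pull request API the rolling-PR workflow talks to; the actual calls are made by the Python helper scripts):
+
+```bash
+# Sketch: list active pull requests in the repository using the pipeline's own identity.
+curl -sf \
+  -H "Authorization: Bearer $SYSTEM_ACCESSTOKEN" \
+  "${SYSTEM_COLLECTIONURI%/}/${SYSTEM_TEAMPROJECT}/_apis/git/repositories/${BUILD_REPOSITORY_ID}/pullrequests?searchCriteria.status=active&api-version=7.1"
+```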
+ +### Graph Permissions by Mode + +#### Backup / Review Mode + +Read-oriented Graph application permissions documented in the repository: + +- `Device.Read.All` +- `DeviceManagementApps.Read.All` +- `DeviceManagementConfiguration.Read.All` +- `DeviceManagementManagedDevices.Read.All` +- `DeviceManagementRBAC.Read.All` +- `DeviceManagementScripts.Read.All` +- `DeviceManagementServiceConfig.Read.All` +- `Group.Read.All` +- `Policy.Read.All` +- `Policy.Read.ConditionalAccess` +- `Policy.Read.DeviceConfiguration` +- `User.Read.All` +- `Application.Read.All` for Entra app exports +- `RoleManagement.Read.Directory` or `Directory.Read.All` for richer enrichment +- `AuditLog.Read.All` if commit author attribution is desired + +#### Restore Mode + +Write-capable Graph application permissions documented in the repository: + +- `DeviceManagementApps.ReadWrite.All` +- `DeviceManagementConfiguration.ReadWrite.All` +- `DeviceManagementManagedDevices.ReadWrite.All` +- `DeviceManagementRBAC.ReadWrite.All` +- `DeviceManagementScripts.ReadWrite.All` +- `DeviceManagementServiceConfig.ReadWrite.All` +- `Group.Read.All` +- `Policy.Read.All` +- `Policy.ReadWrite.ConditionalAccess` when Entra updates are included + +## Security Controls Present in the Implementation + +### Network Exposure + +- No inbound application endpoint is created by this repository. +- The system is pipeline-driven and relies on outbound HTTPS calls. +- Required outbound destinations are: + - `graph.microsoft.com` + - Azure DevOps organization APIs + - optional Azure OpenAI endpoint + - Python package registry for `IntuneCD` + - npm registry for `md-to-pdf` + - optional OS package repositories when HTML/PDF conversion needs Chromium libraries + +### Secrets Handling + +- Graph tokens are obtained just-in-time rather than stored in the repository. +- The pipeline marks the Graph token as a secret variable. +- The implementation logs token claims and roles for diagnostics, but not the token value itself. +- Azure OpenAI uses a pipeline secret variable when enabled. +- The pipeline logic itself does not depend on repository-stored application secrets; separate secret scanning of exported tenant content is still recommended. + +### Change Control + +- Drift is committed to dedicated rolling branches rather than directly to `main`. +- Review happens through rolling pull requests into `main`. +- The implementation can delay reviewer notification by creating new rolling PRs as drafts until the automated summary block is present, reducing generic first-notification content. +- Optional file-level change tickets can be enforced through auto-created PR threads. +- Reviewers can explicitly accept or reject individual configuration files. +- Generated reports are excluded from drift commits and PR diffs to reduce review noise. + +### Safety Checks + +- Backup jobs validate expected outputs before committing drift. +- Intune backup logic checks for unauthorized Graph 403 responses and fails unless the failure is explicitly allowed by configuration. +- Entra export logic is configured to fail on requested export errors to avoid partial snapshots. +- Restore validates required write permissions before running. +- Selective restore sanitizes requested paths and rejects path traversal or missing-file conditions. +- Restore supports dry-run mode before any tenant change is applied. + +### Auditability + +- Git history retains approved baseline snapshots. +- Rolling PR history provides reviewer decisions and rationale. 
+- Azure DevOps build history records pipeline runs and restoration events. +- Optional tags can be created for snapshots. + +## Optional Azure OpenAI Integration + +Azure OpenAI is used only for PR review narrative generation. + +Important scoping facts from the implementation: + +- the feature is optional and controlled by pipeline variables, +- the core backup/review/restore workflow does not depend on it, +- it can remain disabled in every deployment mode, +- only a reduced, budget-limited change payload is sent, +- the payload contains changed paths, semantic summaries, risk labels, fingerprints, and deterministic summary text, +- it does not need direct Microsoft Graph access, +- it can be disabled with `ENABLE_PR_AI_SUMMARY=false`. + +### Intended AI Deployment Posture + +The intended security posture for AI is not an opaque third-party black-box service. The implementation is designed to use a customer-controlled Azure OpenAI deployment defined by: + +- `AZURE_OPENAI_ENDPOINT` +- `AZURE_OPENAI_DEPLOYMENT` +- `AZURE_OPENAI_API_KEY` + +In the intended production design: + +- AI requests are sent to the customer's Azure OpenAI resource, +- the model endpoint is explicitly configured by the customer, +- the AI service is a bounded summarization component rather than a system of record, +- Graph access remains with the pipeline and is not delegated to the model. + +For formal security documentation, the safest statement is: + +- the system is intended to use customer-managed Azure OpenAI infrastructure, typically within the same Azure tenant or controlled Azure environment, rather than an unrelated public AI service. + +### AI Security Considerations + +From a security perspective, the AI feature changes the system in these specific ways: + +- it introduces an additional outbound destination: the configured Azure OpenAI endpoint, +- it sends a derived review payload based on configuration drift rather than raw tenant-wide exports, +- it does not grant the AI service direct credentials to Microsoft Graph or Azure DevOps, +- it is advisory only and does not approve, merge, reject, or restore changes by itself, +- it can be disabled independently of the rest of the platform. + +### AI Business Purpose + +The AI summaries exist to make technical Intune and Entra drift understandable to non-technical reviewers. + +Their intended audience includes: + +- project managers, +- delivery leads, +- security managers, +- customer management stakeholders, +- reviewers who own risk acceptance but do not work daily with raw policy JSON. + +The purpose is not to replace technical review. The purpose is to provide a manager-readable explanation of: + +- what changed, +- why it matters operationally, +- whether the change appears routine, risky, or potentially security-relevant, +- what a reviewer should verify before approval. + +This allows management or PM stakeholders to participate meaningfully in review without needing to parse raw technical policy structures. 
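+
+For orientation, the sketch below shows what a summary request to the customer-controlled deployment could look like, using the documented `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT`, and `AZURE_OPENAI_API_KEY` variables and the deployments-route URL shape used by the availability precheck script. It is a simplified illustration, not the full summary generator, which additionally handles token-parameter and temperature fallbacks and payload budgeting.
+
+```python
+#!/usr/bin/env python3
+# Illustrative sketch only -- simplified relative to the actual PR summary script.
+from __future__ import annotations
+
+import json
+import os
+import urllib.request
+
+
+def summarize_drift(reduced_payload_text: str) -> str:
+    """Send the reduced, budget-limited change payload to the configured Azure OpenAI deployment."""
+    endpoint = os.environ["AZURE_OPENAI_ENDPOINT"].rstrip("/")
+    deployment = os.environ["AZURE_OPENAI_DEPLOYMENT"]
+    api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
+    url = f"{endpoint}/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
+
+    body = {
+        "messages": [
+            {"role": "system", "content": "Summarize Intune/Entra drift for non-technical reviewers."},
+            {"role": "user", "content": reduced_payload_text},
+        ],
+        "max_tokens": 400,
+    }
+    request = urllib.request.Request(
+        url,
+        data=json.dumps(body).encode("utf-8"),
+        method="POST",
+        headers={"Content-Type": "application/json", "api-key": os.environ["AZURE_OPENAI_API_KEY"]},
+    )
+    with urllib.request.urlopen(request, timeout=60) as response:
+        payload = json.loads(response.read().decode("utf-8"))
+    return payload["choices"][0]["message"]["content"]
+```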
+ +## Residual Risks and Customer Decisions + +The following items are not fully solved by the repository alone and should be addressed in the customer deployment decision: + +| Area | Current State | Recommended Position | +| --- | --- | --- | +| Restore capability | Supported by design; can change production tenant state | Keep restore manual only, or disable auto-remediation by default until operational controls are approved | +| Backup vs restore identity separation | Sample config uses the same service connection name in backup and restore pipelines | Use separate service principals: read-only for backup/review, write-enabled only for restore | +| Azure OpenAI egress | Optional and customer-configurable | Enable only when the organization approves the payload scope and Azure OpenAI deployment model | +| Artifact retention | Not defined in repo; inherited from Azure DevOps settings | Set explicit retention for builds, logs, and artifacts | +| Repo access model | Not defined in repo | Restrict repo and artifact access to administrators/reviewers only | +| Build agent hardening | Pool name exists, but agent type and hardening are deployment-specific | Prefer dedicated hardened agent or approved Microsoft-hosted configuration | +| Runtime package download | `pip`, `npm`, and sometimes `apt-get` are used during pipeline runs | Pre-bake dependencies into the agent image if customer forbids runtime internet package fetches | +| Secret content inside exported scripts | Possible if tenant admins embed secrets in Intune scripts or custom payloads | Review tenant script hygiene before onboarding | + +## Recommended Deployment Configuration + +For a conservative production deployment, use this profile: + +1. Enable backup and review workflows. +2. Enable Azure OpenAI summaries only when a customer-controlled Azure OpenAI deployment is approved. +3. Disable automatic remediation queueing. +4. Do not authorize the restore pipeline for automatic queueing. +5. Use a read-only Graph application identity for backup/review. +6. Keep restore on a separate manual path with a separate write-enabled identity. +7. Apply Azure DevOps branch policies so `main` requires reviewer approval. +8. Set explicit retention and access-control policies for: + - Git repository + - build logs + - markdown/HTML/PDF artifacts + +Suggested conservative variable posture: + +```text +ENABLE_PR_AI_SUMMARY= +AUTO_REMEDIATE_ON_PR_REJECTION=false +AUTO_REMEDIATE_AFTER_MERGE=false +REQUIRE_CHANGE_TICKETS=true +``` + +## Out of Scope + +This repository does not provide: + +- endpoint malware protection, +- customer device telemetry collection, +- user authentication to a SaaS application, +- network ingress services, +- a standalone secrets vault, +- customer-managed key support within the application itself. + +Those controls, where needed, come from Azure DevOps, Microsoft 365 / Entra, the chosen agent environment, and the customer's broader platform governance. 
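+
+For completeness: the conservative toggles listed under the recommended deployment configuration are plain pipeline variables, interpreted at runtime by the shared boolean helper in `scripts/common.py`. A minimal sketch of that interpretation is shown below; per-variable defaults (for example how an empty `ENABLE_PR_AI_SUMMARY` is treated) remain the responsibility of the individual scripts.
+
+```python
+#!/usr/bin/env python3
+# Illustrative sketch of the env_bool semantics used by the pipeline scripts.
+from __future__ import annotations
+
+import os
+
+
+def env_bool(name: str, default: bool = False) -> bool:
+    """Interpret 1/true/yes/y/on as True; empty or unset values fall back to the default."""
+    raw = os.environ.get(name, "").strip().lower()
+    if not raw:
+        return default
+    return raw in {"1", "true", "yes", "y", "on"}
+
+
+# With the conservative posture above:
+auto_remediate_on_rejection = env_bool("AUTO_REMEDIATE_ON_PR_REJECTION")  # "false" -> False
+auto_remediate_after_merge = env_bool("AUTO_REMEDIATE_AFTER_MERGE")       # "false" -> False
+require_change_tickets = env_bool("REQUIRE_CHANGE_TICKETS")               # "true"  -> True
+```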
+ +## Customer-Specific Items to Fill Before Sending + +The following are deployment-specific and should be completed with the actual customer environment: + +- Azure DevOps organization and project name +- whether the agent pool is Microsoft-hosted or self-hosted +- repo retention period +- build log retention period +- artifact retention period +- named reviewer groups and branch policies +- exact service principal names used for backup and restore +- which Azure OpenAI resource and deployment are used, if AI is enabled +- whether restore is manual-only or fully enabled + +## Repository Evidence + +The statements in this document are based on the implementation in: + +- `README.md` +- `azure-pipelines.yml` +- `azure-pipelines-review-sync.yml` +- `azure-pipelines-restore.yml` +- `scripts/update_pr_review_summary.py` +- `scripts/apply_reviewer_rejections.py` +- `scripts/queue_post_merge_restore.py` +- `scripts/export_entra_baseline.py` diff --git a/docs/security-review-questionnaire.md b/docs/security-review-questionnaire.md new file mode 100644 index 0000000..4560618 --- /dev/null +++ b/docs/security-review-questionnaire.md @@ -0,0 +1,35 @@ +ASTRAL logo + +# ASTRAL Security Review Questionnaire + +Prepared: 2026-03-27 + +This appendix is a shorter, copy/paste-friendly companion to the full ASTRAL security review package. + +| Question | Answer | +| --- | --- | +| What is the system? | ASTRAL is an Azure DevOps pipeline workflow that exports Microsoft Intune and selected Entra ID configuration, stores approved baseline snapshots in Git, and raises configuration drift for review through rolling pull requests. | +| What deployment modes are supported? | The same repository can be operated in progressive modes: backup-only, review package, or full package with restore/remediation. AI is optional in all modes. | +| Is it a public-facing application? | No. It is an administrative pipeline workflow with no public UI or inbound application endpoint created by this repository. | +| Does it require inbound network access from the internet? | No. The implemented workflow is outbound-only over HTTPS. | +| What production systems does it access? | Microsoft Graph for Intune and Entra configuration, plus Azure DevOps APIs for pull request and pipeline operations. | +| Does it make production changes? | Backup and review pipelines are read-oriented against Microsoft Graph. The restore pipeline is write-capable and can apply approved baseline configuration back to the tenant when explicitly enabled and authorized. | +| What data is processed? | Administrative configuration data such as Intune policies, device configuration, enrollment profiles, apps, scripts, conditional access, named locations, authentication strengths, app registrations, and enterprise application metadata. | +| Does it process end-user business content? | It is not designed for business content. However, exported admin-authored scripts or custom payloads can contain sensitive operational data if the tenant already stores it there. | +| Where is data stored? | In the Azure DevOps Git repository, Azure DevOps pull requests/threads, build logs, and optional build artifacts such as markdown, HTML, and PDF documentation. | +| How does it authenticate to Microsoft Graph? | By obtaining a Microsoft Graph token at runtime through an Azure DevOps Azure service connection using workload identity / federated credential flow. | +| How does it authenticate to Azure DevOps APIs? | With `System.AccessToken` scoped to the pipeline identity. 
| +| Are long-lived secrets stored in the repository? | The pipeline logic does not require repository-stored application secrets. Runtime tokens are acquired during pipeline execution, but exported tenant content should still be treated as potentially sensitive and reviewed for embedded secrets in admin-authored scripts or custom payloads. | +| How are secrets handled in the pipeline? | The Graph access token is set as a secret pipeline variable. The implementation logs token claims and granted roles for diagnostics, but not the token value. | +| What minimum permissions are required? | Read-only Microsoft Graph application permissions for backup/review, and additional write permissions only for restore. Exact permissions are listed in the full package. | +| Is there separation between read and write access? | The code supports a safe separation model. For production, create separate read-only and write-enabled service principals/connections so backup and restore use different identities. | +| What change-control mechanism exists? | Drift is committed to dedicated workload branches and reviewed through rolling pull requests into `main`. New rolling PRs can be created as drafts until the automated summary is inserted, and optional per-file change-ticket threads and reviewer `/reject` commands are supported. | +| Can reviewers block or scope changes? | Yes. Reviewers can approve the rolling PR, reject it, or reject individual file-level drift items through PR threads when that feature is enabled. | +| Is rollback supported? | Yes. The restore pipeline supports full restore, selective restore by file path, historical restore by Git ref, and dry-run mode. | +| What external network destinations are required? | Microsoft Graph, Azure DevOps APIs, optional Azure OpenAI, Python package registry for `IntuneCD`, npm registry for `md-to-pdf`, and optionally OS package repositories when browser dependencies are installed for HTML/PDF generation. | +| Does the system send data to AI services? | Only if Azure OpenAI summary generation is explicitly configured. It is optional for the platform overall. | +| What AI service is intended? | A customer-controlled Azure OpenAI deployment configured through the Azure OpenAI endpoint and deployment variables, rather than an unrelated public AI service. | +| What data is sent to Azure OpenAI when enabled? | A reduced change-review payload containing changed paths, semantic summaries, deterministic summary text, and fingerprints derived from the repo diff. This is intended to support review summarization, not raw tenant-wide export ingestion. | +| Why is AI included? | The AI summary is meant to translate technical Intune and Entra drift into manager-readable language so PMs, management, and other non-specialist reviewers can understand impact and review intent without parsing raw policy JSON. | +| Recommended deployment posture? | Start with backup-only or review-package mode, enable Azure OpenAI only on a customer-controlled deployment when approved, keep auto-remediation disabled by default, and use separate read-only and write-enabled service principals if restore is enabled. | +| What customer-specific controls still need to be defined? | Agent type and hardening, repo/build/artifact retention, exact access groups, branch policies, and whether restore or Azure OpenAI are enabled in the target deployment. 
| diff --git a/md2pdf/README.md b/md2pdf/README.md new file mode 100644 index 0000000..93d0c55 --- /dev/null +++ b/md2pdf/README.md @@ -0,0 +1,30 @@ +# Automated Microsoft Intune backup and as-built + +A template repository that you can clone to enable a Microsoft Intune tenant backup and as-built report using [IntuneCD](https://github.com/almenscorner/IntuneCD) and [md-to-pdf](https://github.com/simonhaenisch/md-to-pdf). + +To learn how to use this repository, see these articles: + +- [Automate Microsoft Intune As-Built Documentation on GitHub](https://stealthpuppy.com/automate-intune-documentation-github/) +- [Automate Microsoft Intune As-Built Documentation on Azure DevOps](https://stealthpuppy.com/automate-intune-documentation-azure/) + +## Example report + +The generated as-built documentation will look something like: + +![As-built documentation screenshot](.img/asbuilt-sample.png) + +## GitHub + +After creating a new repository in GitHub based on this template, you'll need to enable the Actions to run via the repository settings, and add the secrets required by the workflows. + +This template repository includes the following workflows: + +* [`intune-backup.yml`](.github/workflows/intune-backup.yml) - performs the export from the Intune tenant to create a backup, and generates a markdown version of the as-built document, and tags the release +* [`intune-release.yml`](.github/workflows/intune-release.yml) - generates PDF and HTML versions of the markdown document, creates a release, and adds the documents to the release as assets +* [`remove-releases.yml`](.github/workflows/remove-releases.yml) - prunes the release assets to keep the last 60 releases + +## Azure DevOps + +Clone this repository into GitHub or Azure DevOps, then import into a project and create a pipeline: + +* [`intune-backup.yml`](.devops/intune-backup.yml) - performs the export from the Intune tenant to create a backup, and generates a markdown version of the as-built document, and tags the release, generates PDF and HTML versions of the markdown document, creates a release, and adds the documents to the release as assets diff --git a/md2pdf/htmlconfig.json b/md2pdf/htmlconfig.json new file mode 100644 index 0000000..553151b --- /dev/null +++ b/md2pdf/htmlconfig.json @@ -0,0 +1,9 @@ +{ + "stylesheet": [ + "./md2pdf/htmlstyle.css" + ], + "marked_options": { + "headerIds": false, + "smartypants": true + } +} \ No newline at end of file diff --git a/md2pdf/htmlstyle.css b/md2pdf/htmlstyle.css new file mode 100644 index 0000000..3dbfcda --- /dev/null +++ b/md2pdf/htmlstyle.css @@ -0,0 +1,140 @@ +* { + box-sizing: border-box; +} + +html { + font-size: 100%; +} + +body { + font-family: 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif; + line-height: 1.6; + font-size: 0.6875em; /* 11 pt */ + color: #111; + margin: 0; +} + +body > :first-child { + padding-top: 0; + margin-top: 0; +} + +body > :last-child { + margin-bottom: 0; + padding-bottom: 0; +} + +h1, +h2, +h3, +h4, +h5, +h6 { + margin: 0; + padding: 0.5em 0 0.25em; + text-transform: uppercase; +} + +h5, +h6 { + padding: 0; +} + +h5 { + font-size: 1em; +} + +h6 { + font-size: 0.875em; +} + +p { + margin: 0.25em 0 1em; +} + +blockquote { + margin: 0.5em 0 1em; + padding-left: 0.5em; + padding-right: 1em; + border-left: 4px solid gainsboro; + font-style: italic; +} + +ul, +ol { + margin: 0; + margin-left: 1em; + padding: 0 1.5em 0.5em; +} + +pre { + white-space: pre-wrap; +} + +h1 code, +h2 code, +h3 code, +h4 
code, +h5 code, +h6 code, +p code, +li code, +pre code { + background-color: #f8f8f8; + padding: 0.1em 0.375em; + border: 1px solid #f8f8f8; + border-radius: 0.25em; + font-family: monospace; + font-size: 1.2em; +} + +pre code { + display: block; + padding: 0.5em; +} + +.page-break { + page-break-after: always; +} + +img { + max-width: 100%; + margin: 1em 0; +} + +table { + border-spacing: 0; + border-collapse: collapse; + margin: 0 0 1em; + display: block; + width: 100%; + overflow: auto; + table-layout: auto; + width: 100%; +} + +table th, +table td { + padding: 0.5em 1em; + border: 1px solid gainsboro; +} + +table th { + font-weight: 600; + text-transform: uppercase; +} + +table tr { + background-color: white; + border-top: 1px solid gainsboro; +} + +table tr:nth-child(2n) { + background-color: whitesmoke; +} + +section { + margin: 0 auto; + font-family: 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif !important; + font-size: 9px; +} diff --git a/md2pdf/pdfconfig.json b/md2pdf/pdfconfig.json new file mode 100644 index 0000000..ccfcc24 --- /dev/null +++ b/md2pdf/pdfconfig.json @@ -0,0 +1,17 @@ +{ + "stylesheet": [ + "./md2pdf/pdfstyle.css" + ], + "marked_options": { + "headerIds": false, + "smartypants": true + }, + "pdf_options": { + "format": "A4", + "margin": "15mm", + "printBackground": false, + "headerTemplate": "
ASTRAL Documentation
", + "footerTemplate": "
Page <span class='pageNumber'></span> of <span class='totalPages'></span>
", + "displayHeaderFooter": true + } +} diff --git a/md2pdf/pdfstyle.css b/md2pdf/pdfstyle.css new file mode 100644 index 0000000..609480f --- /dev/null +++ b/md2pdf/pdfstyle.css @@ -0,0 +1,179 @@ +* { + box-sizing: border-box; +} + +@page { + margin: 18mm 14mm 18mm 14mm; +} + +html { + font-size: 100%; +} + +body { + font-family: 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif; + line-height: 1.6; + font-size: 0.6875em; /* 11 pt */ + color: #111; + margin: 0; + orphans: 3; + widows: 3; +} + +body > :first-child { + padding-top: 0; + margin-top: 0; +} + +body > :last-child { + margin-bottom: 0; + padding-bottom: 0; +} + +h1, +h2, +h3, +h4, +h5, +h6 { + margin: 0; + padding: 0.5em 0 0.25em; + text-transform: uppercase; + page-break-after: avoid; + break-after: avoid-page; + page-break-inside: avoid; + break-inside: avoid-page; +} + +h5, +h6 { + padding: 0; +} + +h5 { + font-size: 1em; +} + +h6 { + font-size: 0.875em; +} + +p { + margin: 0.25em 0 1em; +} + +blockquote { + margin: 0.5em 0 1em; + padding-left: 0.5em; + padding-right: 1em; + border-left: 4px solid gainsboro; + font-style: italic; + page-break-inside: avoid; + break-inside: avoid-page; +} + +ul, +ol { + margin: 0; + margin-left: 1em; + padding: 0 1.5em 0.5em; + page-break-inside: avoid; + break-inside: avoid-page; +} + +li { + page-break-inside: avoid; + break-inside: avoid-page; +} + +pre { + white-space: pre-wrap; + page-break-inside: avoid; + break-inside: avoid-page; +} + +h1 code, +h2 code, +h3 code, +h4 code, +h5 code, +h6 code, +p code, +li code, +pre code { + background-color: #f8f8f8; + padding: 0.1em 0.375em; + border: 1px solid #f8f8f8; + border-radius: 0.25em; + font-family: monospace; + font-size: 1.2em; +} + +pre code { + display: block; + padding: 0.5em; +} + +.page-break { + page-break-after: always; +} + +img { + max-width: 100%; + margin: 0.5em 0 1em; + page-break-inside: avoid; + break-inside: avoid-page; +} + +img[alt="ASTRAL logo"] { + max-width: 62%; + margin: 0 0 0.75em; +} + +table { + border-spacing: 0; + border-collapse: collapse; + margin: 0 0 1em; + width: 100%; + overflow: auto; + table-layout: auto; + width: 100%; + page-break-inside: avoid; + break-inside: avoid-page; +} + +table th, +table td { + padding: 0.5em 1em; + border: 1px solid gainsboro; + vertical-align: top; +} + +table th { + font-weight: 600; + text-transform: uppercase; +} + +table tr { + background-color: white; + border-top: 1px solid gainsboro; + page-break-inside: avoid; + break-inside: avoid-page; +} + +table tr:nth-child(2n) { + background-color: whitesmoke; +} + +hr, +svg, +figure { + page-break-inside: avoid; + break-inside: avoid-page; +} + +section { + margin: 0 auto; + font-family: 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif !important; + font-size: 9px; +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9bf02ba --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "intune-entra-drift-backup" +version = "0.0.0" +description = "Git-based snapshots of Microsoft Intune and Entra ID configuration" +requires-python = ">=3.11" +dependencies = [ + "IntuneCD==2.5.0", +] + +[tool.ruff] +line-length = 120 +target-version = "py311" + +[tool.ruff.lint] +select = ["E", "F", "I", "W", "UP", "B", "C4", "SIM"] +ignore = ["E501"] + +[tool.mypy] +python_version = "3.11" 
+warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = false +check_untyped_defs = true +ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e26c358 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +IntuneCD==2.5.0 diff --git a/scripts/apply_reviewer_rejections.py b/scripts/apply_reviewer_rejections.py new file mode 100644 index 0000000..29c36e5 --- /dev/null +++ b/scripts/apply_reviewer_rejections.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +"""Apply per-policy reviewer reject decisions on rolling drift PRs. + +Reviewer decision format inside auto Change Needed threads: +- /reject -> remove this file-level drift from rolling PR (reset to baseline) +- /accept -> keep this file-level drift + +Latest decision command in the thread wins. +""" + +from __future__ import annotations + +import argparse +import base64 +import json +import os +import re +import subprocess +import sys +import urllib.parse +from pathlib import Path +from typing import Any + +# common.py lives in the same directory; ensure it can be imported when the +# script is executed directly. +_sys_path_inserted = False +if __file__: + _script_dir = str(Path(__file__).resolve().parent) + if _script_dir not in sys.path: + sys.path.insert(0, _script_dir) + _sys_path_inserted = True + +import common + +if _sys_path_inserted: + sys.path.pop(0) + +_request_json = common.request_json +_run_git = common.run_git +_configure_git_identity = common.configure_git_identity + +AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:" +THREAD_STATUS_FIXED = 2 +THREAD_STATUS_WONT_FIX = 3 +THREAD_STATUS_CLOSED = 4 +THREAD_STATUS_BY_DESIGN = 5 +DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?Preject|accept)\b") + + +def _run_diff_name_only(repo_root: str, baseline_branch: str, drift_branch: str) -> str: + three_dot = f"origin/{baseline_branch}...origin/{drift_branch}" + two_dot = f"origin/{baseline_branch}..origin/{drift_branch}" + try: + return _run_git(repo_root, ["diff", "--name-only", three_dot]) + except RuntimeError as exc: + stderr = str(exc).lower() + if "no merge base" not in stderr: + raise + print( + "WARNING: No merge base for rolling branches " + f"(origin/{baseline_branch}, origin/{drift_branch}); using direct diff." 
+ ) + return _run_git(repo_root, ["diff", "--name-only", two_dot]) + + +def _git_path_exists(repo_root: str, treeish: str, path: str) -> bool: + proc = subprocess.run( + ["git", "cat-file", "-e", f"{treeish}:{path}"], + cwd=repo_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + return proc.returncode == 0 + + +def _normalize_branch_name(branch: str) -> str: + b = branch.strip() + if b.startswith("refs/heads/"): + return b[len("refs/heads/") :] + return b + + +def _thread_status_code(thread: dict[str, Any]) -> int: + status = thread.get("status") + if isinstance(status, int): + return status + if isinstance(status, str): + mapping = { + "fixed": THREAD_STATUS_FIXED, + "wontfix": THREAD_STATUS_WONT_FIX, + "closed": THREAD_STATUS_CLOSED, + "bydesign": THREAD_STATUS_BY_DESIGN, + } + return mapping.get(status.strip().lower(), 1) + return 1 + + +def _is_thread_resolved(thread: dict[str, Any]) -> bool: + return _thread_status_code(thread) in ( + THREAD_STATUS_FIXED, + THREAD_STATUS_WONT_FIX, + THREAD_STATUS_CLOSED, + THREAD_STATUS_BY_DESIGN, + ) + + +def _ticket_path_from_content(content: str) -> str | None: + marker_re = re.compile(r"") + match = marker_re.search(content or "") + if not match: + return None + encoded = match.group("id") + padding = "=" * ((4 - len(encoded) % 4) % 4) + try: + return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8") + except Exception: + return None + + +def _is_doc_like(path: str) -> bool: + lp = path.lower() + return lp.endswith(".md") or lp.endswith(".markdown") or "/docs/" in lp + + +def _is_report_like(path: str) -> bool: + lp = path.lower() + return "/reports/" in lp or "assignment report" in lp + + +def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None: + decision: str | None = None + + def _comment_sort_key(c: dict[str, Any]) -> tuple[int, int]: + try: + cid = int(c.get("id", 0)) + except Exception: + cid = 0 + try: + parent = int(c.get("parentCommentId", 0)) + except Exception: + parent = 0 + return (cid, parent) + + for comment in sorted(comments, key=_comment_sort_key): + content = str(comment.get("content", "") or "") + match = DECISION_RE.search(content) + if match: + decision = match.group("decision").lower() + return decision + + +def _post_thread_comment(repo_api: str, pr_id: int, thread_id: int, token: str, content: str) -> None: + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads/{thread_id}/comments?api-version=7.1", + token=token, + method="POST", + body={ + "parentCommentId": 0, + "content": content, + "commentType": 1, + }, + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Apply reviewer /reject decisions for rolling PR threads") + parser.add_argument("--repo-root", required=True) + parser.add_argument("--workload", required=True) + parser.add_argument("--drift-branch", required=True) + parser.add_argument("--baseline-branch", required=True) + args = parser.parse_args() + + token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip() + if not token: + raise SystemExit("SYSTEM_ACCESSTOKEN is empty.") + + collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/") + project = os.environ["SYSTEM_TEAMPROJECT"] + repository_id = os.environ["BUILD_REPOSITORY_ID"] + + drift_branch = _normalize_branch_name(args.drift_branch) + baseline_branch = _normalize_branch_name(args.baseline_branch) + + repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}" + source_ref = f"refs/heads/{drift_branch}" + target_ref = 
f"refs/heads/{baseline_branch}" + + query = urllib.parse.urlencode( + { + "searchCriteria.status": "active", + "searchCriteria.sourceRefName": source_ref, + "searchCriteria.targetRefName": target_ref, + "api-version": "7.1", + }, + quote_via=urllib.parse.quote, + safe="/", + ) + payload = _request_json(f"{repo_api}/pullrequests?{query}", token=token) + prs = payload.get("value", []) if isinstance(payload, dict) else [] + if not prs: + print("No active rolling PR found; skipping reviewer reject sync.") + return 0 + + pr = prs[0] + pr_id = int(pr.get("pullRequestId")) + + _run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch]) + diff_paths = _run_diff_name_only(args.repo_root, baseline_branch, drift_branch) + changed_paths = { + p.strip() + for p in diff_paths.splitlines() + if p.strip() and not _is_doc_like(p.strip()) and not _is_report_like(p.strip()) + } + if not changed_paths: + print("No changed policy paths in rolling PR; nothing to auto-reject.") + return 0 + + threads_payload = _request_json(f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", token=token) + threads = threads_payload.get("value", []) if isinstance(threads_payload, dict) else [] + + rejections: list[tuple[str, int]] = [] + examined_ticket_threads = 0 + for thread in threads: + comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else [] + marker_path: str | None = None + for c in comments: + marker_path = _ticket_path_from_content(str(c.get("content", "") or "")) + if marker_path: + break + if not marker_path: + continue + examined_ticket_threads += 1 + if marker_path not in changed_paths: + continue + + decision = _latest_thread_decision(comments) + if decision == "reject": + try: + thread_id = int(thread.get("id")) + except Exception: + thread_id = -1 + rejections.append((marker_path, thread_id)) + + if not rejections: + print( + "No /reject decisions found in auto policy threads " + f"(examined={examined_ticket_threads}, changed_paths={len(changed_paths)})." + ) + return 0 + + print( + "Detected /reject decisions in auto policy threads: " + f"{len(rejections)} (examined={examined_ticket_threads})." + ) + + _run_git(args.repo_root, ["checkout", "--quiet", "--force", "-B", drift_branch, f"origin/{drift_branch}"]) + + changed = 0 + baseline_tree = f"origin/{baseline_branch}" + for path, _thread_id in sorted(set(rejections)): + if _git_path_exists(args.repo_root, baseline_tree, path): + _run_git(args.repo_root, ["checkout", baseline_tree, "--", path]) + _run_git(args.repo_root, ["add", "--", path]) + changed += 1 + else: + file_abs = os.path.join(args.repo_root, path) + if os.path.exists(file_abs): + _run_git(args.repo_root, ["rm", "-f", "--", path]) + changed += 1 + + proc = subprocess.run( + ["git", "diff", "--cached", "--quiet"], + cwd=args.repo_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if proc.returncode == 0: + print("Reviewer /reject decisions found, but no effective diff remained after baseline reset.") + return 0 + + _configure_git_identity(args.repo_root) + + commit_msg = f"Apply reviewer /reject decisions ({args.workload})" + _run_git(args.repo_root, ["commit", "-m", commit_msg]) + _run_git(args.repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{drift_branch}"]) + + for path, thread_id in rejections: + if thread_id <= 0: + continue + _post_thread_comment( + repo_api=repo_api, + pr_id=pr_id, + thread_id=thread_id, + token=token, + content=( + "Auto-action: /reject detected. 
This policy drift was reset to baseline on the rolling drift branch, " + "so it is removed from the PR diff.\n\n" + "If tenant rollback is required immediately, run restore pipeline as remediation." + ), + ) + + print( + f"Applied reviewer /reject decisions for {changed} path(s) in PR #{pr_id}; " + f"drift branch '{drift_branch}' updated." + ) + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except Exception as exc: + print(f"WARNING: Failed to apply reviewer /reject decisions: {exc}", file=sys.stderr) + raise diff --git a/scripts/commit_entra_drift.py b/scripts/commit_entra_drift.py new file mode 100644 index 0000000..3b6ae36 --- /dev/null +++ b/scripts/commit_entra_drift.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python3 +"""Commit Entra drift changes with best-effort change-author attribution.""" + +from __future__ import annotations + +import argparse +import datetime as dt +import json +import pathlib +import subprocess +import sys +import urllib.error +import urllib.parse +import urllib.request +from collections import defaultdict +from dataclasses import dataclass + + +def _git_run(repo_root: pathlib.Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]: + proc = subprocess.run( + ["git", *args], + cwd=str(repo_root), + check=False, + capture_output=True, + text=True, + ) + if check and proc.returncode != 0: + stderr = (proc.stderr or "").strip() + raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}") + return proc + + +def _set_output_var(name: str, value: str, is_output: bool = True) -> None: + suffix = ";isOutput=true" if is_output else "" + print(f"##vso[task.setvariable variable={name}{suffix}]{value}") + + +def _warning(message: str) -> None: + print(f"##vso[task.logissue type=warning]{message}") + + +def _parse_backup_start(value: str) -> dt.datetime: + candidate = value.strip() + if not candidate: + raise ValueError("Missing required --backup-start value. 
Ensure the pipeline sets BACKUP_START in the backup_entra job before invoking commit_entra_drift.py.") + parsed = dt.datetime.strptime(candidate, "%Y.%m.%d:%H.%M.%S") + return parsed.replace(tzinfo=dt.timezone.utc) + + +def _format_filter_datetime(value: dt.datetime) -> str: + return value.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +def _last_entra_commit_date(repo_root: pathlib.Path, depth: int = 30) -> dt.datetime | None: + _git_run(repo_root, ["fetch", f"--depth={depth}"], check=False) + proc = _git_run( + repo_root, + [ + "--no-pager", + "log", + "--no-show-signature", + f"-{depth}", + "--format=%s%%%cI", + ], + ) + for raw in proc.stdout.splitlines(): + line = raw.strip() + if not line or "%%%" not in line: + continue + subject, iso_date = line.split("%%%", 1) + if subject.endswith(" (Entra)") and len(subject) >= 18 and subject[4] == ".": + try: + return dt.datetime.fromisoformat(iso_date.replace("Z", "+00:00")).astimezone(dt.timezone.utc) + except ValueError: + continue + return None + + +def _request_json(url: str, token: str) -> dict: + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/json", + }, + method="GET", + ) + with urllib.request.urlopen(req, timeout=60) as resp: + return json.loads(resp.read().decode("utf-8")) + + +@dataclass(frozen=True) +class Identity: + key: str + value: str + name: str + + +def _display_or_localpart(display_name: str, principal_name: str) -> str: + display_name = (display_name or "").strip() + if display_name: + return display_name + principal_name = (principal_name or "").strip() + if "@" in principal_name: + return principal_name.split("@", 1)[0] + return principal_name + + +def _extract_identity_from_audit(entry: dict) -> Identity | None: + initiated_by = entry.get("initiatedBy") + if not isinstance(initiated_by, dict): + return None + + user = initiated_by.get("user") + if isinstance(user, dict): + principal_name = str(user.get("userPrincipalName") or user.get("email") or "").strip() + display_name = str(user.get("displayName") or "").strip() + if principal_name: + return Identity( + key=f"user:{principal_name}", + value=principal_name, + name=_display_or_localpart(display_name, principal_name), + ) + if display_name: + return Identity( + key=f"display:{display_name}", + value=display_name, + name=display_name, + ) + + app = initiated_by.get("app") + if isinstance(app, dict): + display_name = str(app.get("displayName") or "").strip() + if display_name: + return Identity( + key=f"sp:{display_name}", + value=f"{display_name} (SP)", + name=display_name, + ) + + return None + + +def _fetch_directory_audits( + token: str, + last_commit_date: dt.datetime | None, + backup_start: dt.datetime, +) -> list[dict]: + params = { + "$top": "999", + "$select": "activityDateTime,activityDisplayName,category,result,initiatedBy,targetResources", + } + filter_parts = [f"activityDateTime le {_format_filter_datetime(backup_start)}"] + if last_commit_date is not None: + filter_parts.append(f"activityDateTime ge {_format_filter_datetime(last_commit_date)}") + params["$filter"] = " and ".join(filter_parts) + url = f"https://graph.microsoft.com/v1.0/auditLogs/directoryAudits?{urllib.parse.urlencode(params)}" + + results: list[dict] = [] + while url: + payload = _request_json(url, token) + value = payload.get("value") + if isinstance(value, list): + results.extend(item for item in value if isinstance(item, dict)) + next_link = payload.get("@odata.nextLink") + url = str(next_link).strip() if 
next_link else "" + return results + + +def _resource_id_from_path(path: str) -> str: + pure = pathlib.PurePosixPath(path) + if pure.suffix.lower() != ".json": + return "" + stem = pure.stem + if "__" not in stem: + return "" + return stem.rsplit("__", 1)[-1].lstrip("_").strip() + + +def _category_key(path: str) -> str: + pure = pathlib.PurePosixPath(path) + parts = pure.parts + if len(parts) < 3: + return "" + return "/".join(parts[:3]) + + +def _fallback_identity(name: str, email: str) -> Identity: + return Identity(key=f"fallback:{email}", value=email, name=name) + + +def _effective_fallback_identity( + build_reason: str, + requested_for: str, + requested_for_email: str, + service_name: str, + service_email: str, +) -> Identity: + requested_for_email = requested_for_email.strip() + if build_reason.strip() != "Schedule" and "@" in requested_for_email: + requested_for = requested_for.strip() or requested_for_email.split("@", 1)[0] + return _fallback_identity(requested_for, requested_for_email) + return _fallback_identity(service_name.strip(), service_email.strip()) + + +def _changed_files(repo_root: pathlib.Path, workload_root: str) -> list[str]: + proc = _git_run(repo_root, ["diff", "--cached", "--name-only", "--", workload_root]) + return [line.strip() for line in proc.stdout.splitlines() if line.strip()] + + +def _remote_diff_is_empty(repo_root: pathlib.Path, drift_branch: str, workload_root: str) -> bool: + remote_ref = f"refs/remotes/origin/{drift_branch}" + if _git_run(repo_root, ["show-ref", "--verify", "--quiet", remote_ref], check=False).returncode != 0: + return False + return _git_run(repo_root, ["diff", "--quiet", f"origin/{drift_branch}", "--", workload_root], check=False).returncode == 0 + + +def _build_author_groups( + changed_files: list[str], + audits: list[dict], + fallback: Identity, +) -> tuple[dict[str, dict[str, list[str] | list[Identity]]], int]: + identities_by_resource: dict[str, dict[str, Identity]] = defaultdict(dict) + for audit in audits: + result = str(audit.get("result") or "").strip().lower() + if result and result != "success": + continue + identity = _extract_identity_from_audit(audit) + if identity is None: + continue + target_resources = audit.get("targetResources") + if not isinstance(target_resources, list): + continue + for target in target_resources: + if not isinstance(target, dict): + continue + resource_id = str(target.get("id") or "").strip() + if resource_id: + identities_by_resource[resource_id][identity.key] = identity + + resolved_by_category: dict[str, dict[str, Identity]] = defaultdict(dict) + file_identities: dict[str, list[Identity]] = {} + unresolved_count = 0 + + for path in changed_files: + resource_id = _resource_id_from_path(path) + identities = list(identities_by_resource.get(resource_id, {}).values()) + if identities: + file_identities[path] = sorted(identities, key=lambda item: item.key) + for identity in file_identities[path]: + resolved_by_category[_category_key(path)][identity.key] = identity + else: + file_identities[path] = [] + if resource_id: + unresolved_count += 1 + + for path in changed_files: + if file_identities[path]: + continue + category_identities = list(resolved_by_category.get(_category_key(path), {}).values()) + if category_identities: + file_identities[path] = sorted(category_identities, key=lambda item: item.key) + else: + file_identities[path] = [fallback] + + grouped: dict[str, dict[str, list[str] | list[Identity]]] = {} + for path in changed_files: + identities = file_identities[path] or [fallback] + 
group_key = "&".join(identity.key for identity in identities) + entry = grouped.setdefault(group_key, {"files": [], "identities": identities}) + files = entry["files"] + assert isinstance(files, list) + files.append(path) + + return grouped, unresolved_count + + +def _commit_group( + repo_root: pathlib.Path, + files: list[str], + identities: list[Identity], + backup_start: dt.datetime, +) -> None: + for path in files: + print(f"\t- Adding {repo_root / path}") + _git_run(repo_root, ["add", "--all", "--", path]) + author_name = ", ".join(identity.name for identity in identities) + author_email = ", ".join(identity.value for identity in identities) + print(f"\t- Setting commit author(s): {author_name}") + _git_run(repo_root, ["config", "user.name", author_name]) + _git_run(repo_root, ["config", "user.email", author_email]) + commit_date = backup_start.astimezone(dt.timezone.utc).strftime("%Y.%m.%d_%H.%M") + commit_name = f"{commit_date} -- {author_name} (Entra)" + print(f"\t- Creating commit '{commit_name}'") + _git_run(repo_root, ["commit", "-m", commit_name]) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo-root", required=True) + parser.add_argument("--workload-root", required=True) + parser.add_argument("--baseline-branch", required=True) + parser.add_argument("--drift-branch", required=True) + parser.add_argument("--access-token", required=True) + parser.add_argument("--service-name", required=True) + parser.add_argument("--service-email", required=True) + parser.add_argument("--build-reason", default="") + parser.add_argument("--requested-for", default="") + parser.add_argument("--requested-for-email", default="") + parser.add_argument("--backup-start", required=True) + args = parser.parse_args() + + repo_root = pathlib.Path(args.repo_root).resolve() + workload_root = args.workload_root.strip().strip("/") + fallback = _effective_fallback_identity( + build_reason=args.build_reason, + requested_for=args.requested_for, + requested_for_email=args.requested_for_email, + service_name=args.service_name, + service_email=args.service_email, + ) + + _git_run(repo_root, ["config", "user.name", fallback.name]) + _git_run(repo_root, ["config", "user.email", fallback.value]) + _git_run(repo_root, ["add", "--all", "--", workload_root]) + + changed_files = _changed_files(repo_root, workload_root) + if not changed_files: + print("No Entra change detected") + _set_output_var("CHANGE_DETECTED", "0") + _set_output_var("ROLLING_PR_SYNC_REQUIRED", "0") + return 0 + + if _remote_diff_is_empty(repo_root, args.drift_branch, workload_root): + print("No Entra change detected (snapshot identical to existing drift branch)") + _set_output_var("CHANGE_DETECTED", "0") + _set_output_var("ROLLING_PR_SYNC_REQUIRED", "1") + return 0 + + backup_start = _parse_backup_start(args.backup_start) + last_commit_date = _last_entra_commit_date(repo_root) + if last_commit_date is None: + _warning("Unable to obtain date of the last Entra backup config commit. All Entra audit events in the current query window will be considered.") + + audits: list[dict] = [] + try: + print("Getting Entra directory audit logs") + print(f"\t- from: '{last_commit_date}' (UTC) to: '{backup_start}' (UTC)") + audits = _fetch_directory_audits(args.access_token, last_commit_date, backup_start) + except urllib.error.HTTPError as exc: + if exc.code in (401, 403): + _warning("Graph token cannot read Entra directory audit logs. 
Falling back to pipeline identity for unresolved Entra changes.") + else: + raise + except Exception as exc: # pragma: no cover - defensive path for pipeline runtime issues + _warning(f"Unable to query Entra directory audit logs ({exc}). Falling back to pipeline identity for unresolved Entra changes.") + + groups, unresolved_count = _build_author_groups(changed_files, audits, fallback) + if unresolved_count > 0: + _warning( + f"Unable to resolve author from Entra audit logs for {unresolved_count} of {len(changed_files)} changed files. Fallback identity used where needed." + ) + + _git_run(repo_root, ["reset", "--quiet", "--", workload_root]) + print("\nCommit changes") + for group in groups.values(): + files = group["files"] + identities = group["identities"] + assert isinstance(files, list) + assert isinstance(identities, list) + _commit_group(repo_root, files, identities, backup_start) + unpushed = _git_run(repo_root, ["cherry", "-v", f"origin/{args.baseline_branch}"]).stdout.strip() + if not unpushed: + _warning("Nothing to commit?! This shouldn't happen.") + _set_output_var("CHANGE_DETECTED", "0") + _set_output_var("ROLLING_PR_SYNC_REQUIRED", "0") + return 0 + + _git_run(repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{args.drift_branch}"]) + commit_sha = _git_run(repo_root, ["rev-parse", "HEAD"]).stdout.strip() + modification_authors = sorted({identity.value for group in groups.values() for identity in group["identities"]}) # type: ignore[index] + _set_output_var("CHANGE_DETECTED", "1") + _set_output_var("ROLLING_PR_SYNC_REQUIRED", "1") + _set_output_var("COMMIT_SHA", commit_sha) + _set_output_var("COMMIT_DATE", backup_start.strftime("%Y.%m.%d_%H.%M")) + _set_output_var("MODIFICATION_AUTHOR", ", ".join(modification_authors)) + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except Exception as exc: + print(str(exc), file=sys.stderr) + raise diff --git a/scripts/common.py b/scripts/common.py new file mode 100644 index 0000000..39d4c73 --- /dev/null +++ b/scripts/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +"""Shared utilities for Intune / Entra drift backup scripts.""" + +from __future__ import annotations + +import json +import os +import re +import subprocess +import time +import urllib.error +import urllib.request +from typing import Any + + +def env_text(name: str, default: str = "") -> str: + """Read and sanitize an environment variable, treating unresolved Azure DevOps + macros $(...) as empty. 
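+
+    Illustrative example: a value that is still the literal macro text
+    "$(SOME_PIPELINE_VAR)" is treated as unset, so the provided default is returned.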
+ """ + raw = os.environ.get(name) + if raw is None: + return default + value = raw.strip() + if re.fullmatch(r"\$\([^)]+\)", value): + return default + if not value: + return default + return value + + +def env_bool(name: str, default: bool = False) -> bool: + """Interpret an environment variable as a boolean.""" + raw = env_text(name, "") + if not raw: + return default + return raw.lower() in {"1", "true", "yes", "y", "on"} + + +def normalize_exclude_csv(value: str) -> str: + """Normalize an exclude CSV value, treating sentinel values as empty.""" + normalized = str(value or "").strip() + if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}: + return "" + return normalized + + +def normalize_merge_strategy(value: str) -> str: + """Normalize a merge strategy string to an Azure DevOps API value.""" + raw = (value or "").strip().lower().replace("-", "").replace("_", "") + aliases = { + "nofastforward": "noFastForward", + "mergecommit": "noFastForward", + "merge": "noFastForward", + "squash": "squash", + "rebase": "rebase", + "rebasefastforward": "rebase", + "rebaseff": "rebase", + "rebasemerge": "rebaseMerge", + } + return aliases.get(raw, "rebase") + + +def _get_retry_after_seconds(error: urllib.error.HTTPError) -> float | None: + try: + retry_after = error.headers.get("Retry-After") + if retry_after: + return float(retry_after) + except Exception: + pass + return None + + +def request_json( + url: str, + method: str = "GET", + body: dict[str, Any] | None = None, + headers: dict[str, str] | None = None, + token: str | None = None, + timeout: float = 60, + max_retries: int = 0, +) -> Any: + """Make a JSON HTTP request and return the parsed response. + + If *token* is provided, an Authorization header is added automatically. + If *max_retries* is greater than zero, transient HTTP errors (429, 500, + 502, 503, 504) are retried with exponential back-off. + """ + req_headers: dict[str, str] = { + "Accept": "application/json", + } + if token is not None: + req_headers["Authorization"] = f"Bearer {token}" + if headers is not None: + req_headers.update(headers) + + payload: bytes | None = None + if body is not None: + payload = json.dumps(body).encode("utf-8") + req_headers.setdefault("Content-Type", "application/json") + + retry_codes = {429, 500, 502, 503, 504} + last_error: Exception | None = None + + for attempt in range(max_retries + 1): + req = urllib.request.Request( + url, + data=payload, + method=method, + headers=req_headers, + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + last_error = exc + if exc.code not in retry_codes or attempt == max_retries: + raise + retry_after = _get_retry_after_seconds(exc) + sleep = retry_after if retry_after is not None else (2 ** attempt) + time.sleep(sleep) + except urllib.error.URLError as exc: + last_error = exc + if attempt == max_retries: + raise + time.sleep(2 ** attempt) + + # Should never be reached; satisfy type checker. 
+ if last_error is not None: + raise last_error + raise RuntimeError("request_json exhausted all retries") + + +def run_git(repo_root: str | os.PathLike[str], args: list[str], check: bool = True) -> str: + """Run a git command and return stdout as a stripped string.""" + proc = subprocess.run( + ["git", *args], + cwd=str(repo_root), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if check and proc.returncode != 0: + stderr = (proc.stderr or "").strip() + raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}") + return proc.stdout.strip() + + +def configure_git_identity( + repo_root: str | os.PathLike[str], + fallback_name: str | None = None, + fallback_email: str | None = None, +) -> None: + """Configure git user.name and user.email from pipeline env vars.""" + requested_for = (os.environ.get("BUILD_REQUESTEDFOR") or "").strip() + requested_for_email = (os.environ.get("BUILD_REQUESTEDFOREMAIL") or "").strip() + fallback_name = (fallback_name or os.environ.get("USER_NAME") or "ASTRAL Backup Service").strip() + fallback_email = (fallback_email or os.environ.get("USER_EMAIL") or "intune-backup@local.invalid").strip() + + author_name = requested_for or fallback_name + author_email = requested_for_email if "@" in requested_for_email else fallback_email + + run_git(repo_root, ["config", "user.name", author_name]) + run_git(repo_root, ["config", "user.email", author_email]) diff --git a/scripts/diagnostics/precheck_azure_openai_availability.py b/scripts/diagnostics/precheck_azure_openai_availability.py new file mode 100644 index 0000000..2bf84ee --- /dev/null +++ b/scripts/diagnostics/precheck_azure_openai_availability.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +""" +Lightweight Azure OpenAI availability precheck for pipeline diagnostics. + +This script is intentionally non-blocking: it always exits 0. 
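+
+It sends a minimal chat-completions probe to the configured deployment and records the
+outcome in the AOAI_AVAILABLE pipeline variable ("1" on success, "0" otherwise), so that
+later steps can skip AI summary generation when the endpoint is unreachable or misconfigured.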
+""" + +from __future__ import annotations + +import json +import os +import sys +from urllib.error import HTTPError, URLError +from urllib.parse import quote, urlsplit +from urllib.request import Request, urlopen + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default).strip() + + +def _set_pipeline_var(name: str, value: str) -> None: + print(f"##vso[task.setvariable variable={name}]{value}") + + +def _normalize_aoai_endpoint(endpoint: str) -> str: + cleaned = endpoint.strip().rstrip("/") + if not cleaned: + return cleaned + + parsed = urlsplit(cleaned) + if parsed.scheme and parsed.netloc: + cleaned = f"{parsed.scheme}://{parsed.netloc}" + + marker = "/openai" + idx = cleaned.lower().find(marker) + if idx != -1: + return cleaned[:idx] + return cleaned + + +def _preferred_aoai_token_param(deployment_name: str) -> str: + override = _env("AZURE_OPENAI_TOKEN_PARAM", "").lower() + if override in {"max_tokens", "max_completion_tokens"}: + return override + if deployment_name.strip().lower().startswith("gpt-5"): + return "max_completion_tokens" + return "max_tokens" + + +def _aoai_token_param_candidates(deployment_name: str) -> list[str]: + preferred = _preferred_aoai_token_param(deployment_name) + alternate = "max_completion_tokens" if preferred == "max_tokens" else "max_tokens" + return [preferred, alternate] + + +def _preferred_aoai_temperature(deployment_name: str) -> float | None: + override = _env("AZURE_OPENAI_TEMPERATURE", "").lower() + if override in {"default", "none", "omit"}: + return None + if override: + try: + return float(override) + except ValueError: + return None + if deployment_name.strip().lower().startswith("gpt-5"): + return None + return 0.0 + + +def _aoai_temperature_candidates(deployment_name: str) -> list[float | None]: + preferred = _preferred_aoai_temperature(deployment_name) + if preferred is None: + return [None] + return [preferred, None] + + +def main() -> int: + enabled = _env("ENABLE_PR_AI_SUMMARY", "true").lower() == "true" + if not enabled: + print("Azure OpenAI precheck skipped: ENABLE_PR_AI_SUMMARY=false") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + + endpoint = _env("AZURE_OPENAI_ENDPOINT") + deployment = _env("AZURE_OPENAI_DEPLOYMENT") + api_key = _env("AZURE_OPENAI_API_KEY") + api_version = _env("AZURE_OPENAI_API_VERSION", "2024-12-01-preview") + + if not endpoint or not deployment or not api_key: + print("Azure OpenAI precheck skipped: missing endpoint/deployment/api-key variable") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + + endpoint_raw = endpoint + endpoint = _normalize_aoai_endpoint(endpoint_raw) + deployment_url = f"{endpoint}/openai/deployments/{quote(deployment)}/chat/completions?api-version={quote(api_version)}" + v1_url = f"{endpoint}/openai/v1/chat/completions" + + print("Azure OpenAI precheck: starting") + print(f"- endpoint(raw): {endpoint_raw}") + print(f"- endpoint(normalized): {endpoint}") + print(f"- deployment: {deployment}") + print(f"- api_version: {api_version}") + prefer_v1 = endpoint.lower().endswith(".cognitiveservices.azure.com") + health_messages = [ + {"role": "system", "content": "You are a health-check assistant."}, + {"role": "user", "content": "Reply with: OK"}, + ] + + for temperature in _aoai_temperature_candidates(deployment): + temperature_unsupported = False + for token_param in _aoai_token_param_candidates(deployment): + deployment_payload = { + "messages": health_messages, + token_param: 16, + } + v1_payload = { + "model": deployment, + "messages": 
health_messages, + token_param: 16, + } + if temperature is not None: + deployment_payload["temperature"] = temperature + v1_payload["temperature"] = temperature + + routes = ( + [("v1", v1_url, v1_payload), ("deployments", deployment_url, deployment_payload)] + if prefer_v1 + else [("deployments", deployment_url, deployment_payload), ("v1", v1_url, v1_payload)] + ) + + token_param_unsupported = False + for route_name, route_url, payload in routes: + req = Request( + url=route_url, + method="POST", + data=json.dumps(payload).encode("utf-8"), + headers={ + "Content-Type": "application/json", + "api-key": api_key, + }, + ) + try: + with urlopen(req, timeout=45) as resp: + _ = json.loads(resp.read().decode("utf-8")) + print(f"Azure OpenAI precheck: SUCCESS via {route_name} route") + _set_pipeline_var("AOAI_AVAILABLE", "1") + return 0 + except HTTPError as exc: + raw = "" + try: + raw = exc.read().decode("utf-8", errors="replace") + except Exception: + raw = "" + print(f"Azure OpenAI precheck: HTTP {exc.code} via {route_name} route") + if raw: + print(raw) + if exc.code == 400: + raw_lower = raw.lower() + if "unsupported parameter" in raw_lower and f"'{token_param}'" in raw_lower: + token_param_unsupported = True + break + if "unsupported value" in raw_lower and "'temperature'" in raw_lower and temperature is not None: + temperature_unsupported = True + break + if exc.code == 404: + # Try fallback route first. + continue + if exc.code in (401, 403): + print("Hint: Check AZURE_OPENAI_API_KEY and endpoint/resource pairing.") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + if exc.code == 400: + print("Hint: Check model/deployment name and API version compatibility.") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + except URLError as exc: + print(f"Azure OpenAI precheck: network error via {route_name} route: {exc}") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + except Exception as exc: # pragma: no cover + print(f"Azure OpenAI precheck: unexpected error via {route_name} route: {exc}") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + if temperature_unsupported: + break + if not token_param_unsupported: + break + if not temperature_unsupported: + break + + print("Azure OpenAI precheck: no successful response from tested routes/token-params") + print("Hint: Verify AZURE_OPENAI_ENDPOINT points to the resource root, without /openai path suffix.") + print("Hint: Verify AZURE_OPENAI_DEPLOYMENT is the deployment name (for v1 this is passed as model).") + _set_pipeline_var("AOAI_AVAILABLE", "0") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/ensure_rolling_pr.py b/scripts/ensure_rolling_pr.py new file mode 100644 index 0000000..4d735d7 --- /dev/null +++ b/scripts/ensure_rolling_pr.py @@ -0,0 +1,651 @@ +#!/usr/bin/env python3 +"""Create/update rolling drift PR and optionally queue remediation after rejection.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import subprocess +import sys +import urllib.parse +from pathlib import Path +from typing import Any + +# common.py lives in the same directory; ensure it can be imported when the +# script is executed directly. 
+_sys_path_inserted = False +if __file__: + _script_dir = str(Path(__file__).resolve().parent) + if _script_dir not in sys.path: + sys.path.insert(0, _script_dir) + _sys_path_inserted = True + +import common + +if _sys_path_inserted: + sys.path.pop(0) + +_env_text = common.env_text +_env_bool = common.env_bool +_normalize_exclude_csv = common.normalize_exclude_csv +_normalize_merge_strategy = common.normalize_merge_strategy +_request_json = common.request_json +_run_git = common.run_git + + +def _query_prs( + repo_api: str, + headers: dict[str, str], + source_ref: str, + target_ref: str, + status: str, +) -> list[dict[str, Any]]: + query = urllib.parse.urlencode( + { + "searchCriteria.status": status, + "searchCriteria.sourceRefName": source_ref, + "searchCriteria.targetRefName": target_ref, + "api-version": "7.1", + }, + quote_via=urllib.parse.quote, + safe="/", + ) + url = f"{repo_api}/pullrequests?{query}" + payload = _request_json(url, headers=headers) + return payload.get("value", []) if isinstance(payload, dict) else [] + + +def _normalize_branch(branch: str) -> str: + b = branch.strip() + if b.startswith("refs/heads/"): + return b[len("refs/heads/") :] + return b + + +def _ref_from_branch(branch: str) -> str: + return f"refs/heads/{_normalize_branch(branch)}" + + +def _pr_web_url(pr_payload: dict[str, Any]) -> str: + pr_id = pr_payload.get("pullRequestId") + return ( + pr_payload.get("url", "") + .replace("_apis/git/repositories", "_git") + .replace(f"/pullRequests/{pr_id}", f"/pullrequest/{pr_id}") + ) + + + +def _current_tree_id(repo_root: str) -> str: + return _run_git(repo_root, ["rev-parse", "HEAD^{tree}"]) + + +def _tree_id_for_commitish(repo_root: str, commitish: str) -> str: + return _run_git(repo_root, ["rev-parse", f"{commitish}^{{tree}}"]) + + +def _ref_has_commit(repo_root: str, ref: str) -> bool: + proc = subprocess.run( + ["git", "rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"], + cwd=repo_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + return proc.returncode == 0 + + +def _commit_tree_id(repo_api: str, headers: dict[str, str], commit_id: str) -> str: + url = f"{repo_api}/commits/{commit_id}?api-version=7.1" + payload = _request_json(url, headers=headers) + tree_id = payload.get("treeId", "") if isinstance(payload, dict) else "" + return tree_id.strip() + + +def _latest_pr_by_creation(prs: list[dict[str, Any]]) -> list[dict[str, Any]]: + return sorted(prs, key=lambda x: x.get("creationDate", ""), reverse=True) + + +def _normalize_repo_path(path: str) -> str: + return str(path or "").replace("\\", "/").lstrip("./") + + +def _is_doc_like(path: str) -> bool: + lp = _normalize_repo_path(path).lower() + if lp.endswith((".md", ".html", ".htm", ".pdf", ".csv", ".txt")): + return True + return "/docs/" in f"/{lp}" or "/object inventory/" in f"/{lp}" + + +def _is_report_like(path: str) -> bool: + lp = _normalize_repo_path(path).lower() + return "/reports/" in f"/{lp}" or "/assignment report/" in f"/{lp}" + + +def _is_workload_config_path(path: str, workload_dir: str, backup_folder: str, reports_subdir: str) -> bool: + lp = _normalize_repo_path(path).lower() + backup_norm = _normalize_repo_path(backup_folder).lower().strip("/") + workload_norm = _normalize_repo_path(workload_dir).lower().strip("/") + reports_norm = _normalize_repo_path(reports_subdir).lower().strip("/") + + if not backup_norm or not workload_norm: + return False + + workload_prefix = f"{backup_norm}/{workload_norm}/" + if not lp.startswith(workload_prefix): + return False + + 
if reports_norm and lp.startswith(f"{backup_norm}/{reports_norm}/"): + return False + + if _is_doc_like(lp) or _is_report_like(lp): + return False + return True + + +def _config_fingerprint_from_local_tree( + repo_root: str, commitish: str, workload_dir: str, backup_folder: str, reports_subdir: str +) -> str: + backup_norm = _normalize_repo_path(backup_folder).strip("/") + workload_norm = _normalize_repo_path(workload_dir).strip("/") + path_prefix = f"{backup_norm}/{workload_norm}" if backup_norm and workload_norm else "" + if not path_prefix: + return "" + + try: + out = _run_git(repo_root, ["ls-tree", "-r", "--full-tree", commitish, "--", path_prefix]) + except Exception: + return "" + + pairs: list[str] = [] + for line in out.splitlines(): + if "\t" not in line: + continue + left, rel_path = line.split("\t", 1) + parts = left.split() + if len(parts) < 3 or parts[1] != "blob": + continue + blob_id = parts[2].strip() + if not blob_id: + continue + if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir): + continue + pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}") + + if not pairs: + return "" + pairs.sort(key=lambda item: item.lower()) + joined = "\n".join(pairs).encode("utf-8") + return hashlib.sha256(joined).hexdigest() + + +def _config_fingerprint_from_tree_api( + repo_api: str, headers: dict[str, str], tree_id: str, workload_dir: str, backup_folder: str, reports_subdir: str +) -> str: + if not tree_id: + return "" + url = f"{repo_api}/trees/{tree_id}?recursive=true&api-version=7.1" + payload = _request_json(url, headers=headers) + entries = payload.get("treeEntries", []) if isinstance(payload, dict) else [] + + pairs: list[str] = [] + for entry in entries: + if not isinstance(entry, dict): + continue + if str(entry.get("gitObjectType", "")).lower() != "blob": + continue + rel_path = str(entry.get("relativePath", "")) + if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir): + continue + blob_id = str(entry.get("objectId", "")).strip() + if not blob_id: + continue + pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}") + + if not pairs: + return "" + pairs.sort(key=lambda item: item.lower()) + joined = "\n".join(pairs).encode("utf-8") + return hashlib.sha256(joined).hexdigest() + + +def _workload_config_diff_exists( + repo_root: str, + baseline_commitish: str, + drift_commitish: str, + workload_dir: str, + backup_folder: str, + reports_subdir: str, +) -> bool: + baseline_fingerprint = _config_fingerprint_from_local_tree( + repo_root=repo_root, + commitish=baseline_commitish, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + ) + drift_fingerprint = _config_fingerprint_from_local_tree( + repo_root=repo_root, + commitish=drift_commitish, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + ) + + if baseline_fingerprint and drift_fingerprint: + return baseline_fingerprint != drift_fingerprint + + try: + return _tree_id_for_commitish(repo_root, baseline_commitish) != _tree_id_for_commitish(repo_root, drift_commitish) + except Exception: + return True + + +def _find_matching_abandoned_pr( + repo_api: str, + headers: dict[str, str], + abandoned_prs: list[dict[str, Any]], + drift_tree: str, + repo_root: str, + workload_dir: str, + backup_folder: str, + reports_subdir: str, + drift_commitish: str, +) -> tuple[dict[str, Any] | None, str]: + current_config_fingerprint = _config_fingerprint_from_local_tree( + repo_root=repo_root, 
+ commitish=drift_commitish, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + ) + tree_fingerprint_cache: dict[str, str] = {} + + for pr in _latest_pr_by_creation(abandoned_prs): + commit_id = ( + ((pr.get("lastMergeSourceCommit") or {}).get("commitId")) + or ((pr.get("lastMergeCommit") or {}).get("commitId")) + or "" + ).strip() + if not commit_id: + continue + try: + pr_tree = _commit_tree_id(repo_api, headers, commit_id) + except Exception: + continue + if pr_tree and pr_tree == drift_tree: + return pr, "exact-tree" + + if current_config_fingerprint and pr_tree: + if pr_tree not in tree_fingerprint_cache: + try: + tree_fingerprint_cache[pr_tree] = _config_fingerprint_from_tree_api( + repo_api=repo_api, + headers=headers, + tree_id=pr_tree, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + ) + except Exception: + tree_fingerprint_cache[pr_tree] = "" + if tree_fingerprint_cache[pr_tree] and tree_fingerprint_cache[pr_tree] == current_config_fingerprint: + return pr, "config-fingerprint" + + return None, "" + + +def _pr_has_reject_vote(pr: dict[str, Any]) -> bool: + reviewers = pr.get("reviewers", []) + if not isinstance(reviewers, list): + return False + for reviewer in reviewers: + if not isinstance(reviewer, dict): + continue + try: + vote = int(reviewer.get("vote", 0)) + except Exception: + vote = 0 + if vote == -10: + return True + return False + + +def _current_pr_merge_strategy(pr: dict[str, Any]) -> str: + completion_options = pr.get("completionOptions") + if not isinstance(completion_options, dict): + return "" + raw = str(completion_options.get("mergeStrategy") or "").strip() + if not raw: + return "" + return _normalize_merge_strategy(raw) + + +def _build_description(workload: str, drift_branch: str, baseline_branch: str, build_number: str, build_id: str) -> str: + is_entra = workload.lower() == "entra" + lead = "Rolling Entra drift PR created by backup pipeline." if is_entra else "Rolling drift PR created by backup pipeline." + return ( + f"{lead}\n\n" + f"- Source branch: `{drift_branch}`\n" + f"- Target branch: `{baseline_branch}`\n" + f"- Last pipeline run: `{build_number}` (BuildId: {build_id})\n\n" + "The automated review summary is generated immediately after PR creation and inserted " + "above the reviewer actions section.\n\n" + "## Reviewer Quick Actions\n\n" + "### 1) Accept all changes\n" + "- Merge PR to accept drift into baseline.\n\n" + "### 2) Reject whole PR and revert\n" + "- Set reviewer vote to **Reject**.\n" + "- Abandon PR.\n" + "- Auto-remediation queues restore (if `AUTO_REMEDIATE_ON_PR_REJECTION=true`).\n\n" + "### 3) Reject only selected policy changes\n" + "- In each `Change Needed` policy thread, comment `/reject` for changes you do not want.\n" + "- Optional: use `/accept` for changes you want to keep.\n" + "- Wait for review-sync pipeline (about 5 minutes) to update PR diff.\n" + "- Merge remaining accepted changes.\n" + "- Post-merge auto-remediation queues restore to reconcile tenant to merged baseline " + "(if `AUTO_REMEDIATE_AFTER_MERGE=true`)." 
+ ) + + +def _threads_with_marker(repo_api: str, headers: dict[str, str], pr_id: int, marker: str) -> bool: + url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1" + payload = _request_json(url, headers=headers) + threads = payload.get("value", []) if isinstance(payload, dict) else [] + for thread in threads: + for comment in thread.get("comments", []): + content = str(comment.get("content", "")) + if marker in content: + return True + return False + + +def _queue_restore_pipeline( + collection_uri: str, + project: str, + headers: dict[str, str], + definition_id: int, + baseline_branch: str, + include_entra_update: bool, + dry_run: bool, + update_assignments: bool, + remove_unmanaged: bool, + max_workers: int, + exclude_csv: str, +) -> dict[str, Any]: + build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1" + template_parameters = { + "dryRun": dry_run, + "updateAssignments": update_assignments, + "removeObjectsNotInBaseline": remove_unmanaged, + "includeEntraUpdate": include_entra_update, + "baselineBranch": baseline_branch, + "maxWorkers": max_workers, + } + exclude_csv = _normalize_exclude_csv(exclude_csv) + if exclude_csv: + template_parameters["excludeCsv"] = exclude_csv + body = { + "definition": {"id": definition_id}, + "sourceBranch": _ref_from_branch(baseline_branch), + "templateParameters": template_parameters, + } + return _request_json(build_api, headers=headers, method="POST", body=body) + + +def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None: + url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1" + body = { + "comments": [{"parentCommentId": 0, "content": content, "commentType": 1}], + "status": "active", + } + _request_json(url, headers=headers, method="POST", body=body) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Ensure rolling PR exists with optional remediation-on-rejection") + parser.add_argument("--repo-root", required=True) + parser.add_argument("--workload", required=True, choices=["intune", "entra"]) + parser.add_argument("--drift-branch", required=True) + parser.add_argument("--baseline-branch", required=True) + parser.add_argument("--pr-title", required=True) + args = parser.parse_args() + + token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip() + if not token: + raise SystemExit("SYSTEM_ACCESSTOKEN is empty. 
Enable OAuth token access for this pipeline.") + + collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/") + project = os.environ["SYSTEM_TEAMPROJECT"] + repository_id = os.environ["BUILD_REPOSITORY_ID"] + build_number = os.environ.get("BUILD_BUILDNUMBER", "") + build_id = os.environ.get("BUILD_BUILDID", "") + + auto_remediate = _env_bool("AUTO_REMEDIATE_ON_PR_REJECTION", False) + include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False) + remediation_def_id_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "") + remediation_dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False) + remediation_update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True) + remediation_remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False) + remediation_max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10") + remediation_exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", "")) + pr_merge_strategy = _normalize_merge_strategy(_env_text("ROLLING_PR_MERGE_STRATEGY", "rebase")) + create_as_draft = _env_bool("ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS", False) + + try: + remediation_max_workers = int(remediation_max_workers_raw) + except ValueError as exc: + raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS value: {remediation_max_workers_raw}") from exc + + if auto_remediate and not remediation_def_id_raw: + print( + "WARNING: AUTO_REMEDIATE_ON_PR_REJECTION=true but AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty; " + "remediation queueing disabled for this run.", + file=sys.stderr, + ) + auto_remediate = False + + try: + remediation_def_id = int(remediation_def_id_raw) if remediation_def_id_raw else 0 + except ValueError as exc: + raise SystemExit( + f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID value: {remediation_def_id_raw}" + ) from exc + + drift_branch = _normalize_branch(args.drift_branch) + baseline_branch = _normalize_branch(args.baseline_branch) + backup_folder = _env_text("BACKUP_FOLDER", "tenant-state") + reports_subdir = _env_text("REPORTS_SUBDIR", "reports") + workload_dir = _env_text( + "INTUNE_BACKUP_SUBDIR" if args.workload == "intune" else "ENTRA_BACKUP_SUBDIR", + args.workload, + ) + source_ref = _ref_from_branch(drift_branch) + target_ref = _ref_from_branch(baseline_branch) + + repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}" + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + description = _build_description(args.workload, drift_branch, baseline_branch, build_number, build_id) + completion_options = {"mergeStrategy": pr_merge_strategy} + print(f"Rolling PR completion merge strategy: {pr_merge_strategy}") + + active_prs = _query_prs(repo_api, headers, source_ref, target_ref, "active") + if active_prs: + pr = active_prs[0] + pr_id = pr.get("pullRequestId") + current_title = str(pr.get("title") or "") + current_description = str(pr.get("description") or "") + current_merge_strategy = _current_pr_merge_strategy(pr) + desired_description = current_description if current_description.strip() else description + needs_patch = ( + current_title != args.pr_title + or not current_description.strip() + or current_merge_strategy != pr_merge_strategy + ) + if needs_patch: + update_url = f"{repo_api}/pullrequests/{pr_id}?api-version=7.1" + _request_json( + update_url, + headers=headers, + method="PATCH", + body={ + "title": args.pr_title, + "description": desired_description, + "completionOptions": 
completion_options, + }, + ) + web_url = _pr_web_url(pr) + if needs_patch: + print(f"Updated rolling {args.workload} PR #{pr_id}: {web_url}") + else: + print(f"Rolling {args.workload} PR #{pr_id} already up to date: {web_url}") + print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}") + if web_url: + print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}") + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0") + return 0 + + _run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch]) + baseline_commitish = f"origin/{baseline_branch}" if _ref_has_commit(args.repo_root, f"origin/{baseline_branch}") else baseline_branch + drift_commitish = f"origin/{drift_branch}" if _ref_has_commit(args.repo_root, f"origin/{drift_branch}") else "HEAD" + if not _workload_config_diff_exists( + repo_root=args.repo_root, + baseline_commitish=baseline_commitish, + drift_commitish=drift_commitish, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + ): + print( + "Suppressed PR recreation: drift branch has no effective workload configuration diff " + f"against {baseline_branch}." + ) + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1") + return 0 + + drift_tree = _tree_id_for_commitish(args.repo_root, drift_commitish) + abandoned_prs = _query_prs(repo_api, headers, source_ref, target_ref, "abandoned") + matching_abandoned, match_reason = _find_matching_abandoned_pr( + repo_api=repo_api, + headers=headers, + abandoned_prs=abandoned_prs, + drift_tree=drift_tree, + repo_root=args.repo_root, + workload_dir=workload_dir, + backup_folder=backup_folder, + reports_subdir=reports_subdir, + drift_commitish=drift_commitish, + ) + + if matching_abandoned: + if match_reason == "config-fingerprint": + print( + "Matched abandoned PR using configuration fingerprint " + "(ignoring docs/reports churn)." + ) + pr_id = int(matching_abandoned["pullRequestId"]) + if not _pr_has_reject_vote(matching_abandoned): + print( + "Matched abandoned PR without reviewer Reject vote; " + "skipping remediation and suppressing PR recreation for this unchanged drift snapshot." + ) + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1") + return 0 + + if not auto_remediate: + print( + "Suppressed PR recreation: latest drift matches a rejected PR, " + "but AUTO_REMEDIATE_ON_PR_REJECTION is disabled." + ) + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1") + return 0 + + marker = f"Automation marker: AUTO-REMEDIATE-TREE:{drift_tree}" + already_queued = _threads_with_marker(repo_api, headers, pr_id, marker) + + if already_queued: + print( + "Suppressed PR recreation: latest drift matches a previously rejected PR and remediation was already queued." 
+ ) + else: + queued = _queue_restore_pipeline( + collection_uri=collection_uri, + project=project, + headers=headers, + definition_id=remediation_def_id, + baseline_branch=baseline_branch, + include_entra_update=include_entra_update, + dry_run=remediation_dry_run, + update_assignments=remediation_update_assignments, + remove_unmanaged=remediation_remove_unmanaged, + max_workers=remediation_max_workers, + exclude_csv=remediation_exclude_csv, + ) + build_queued_id = queued.get("id") + build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "") + if not build_url and build_queued_id: + build_url = f"{collection_uri}/{project}/_build/results?buildId={build_queued_id}" + + comment = ( + "Auto-remediation queued because the latest drift matches a rejected PR.\n\n" + f"Workload: {args.workload}\n" + f"Rejected PR: #{pr_id}\n" + f"Drift tree: {drift_tree}\n" + f"Restore pipeline definition: {remediation_def_id}\n" + f"Restore run: {build_url or '(queued)'}\n\n" + f"{marker}" + ) + try: + _post_pr_thread(repo_api, headers, pr_id, comment) + except Exception as exc: + print(f"WARNING: Remediation queued, but failed to post PR thread on #{pr_id}: {exc}") + + print( + f"Queued remediation pipeline run (definition={remediation_def_id}, buildId={build_queued_id}) and suppressed PR recreation." + ) + + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1") + return 0 + + if abandoned_prs: + print( + f"No abandoned PR snapshot match for current drift tree (checked {len(abandoned_prs)} abandoned PR(s)); creating/updating rolling PR." + ) + + create_url = f"{repo_api}/pullrequests?api-version=7.1" + created = _request_json( + create_url, + headers=headers, + method="POST", + body={ + "sourceRefName": source_ref, + "targetRefName": target_ref, + "title": args.pr_title, + "description": description, + "isDraft": create_as_draft, + "completionOptions": completion_options, + }, + ) + pr_id = created.get("pullRequestId") + web_url = _pr_web_url(created) + print(f"Created rolling {args.workload} PR #{pr_id}: {web_url}") + print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}") + if web_url: + print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}") + print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0") + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except Exception as exc: + print(f"ERROR: Failed to ensure rolling PR: {exc}", file=sys.stderr) + raise diff --git a/scripts/export_entra_baseline.py b/scripts/export_entra_baseline.py new file mode 100644 index 0000000..5b7ab15 --- /dev/null +++ b/scripts/export_entra_baseline.py @@ -0,0 +1,1313 @@ +#!/usr/bin/env python3 +"""Export selected Entra baseline objects to JSON and markdown.""" + +from __future__ import annotations + +import argparse +import concurrent.futures +import datetime as dt +import json +import os +import pathlib +import re +import subprocess +import threading +import time +from typing import Any +import urllib.error +import urllib.parse +import urllib.request + + +TRANSIENT_RESOLUTION_ERROR_MARKERS = ( + "temporary failure in name resolution", + "temporary failure resolving", + "name or service not known", + "failed to resolve", + "nodename nor servname provided, or not known", + "no address associated with hostname", + "getaddrinfo failed", + "certificate verify failed", + "ssl: certificate_verify_failed", + "timed out", + "connection timed out", + "read timed out", + "connection reset by peer", + "connection 
refused", + "remote end closed connection without response", + "network is unreachable", +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--root", required=True, help="Path to Entra workload backup root (tenant-state/entra).") + parser.add_argument("--token", required=True, help="Microsoft Graph bearer token.") + parser.add_argument("--include-named-locations", default="true", help="Include Entra named locations export (true/false).") + parser.add_argument( + "--include-authentication-strengths", + default="true", + help="Include Entra authentication strengths export (true/false).", + ) + parser.add_argument( + "--include-conditional-access", + default="true", + help="Include Entra Conditional Access policies export (true/false).", + ) + parser.add_argument( + "--include-enterprise-applications", + default="true", + help="Include enterprise applications export (true/false).", + ) + parser.add_argument( + "--include-app-registrations", + default="true", + help="Include app registrations export (true/false).", + ) + parser.add_argument( + "--enterprise-app-workers", + type=int, + default=env_int("ENTRA_ENTERPRISE_APP_WORKERS", 8), + help="Number of parallel workers used to enrich Enterprise Applications (1-32).", + ) + parser.add_argument( + "--fail-on-export-error", + default="true", + help="Fail with non-zero exit code when any requested export category fails (true/false).", + ) + parser.add_argument( + "--previous-snapshot-ref", + default="", + help="Optional git branch/ref used as fallback source for resolution backfill (for example origin/drift/entra).", + ) + return parser.parse_args() + + +def log(message: str) -> None: + print(message, flush=True) + + +def to_bool(value: str) -> bool: + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + +def env_int(name: str, default: int) -> int: + raw = os.getenv(name) + if raw is None: + return default + try: + return int(raw) + except ValueError: + return default + + +def sanitize_filename(value: str) -> str: + cleaned = re.sub(r'[\\/:*?"<>|]+', "_", value).strip() + cleaned = re.sub(r"\s+", " ", cleaned) + return cleaned[:180] if len(cleaned) > 180 else cleaned + + +def _normalize_branch_name(branch: str) -> str: + normalized = str(branch or "").strip() + if normalized.startswith("$(") and normalized.endswith(")"): + return "" + for _ in range(2): + if normalized.startswith("origin/"): + normalized = normalized[len("origin/") :] + if normalized.startswith("refs/heads/"): + normalized = normalized[len("refs/heads/") :] + if normalized.startswith("refs/remotes/origin/"): + normalized = normalized[len("refs/remotes/origin/") :] + return normalized + + +def _git_run(repo_root: pathlib.Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]: + proc = subprocess.run( + ["git", *args], + cwd=str(repo_root), + check=False, + capture_output=True, + text=True, + ) + if check and proc.returncode != 0: + stderr = (proc.stderr or "").strip() + raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}") + return proc + + +def _discover_repo_root(path: pathlib.Path) -> pathlib.Path | None: + proc = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + cwd=str(path), + check=False, + capture_output=True, + text=True, + ) + if proc.returncode != 0: + return None + top = (proc.stdout or "").strip() + if not top: + return None + return pathlib.Path(top).resolve() + + +def _resolve_existing_branch_ref(repo_root: 
pathlib.Path, branch: str) -> str: + normalized = _normalize_branch_name(branch) + if not normalized: + return "" + remote_ref = f"refs/remotes/origin/{normalized}" + if _git_run(repo_root, ["show-ref", "--verify", "--quiet", remote_ref], check=False).returncode == 0: + return f"origin/{normalized}" + local_ref = f"refs/heads/{normalized}" + if _git_run(repo_root, ["show-ref", "--verify", "--quiet", local_ref], check=False).returncode == 0: + return normalized + return "" + + +def _repo_relative_posix(repo_root: pathlib.Path, path: pathlib.Path) -> str: + try: + return path.resolve().relative_to(repo_root.resolve()).as_posix() + except Exception: + return "" + + +def _load_resource_sp_cache_from_export(root: pathlib.Path) -> dict[str, dict[str, Any]]: + cache: dict[str, dict[str, Any]] = {} + export_dir = root / "Enterprise Applications" + if not export_dir.is_dir(): + return cache + for path in sorted(export_dir.glob("*.json")): + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + if not isinstance(payload, dict): + continue + app_id = str(payload.get("appId") or "").strip() + if not app_id: + continue + cache[app_id] = { + "id": str(payload.get("id") or "").strip(), + "appId": app_id, + "displayName": str(payload.get("displayName") or "").strip(), + "appRoles": payload.get("appRoles") if isinstance(payload.get("appRoles"), list) else [], + "oauth2PermissionScopes": ( + payload.get("oauth2PermissionScopes") + if isinstance(payload.get("oauth2PermissionScopes"), list) + else [] + ), + } + return cache + + +def _export_object_id_from_path(path: str) -> str: + name = pathlib.PurePosixPath(path).name + if not name.endswith(".json"): + return "" + stem = name[:-5] + if "__" not in stem: + return "" + return stem.rsplit("__", 1)[-1].strip() + + +class PreviousSnapshotLookup: + def __init__(self, repo_root: pathlib.Path, ref: str, category_repo_dir: str): + self.repo_root = repo_root + self.ref = ref + self.paths_by_id: dict[str, str] = {} + self.cache: dict[str, dict[str, Any] | None] = {} + if not category_repo_dir: + return + try: + out = _git_run( + repo_root, + ["ls-tree", "-r", "--name-only", ref, "--", category_repo_dir], + ).stdout + except Exception: + return + for raw in out.splitlines(): + rel_path = raw.strip() + if not rel_path: + continue + object_id = _export_object_id_from_path(rel_path) + if object_id: + self.paths_by_id[object_id] = rel_path + + def get(self, object_id: str) -> dict[str, Any] | None: + object_id = str(object_id or "").strip() + if not object_id: + return None + if object_id in self.cache: + return self.cache[object_id] + rel_path = self.paths_by_id.get(object_id, "") + if not rel_path: + self.cache[object_id] = None + return None + try: + content = _git_run(self.repo_root, ["show", f"{self.ref}:{rel_path}"]).stdout + payload = json.loads(content) + self.cache[object_id] = payload if isinstance(payload, dict) else None + except Exception: + self.cache[object_id] = None + return self.cache[object_id] + + +def is_transient_resolution_error(error: str | None) -> bool: + text = str(error or "").strip().lower() + if not text: + return False + return any(marker in text for marker in TRANSIENT_RESOLUTION_ERROR_MARKERS) + + +def normalize_resolution_error(error: str | None) -> str: + text = str(error or "").strip() + if not text: + return "" + if is_transient_resolution_error(text): + return "" + return text + + +def normalize_resolution_lookup_errors(errors: list[str]) -> list[str]: + normalized: list[str] = [] + for raw in 
errors: + text = str(raw or "").strip() + if not text: + continue + if is_transient_resolution_error(text): + continue + normalized.append(text) + return sorted(set(normalized)) + + +class GraphClient: + def __init__(self, token: str, max_retries: int = 4): + self.token = token + self.max_retries = max_retries + + @staticmethod + def _get_retry_after_seconds(error: urllib.error.HTTPError) -> float | None: + retry_after = error.headers.get("Retry-After") + if not retry_after: + return None + try: + return max(0.0, float(retry_after)) + except ValueError: + return None + + def _request(self, url: str) -> dict: + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + }, + method="GET", + ) + attempt = 0 + while True: + try: + with urllib.request.urlopen(req, timeout=30) as response: + return json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + if exc.code in {429, 500, 502, 503, 504} and attempt < self.max_retries: + retry_after = self._get_retry_after_seconds(exc) + delay = retry_after if retry_after is not None else min(2**attempt, 10) + time.sleep(delay) + attempt += 1 + continue + raise + except urllib.error.URLError: + if attempt < self.max_retries: + time.sleep(min(2**attempt, 10)) + attempt += 1 + continue + raise + + def get_collection(self, url: str) -> tuple[list[dict], str | None]: + items: list[dict] = [] + next_url = url + while next_url: + try: + payload = self._request(next_url) + except urllib.error.HTTPError as exc: + return items, f"HTTP {exc.code}" + except Exception as exc: # noqa: BLE001 + return items, str(exc) + + value = payload.get("value") + if isinstance(value, list): + for item in value: + if isinstance(item, dict): + items.append(item) + next_url = payload.get("@odata.nextLink") + if next_url and not isinstance(next_url, str): + next_url = None + return items, None + + def get_object(self, url: str) -> tuple[dict | None, str | None]: + try: + payload = self._request(url) + if isinstance(payload, dict): + return payload, None + return None, "Unexpected non-object payload" + except urllib.error.HTTPError as exc: + return None, f"HTTP {exc.code}" + except Exception as exc: # noqa: BLE001 + return None, str(exc) + + +def _quote_odata_literal(value: str) -> str: + return value.replace("'", "''") + + +def _normalize_owner(owner: dict[str, Any]) -> dict[str, str]: + return { + "id": str(owner.get("id") or ""), + "displayName": str(owner.get("displayName") or ""), + "userPrincipalName": str(owner.get("userPrincipalName") or ""), + "appId": str(owner.get("appId") or ""), + "odataType": str(owner.get("@odata.type") or ""), + } + + +def resolve_owners( + client: GraphClient, + object_kind: str, + object_id: str, +) -> tuple[list[dict[str, str]], str | None]: + if not object_id: + return [], "Missing object id" + url = ( + f"https://graph.microsoft.com/v1.0/{object_kind}/" + + urllib.parse.quote(object_id) + + "/owners?$select=id,displayName,userPrincipalName,appId" + ) + owners, error = client.get_collection(url) + return [_normalize_owner(owner) for owner in owners], error + + +def _find_permission_by_id( + resource_sp: dict[str, Any], + permission_id: str, + permission_type: str, +) -> dict[str, str]: + result = { + "id": permission_id, + "type": permission_type, + "value": "", + "displayName": "", + "description": "", + } + if permission_type.lower() == "role": + for role in resource_sp.get("appRoles", []): + if str(role.get("id") or "").lower() == 
permission_id.lower(): + result["value"] = str(role.get("value") or "") + result["displayName"] = str(role.get("displayName") or "") + result["description"] = str(role.get("description") or "") + return result + return result + + for scope in resource_sp.get("oauth2PermissionScopes", []): + if str(scope.get("id") or "").lower() == permission_id.lower(): + result["value"] = str(scope.get("value") or "") + result["displayName"] = str(scope.get("adminConsentDisplayName") or "") + result["description"] = str(scope.get("adminConsentDescription") or "") + return result + return result + + +def resolve_required_resource_access( + app: dict[str, Any], + client: GraphClient, + resource_sp_by_appid: dict[str, dict[str, Any] | None], +) -> tuple[list[dict[str, Any]], int, int, list[str]]: + required = app.get("requiredResourceAccess") + if not isinstance(required, list): + return [], 0, 0, [] + + resolved: list[dict[str, Any]] = [] + unresolved_resource_count = 0 + unresolved_permission_count = 0 + lookup_errors: list[str] = [] + for item in required: + if not isinstance(item, dict): + continue + resource_app_id = str(item.get("resourceAppId") or "") + if not resource_app_id: + continue + if resource_app_id not in resource_sp_by_appid: + query_url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals" + + "?$top=1" + + "&$select=id,appId,displayName,appRoles,oauth2PermissionScopes" + + "&$filter=appId eq '" + + urllib.parse.quote(_quote_odata_literal(resource_app_id)) + + "'" + ) + payload, error = client.get_object(query_url) + sp = None + if isinstance(payload, dict): + value = payload.get("value") + if isinstance(value, list) and value and isinstance(value[0], dict): + sp = value[0] + if sp is None: + direct_url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals(appId='" + + urllib.parse.quote(_quote_odata_literal(resource_app_id)) + + "')?$select=id,appId,displayName,appRoles,oauth2PermissionScopes" + ) + direct_payload, direct_error = client.get_object(direct_url) + if isinstance(direct_payload, dict) and str(direct_payload.get("id") or "").strip(): + sp = direct_payload + elif direct_error and not error: + error = direct_error + if error: + lookup_errors.append(f"resourceAppId {resource_app_id}: {error}") + resource_sp_by_appid[resource_app_id] = sp + + resource_sp = resource_sp_by_appid.get(resource_app_id) + resource_name = ( + str(resource_sp.get("displayName") or "") if isinstance(resource_sp, dict) else "" + ) + if not resource_name: + unresolved_resource_count += 1 + permissions = [] + for access in item.get("resourceAccess", []): + if not isinstance(access, dict): + continue + permission_id = str(access.get("id") or "") + permission_type = str(access.get("type") or "") + if not permission_id: + continue + if isinstance(resource_sp, dict): + permissions.append( + _find_permission_by_id(resource_sp, permission_id=permission_id, permission_type=permission_type) + ) + else: + permissions.append( + { + "id": permission_id, + "type": permission_type, + "value": "", + "displayName": "", + "description": "", + } + ) + if permissions: + current = permissions[-1] + if not (str(current.get("value") or "").strip() or str(current.get("displayName") or "").strip()): + unresolved_permission_count += 1 + resolved.append( + { + "resourceAppId": resource_app_id, + "resourceDisplayName": resource_name or "Unresolved", + "permissions": permissions, + } + ) + + return ( + resolved, + unresolved_resource_count, + unresolved_permission_count, + normalize_resolution_lookup_errors(lookup_errors), + ) 
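The helpers above resolve the GUID-only `requiredResourceAccess` entries of an app registration into human-readable names by looking up `appRoles` and `oauth2PermissionScopes` on the resource service principal. The standalone sketch below shows that mapping in simplified form; the payloads, names, and GUIDs are invented for illustration, and the real script fetches the service principal from Graph (handling both Role and Scope permission types) rather than using a hard-coded stand-in.

```python
# Illustrative sketch only: simplified GUID-to-name resolution for one
# requiredResourceAccess entry. All identifiers below are invented.
from typing import Any

# Stand-in for a resource service principal as Graph would return it.
resource_sp: dict[str, Any] = {
    "appId": "11111111-1111-1111-1111-111111111111",
    "displayName": "Example Resource API",
    "appRoles": [
        {
            "id": "22222222-2222-2222-2222-222222222222",
            "value": "Data.Read.All",
            "displayName": "Read all data",
        }
    ],
    "oauth2PermissionScopes": [],
}

# Stand-in for one requiredResourceAccess entry from an app registration.
required_entry: dict[str, Any] = {
    "resourceAppId": "11111111-1111-1111-1111-111111111111",
    "resourceAccess": [
        {"id": "22222222-2222-2222-2222-222222222222", "type": "Role"}
    ],
}


def resolve_entry(entry: dict[str, Any], sp: dict[str, Any]) -> dict[str, Any]:
    """Map permission GUIDs to value/displayName via the resource SP's appRoles."""
    roles_by_id = {str(r.get("id", "")).lower(): r for r in sp.get("appRoles", [])}
    permissions = []
    for access in entry.get("resourceAccess", []):
        role = roles_by_id.get(str(access.get("id", "")).lower(), {})
        permissions.append(
            {
                "id": access.get("id", ""),
                "type": access.get("type", ""),
                "value": role.get("value", ""),
                "displayName": role.get("displayName", ""),
            }
        )
    return {
        "resourceAppId": entry.get("resourceAppId", ""),
        "resourceDisplayName": sp.get("displayName", "") or "Unresolved",
        "permissions": permissions,
    }


print(resolve_entry(required_entry, resource_sp))
# -> resourceDisplayName "Example Resource API", permission value "Data.Read.All"
```

In the exporter itself the service-principal payloads come from Graph and are cached in `resource_sp_by_appid`, so each resource application is queried at most once per run, and names that still cannot be resolved are later backfilled from the previous snapshot to avoid drift noise.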
+ + +def resolve_enterprise_app_role_assignments( + service_principal: dict[str, Any], + client: GraphClient, + resource_sp_by_id: dict[str, dict[str, Any] | None], + resource_sp_lock: threading.Lock | None = None, +) -> tuple[list[dict[str, Any]], str | None, int, int, list[str]]: + sp_id = str(service_principal.get("id") or "") + if not sp_id: + return [], "Missing service principal id", 0, 0, [] + + url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals/" + + urllib.parse.quote(sp_id) + + "/appRoleAssignments?$select=id,resourceId,appRoleId,principalType" + ) + assignments, assignment_error = client.get_collection(url) + resolved: list[dict[str, Any]] = [] + unresolved_resource_count = 0 + unresolved_role_count = 0 + lookup_errors: list[str] = [] + for assignment in assignments: + if not isinstance(assignment, dict): + continue + resource_id = str(assignment.get("resourceId") or "") + app_role_id = str(assignment.get("appRoleId") or "") + principal_type = str(assignment.get("principalType") or "") + if not resource_id: + continue + if resource_sp_lock is not None: + with resource_sp_lock: + has_resource = resource_id in resource_sp_by_id + else: + has_resource = resource_id in resource_sp_by_id + + if not has_resource: + resource_url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals/" + + urllib.parse.quote(resource_id) + + "?$select=id,appId,displayName,appRoles" + ) + payload, error = client.get_object(resource_url) + if resource_sp_lock is not None: + with resource_sp_lock: + if resource_id not in resource_sp_by_id: + resource_sp_by_id[resource_id] = payload if isinstance(payload, dict) else None + else: + resource_sp_by_id[resource_id] = payload if isinstance(payload, dict) else None + if error: + lookup_errors.append(f"resourceId {resource_id}: {error}") + + if resource_sp_lock is not None: + with resource_sp_lock: + resource_sp = resource_sp_by_id.get(resource_id) + else: + resource_sp = resource_sp_by_id.get(resource_id) + resource_name = ( + str(resource_sp.get("displayName") or "") if isinstance(resource_sp, dict) else "" + ) + if not resource_name: + unresolved_resource_count += 1 + role_value = "" + role_display_name = "" + if isinstance(resource_sp, dict): + for role in resource_sp.get("appRoles", []): + if str(role.get("id") or "").lower() == app_role_id.lower(): + role_value = str(role.get("value") or "") + role_display_name = str(role.get("displayName") or "") + break + if not role_value and not role_display_name: + unresolved_role_count += 1 + + resolved.append( + { + "resourceId": resource_id, + "resourceDisplayName": resource_name or "Unresolved", + "appRoleId": app_role_id, + "appRoleValue": role_value, + "appRoleDisplayName": role_display_name, + "principalType": principal_type, + } + ) + return ( + resolved, + assignment_error, + unresolved_resource_count, + unresolved_role_count, + normalize_resolution_lookup_errors(lookup_errors), + ) + + +def resolve_org_owner( + org_id: str, + local_org_by_id: dict[str, str], +) -> dict[str, str]: + org_id_text = str(org_id or "").strip() + if not org_id_text: + return {"tenantId": "", "displayName": "", "resolution": "missing"} + display_name = local_org_by_id.get(org_id_text, "") + if display_name: + return { + "tenantId": org_id_text, + "displayName": display_name, + "resolution": "localTenant", + } + return { + "tenantId": org_id_text, + "displayName": "", + "resolution": "externalOrUnresolved", + } + + +def _is_unresolved_marker(value: Any) -> bool: + text = str(value or "").strip() + if not text: + return 
True + return text.lower() == "unresolved" + + +def _owner_key(owner: dict[str, Any]) -> str: + return ( + str(owner.get("id") or "").strip() + or str(owner.get("appId") or "").strip() + or str(owner.get("userPrincipalName") or "").strip().casefold() + ) + + +def _merge_owner_resolution( + current: list[dict[str, str]], + previous: list[dict[str, Any]], +) -> list[dict[str, str]]: + previous_by_key: dict[str, dict[str, Any]] = {} + for item in previous: + if not isinstance(item, dict): + continue + key = _owner_key(item) + if key: + previous_by_key[key] = item + + merged: list[dict[str, str]] = [] + for item in current: + enriched = dict(item) + key = _owner_key(enriched) + prev = previous_by_key.get(key, {}) + if not str(enriched.get("displayName") or "").strip(): + prev_name = str(prev.get("displayName") or "").strip() + if prev_name: + enriched["displayName"] = prev_name + merged.append(enriched) + return merged + + +def _merge_required_resource_access_resolution( + current: list[dict[str, Any]], + previous: list[dict[str, Any]], +) -> list[dict[str, Any]]: + previous_by_resource: dict[str, dict[str, Any]] = {} + for item in previous: + if not isinstance(item, dict): + continue + key = str(item.get("resourceAppId") or "").strip() + if key: + previous_by_resource[key] = item + + merged: list[dict[str, Any]] = [] + for item in current: + if not isinstance(item, dict): + merged.append(item) + continue + enriched = dict(item) + key = str(enriched.get("resourceAppId") or "").strip() + prev = previous_by_resource.get(key, {}) + if _is_unresolved_marker(enriched.get("resourceDisplayName")): + prev_name = str(prev.get("resourceDisplayName") or "").strip() + if prev_name and not _is_unresolved_marker(prev_name): + enriched["resourceDisplayName"] = prev_name + + current_perms = enriched.get("permissions") + previous_perms = prev.get("permissions") if isinstance(prev, dict) else None + if isinstance(current_perms, list) and isinstance(previous_perms, list): + previous_by_perm: dict[tuple[str, str], dict[str, Any]] = {} + for perm in previous_perms: + if not isinstance(perm, dict): + continue + perm_key = ( + str(perm.get("id") or "").strip(), + str(perm.get("type") or "").strip().lower(), + ) + previous_by_perm[perm_key] = perm + merged_perms: list[dict[str, Any]] = [] + for perm in current_perms: + if not isinstance(perm, dict): + merged_perms.append(perm) + continue + merged_perm = dict(perm) + perm_key = ( + str(merged_perm.get("id") or "").strip(), + str(merged_perm.get("type") or "").strip().lower(), + ) + prev_perm = previous_by_perm.get(perm_key, {}) + for field in ("value", "displayName", "description"): + if not str(merged_perm.get(field) or "").strip(): + prev_value = str(prev_perm.get(field) or "").strip() + if prev_value: + merged_perm[field] = prev_value + merged_perms.append(merged_perm) + enriched["permissions"] = merged_perms + merged.append(enriched) + return merged + + +def _merge_app_role_assignments_resolution( + current: list[dict[str, Any]], + previous: list[dict[str, Any]], +) -> list[dict[str, Any]]: + previous_by_key: dict[tuple[str, str, str], dict[str, Any]] = {} + for item in previous: + if not isinstance(item, dict): + continue + key = ( + str(item.get("resourceId") or "").strip(), + str(item.get("appRoleId") or "").strip(), + str(item.get("principalType") or "").strip(), + ) + previous_by_key[key] = item + + merged: list[dict[str, Any]] = [] + for item in current: + if not isinstance(item, dict): + merged.append(item) + continue + enriched = dict(item) + key = ( + 
str(enriched.get("resourceId") or "").strip(), + str(enriched.get("appRoleId") or "").strip(), + str(enriched.get("principalType") or "").strip(), + ) + prev = previous_by_key.get(key, {}) + if _is_unresolved_marker(enriched.get("resourceDisplayName")): + prev_name = str(prev.get("resourceDisplayName") or "").strip() + if prev_name and not _is_unresolved_marker(prev_name): + enriched["resourceDisplayName"] = prev_name + if not str(enriched.get("appRoleValue") or "").strip(): + prev_value = str(prev.get("appRoleValue") or "").strip() + if prev_value: + enriched["appRoleValue"] = prev_value + if not str(enriched.get("appRoleDisplayName") or "").strip(): + prev_name = str(prev.get("appRoleDisplayName") or "").strip() + if prev_name: + enriched["appRoleDisplayName"] = prev_name + merged.append(enriched) + return merged + + +def _count_unresolved_required_permissions(required: list[dict[str, Any]]) -> tuple[int, int]: + unresolved_resource_count = 0 + unresolved_permission_count = 0 + for item in required: + if not isinstance(item, dict): + continue + if _is_unresolved_marker(item.get("resourceDisplayName")): + unresolved_resource_count += 1 + permissions = item.get("permissions") + if not isinstance(permissions, list): + continue + for permission in permissions: + if not isinstance(permission, dict): + continue + if not str(permission.get("value") or "").strip() and not str(permission.get("displayName") or "").strip(): + unresolved_permission_count += 1 + return unresolved_resource_count, unresolved_permission_count + + +def _count_unresolved_app_role_assignments(assignments: list[dict[str, Any]]) -> tuple[int, int]: + unresolved_resource_count = 0 + unresolved_role_count = 0 + for item in assignments: + if not isinstance(item, dict): + continue + if _is_unresolved_marker(item.get("resourceDisplayName")): + unresolved_resource_count += 1 + if not str(item.get("appRoleValue") or "").strip() and not str(item.get("appRoleDisplayName") or "").strip(): + unresolved_role_count += 1 + return unresolved_resource_count, unresolved_role_count + + +def _owners_need_backfill(owners: list[dict[str, str]]) -> bool: + for owner in owners: + if not isinstance(owner, dict): + continue + if _owner_key(owner) and not str(owner.get("displayName") or "").strip(): + return True + return False + + +def _required_resource_access_needs_backfill(required: list[dict[str, Any]]) -> bool: + unresolved_resources, unresolved_permissions = _count_unresolved_required_permissions(required) + return unresolved_resources > 0 or unresolved_permissions > 0 + + +def _app_role_assignments_need_backfill(assignments: list[dict[str, Any]]) -> bool: + unresolved_resources, unresolved_roles = _count_unresolved_app_role_assignments(assignments) + return unresolved_resources > 0 or unresolved_roles > 0 + + +def enrich_enterprise_application( + item: dict[str, Any], + client: GraphClient, + resource_sp_by_id: dict[str, dict[str, Any] | None], + resource_sp_lock: threading.Lock, + local_org_by_id: dict[str, str], +) -> tuple[list[dict[str, str]], list[dict[str, Any]], dict[str, Any]]: + object_id = str(item.get("id") or "").strip() + owners, owners_error = resolve_owners( + client=client, + object_kind="servicePrincipals", + object_id=object_id, + ) + ( + role_assignments, + role_assignment_error, + unresolved_resources, + unresolved_roles, + role_lookup_errors, + ) = resolve_enterprise_app_role_assignments( + service_principal=item, + client=client, + resource_sp_by_id=resource_sp_by_id, + resource_sp_lock=resource_sp_lock, + ) + 
resolution_status = { + "owners": { + "count": len(owners), + "error": normalize_resolution_error(owners_error), + }, + "appRoleAssignments": { + "count": len(role_assignments), + "collectionError": normalize_resolution_error(role_assignment_error), + "unresolvedResourceCount": unresolved_resources, + "unresolvedRoleCount": unresolved_roles, + "lookupErrors": normalize_resolution_lookup_errors(role_lookup_errors), + }, + } + owner_org = resolve_org_owner( + org_id=str(item.get("appOwnerOrganizationId") or ""), + local_org_by_id=local_org_by_id, + ) + return owners, role_assignments, {"resolutionStatus": resolution_status, "appOwnerOrganizationResolved": owner_org} + + +def write_collection( + root: pathlib.Path, + rel_dir: str, + title: str, + items: list[dict], + source_url: str, +) -> int: + out_dir = root / rel_dir + out_dir.mkdir(parents=True, exist_ok=True) + + written = 0 + for idx, item in enumerate(items, start=1): + object_id = str(item.get("id") or item.get("templateId") or f"item-{idx}") + display_name = ( + str(item.get("displayName") or item.get("name") or object_id) + .replace("\n", " ") + .strip() + ) + file_name = f"{sanitize_filename(display_name)}__{object_id}.json" + (out_dir / file_name).write_text( + json.dumps(item, indent=5, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + written += 1 + + md_path = out_dir / f"{title}.md" + lines = [ + f"# {title}", + "", + f"Source: `{source_url}`", + f"Object count: **{written}**", + "", + "| Name | Id |", + "|---|---|", + ] + for item in sorted( + items, + key=lambda x: ( + str(x.get("displayName") or x.get("name") or "").strip().casefold(), + str(x.get("id") or x.get("templateId") or "").strip().casefold(), + ), + ): + name = str(item.get("displayName") or item.get("name") or "Unknown").replace("|", "\\|") + oid = str(item.get("id") or item.get("templateId") or "") + lines.append(f"| {name} | {oid} |") + md_path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return written + + +def main() -> int: + args = parse_args() + root = pathlib.Path(args.root).resolve() + token = args.token.strip() + enterprise_app_workers = max(1, min(int(args.enterprise_app_workers), 32)) + include_named_locations = to_bool(args.include_named_locations) + include_auth_strengths = to_bool(args.include_authentication_strengths) + include_conditional_access = to_bool(args.include_conditional_access) + include_enterprise_apps = to_bool(args.include_enterprise_applications) + include_app_registrations = to_bool(args.include_app_registrations) + fail_on_export_error = to_bool(args.fail_on_export_error) + + if not token: + log("No Graph token provided. 
Skipping Entra baseline export.") + return 0 + + client = GraphClient(token) + + exports: list[dict[str, str]] = [] + if include_named_locations: + exports.append( + { + "title": "Named Locations", + "rel_dir": "Named Locations", + "url": "https://graph.microsoft.com/v1.0/identity/conditionalAccess/namedLocations", + } + ) + if include_auth_strengths: + exports.append( + { + "title": "Authentication Strengths", + "rel_dir": "Authentication Strengths", + "url": "https://graph.microsoft.com/beta/identity/conditionalAccess/authenticationStrength/policies", + } + ) + if include_conditional_access: + exports.append( + { + "title": "Conditional Access", + "rel_dir": "Conditional Access", + "url": "https://graph.microsoft.com/v1.0/identity/conditionalAccess/policies", + } + ) + if include_enterprise_apps: + exports.append( + { + "title": "Enterprise Applications", + "rel_dir": "Enterprise Applications", + "url": ( + "https://graph.microsoft.com/v1.0/servicePrincipals" + "?$filter=servicePrincipalType%20eq%20'Application'" + "&$select=id,appId,displayName,servicePrincipalType,appOwnerOrganizationId," + "accountEnabled,publisherName,preferredSingleSignOnMode,tags," + "appRoleAssignmentRequired,appRoles,oauth2PermissionScopes," + "homepage,replyUrls,logoutUrl,servicePrincipalNames,verifiedPublisher" + ), + } + ) + if include_app_registrations: + exports.append( + { + "title": "App Registrations", + "rel_dir": "App Registrations", + "url": ( + "https://graph.microsoft.com/v1.0/applications" + "?$select=id,appId,displayName,description,signInAudience,publisherDomain," + "identifierUris,createdDateTime,tags,requiredResourceAccess,api,web,spa," + "publicClient,isFallbackPublicClient,verifiedPublisher" + ), + } + ) + + if not exports: + log("All Entra export categories are disabled. 
Skipping Entra baseline export.") + return 0 + + total_written = 0 + warnings = 0 + failed_exports: list[tuple[str, str]] = [] + resource_sp_by_appid: dict[str, dict[str, Any] | None] = {} + resource_sp_by_id: dict[str, dict[str, Any] | None] = {} + local_org_by_id: dict[str, str] = {} + if include_app_registrations: + cached_resource_sps = _load_resource_sp_cache_from_export(root) + if cached_resource_sps: + resource_sp_by_appid.update(cached_resource_sps) + for sp in cached_resource_sps.values(): + object_id = str(sp.get("id") or "").strip() + if object_id: + resource_sp_by_id[object_id] = sp + log( + "Primed resource service-principal cache from local Enterprise Applications export: " + + f"{len(cached_resource_sps)} objects" + ) + repo_root = _discover_repo_root(root) + previous_snapshot_ref = "" + if repo_root is not None: + candidates = [ + args.previous_snapshot_ref, + os.getenv("DRIFT_BRANCH_ENTRA", ""), + os.getenv("DRIFT_BRANCH", ""), + "origin/drift/entra", + os.getenv("BASELINE_BRANCH", ""), + ] + for candidate_raw in candidates: + candidate = _resolve_existing_branch_ref(repo_root, candidate_raw) + if candidate: + previous_snapshot_ref = candidate + break + root_repo_rel = _repo_relative_posix(repo_root, root) if repo_root is not None else "" + previous_lookup_by_title: dict[str, PreviousSnapshotLookup] = {} + if repo_root is not None and previous_snapshot_ref and root_repo_rel: + for title in ("Enterprise Applications", "App Registrations"): + category_repo_dir = f"{root_repo_rel}/{title}".strip("/") + previous_lookup_by_title[title] = PreviousSnapshotLookup( + repo_root=repo_root, + ref=previous_snapshot_ref, + category_repo_dir=category_repo_dir, + ) + log(f"Using previous snapshot reference for resolution backfill: {previous_snapshot_ref}") + else: + log("No previous snapshot reference found for resolution backfill; unresolved placeholders may cause drift noise.") + + log("Resolving local organization details...") + org_payload, org_error = client.get_object( + "https://graph.microsoft.com/v1.0/organization?$select=id,displayName" + ) + if org_error: + log(f"Warning: unable to resolve local organization details ({org_error})") + warnings += 1 + elif isinstance(org_payload, dict): + org_values = org_payload.get("value") + if isinstance(org_values, list): + for org in org_values: + if not isinstance(org, dict): + continue + org_id = str(org.get("id") or "").strip() + display_name = str(org.get("displayName") or "").strip() + if org_id: + local_org_by_id[org_id] = display_name + + for export in exports: + log(f"Starting export: {export['title']}") + items, error = client.get_collection(export["url"]) + if error: + log(f"Warning: unable to export {export['title']} from {export['url']} ({error})") + warnings += 1 + failed_exports.append((export["title"], str(error))) + continue + + if export["title"] == "Enterprise Applications": + enterprise_items = [item for item in items if isinstance(item, dict)] + total = len(enterprise_items) + log( + f"Resolving Enterprise Applications details for {total} objects " + + f"using {enterprise_app_workers} worker(s)..." 
+ ) + for item in enterprise_items: + app_id = str(item.get("appId") or "").strip() + object_id = str(item.get("id") or "").strip() + if app_id and app_id not in resource_sp_by_appid: + resource_sp_by_appid[app_id] = item + if object_id and object_id not in resource_sp_by_id: + resource_sp_by_id[object_id] = item + + resource_sp_lock = threading.Lock() + if enterprise_app_workers == 1: + for idx, item in enumerate(enterprise_items, start=1): + if idx == 1 or idx % 25 == 0 or idx == total: + log(f"Enterprise Applications progress: {idx}/{total}") + owners, role_assignments, resolved = enrich_enterprise_application( + item=item, + client=client, + resource_sp_by_id=resource_sp_by_id, + resource_sp_lock=resource_sp_lock, + local_org_by_id=local_org_by_id, + ) + previous_item = None + object_id = str(item.get("id") or "").strip() + previous_lookup = previous_lookup_by_title.get("Enterprise Applications") + needs_backfill = ( + _owners_need_backfill(owners) + or _app_role_assignments_need_backfill(role_assignments) + or not str(resolved["appOwnerOrganizationResolved"].get("displayName") or "").strip() + ) + if previous_lookup and object_id and needs_backfill: + previous_item = previous_lookup.get(object_id) + if isinstance(previous_item, dict): + owners = _merge_owner_resolution( + owners, + previous_item.get("ownersResolved") + if isinstance(previous_item.get("ownersResolved"), list) + else [], + ) + role_assignments = _merge_app_role_assignments_resolution( + role_assignments, + previous_item.get("appRoleAssignmentsResolved") + if isinstance(previous_item.get("appRoleAssignmentsResolved"), list) + else [], + ) + previous_owner_org = previous_item.get("appOwnerOrganizationResolved") + if ( + isinstance(previous_owner_org, dict) + and not str(resolved["appOwnerOrganizationResolved"].get("displayName") or "").strip() + ): + prev_owner_name = str(previous_owner_org.get("displayName") or "").strip() + if prev_owner_name: + resolved["appOwnerOrganizationResolved"]["displayName"] = prev_owner_name + unresolved_resources, unresolved_roles = _count_unresolved_app_role_assignments(role_assignments) + app_role_status = resolved["resolutionStatus"].get("appRoleAssignments", {}) + if isinstance(app_role_status, dict): + app_role_status["count"] = len(role_assignments) + app_role_status["unresolvedResourceCount"] = unresolved_resources + app_role_status["unresolvedRoleCount"] = unresolved_roles + resolved["resolutionStatus"]["appRoleAssignments"] = app_role_status + item["ownersResolved"] = owners + item["appRoleAssignmentsResolved"] = role_assignments + item["appOwnerOrganizationResolved"] = resolved["appOwnerOrganizationResolved"] + item["resolutionStatus"] = resolved["resolutionStatus"] + else: + completed = 0 + with concurrent.futures.ThreadPoolExecutor(max_workers=enterprise_app_workers) as pool: + future_to_item = { + pool.submit( + enrich_enterprise_application, + item, + client, + resource_sp_by_id, + resource_sp_lock, + local_org_by_id, + ): item + for item in enterprise_items + } + for future in concurrent.futures.as_completed(future_to_item): + item = future_to_item[future] + try: + owners, role_assignments, resolved = future.result() + except Exception as exc: # noqa: BLE001 + warnings += 1 + normalized_error = normalize_resolution_error(str(exc)) + owners = [] + role_assignments = [] + resolved = { + "appOwnerOrganizationResolved": resolve_org_owner( + org_id=str(item.get("appOwnerOrganizationId") or ""), + local_org_by_id=local_org_by_id, + ), + "resolutionStatus": { + "owners": {"count": 0, 
"error": normalized_error}, + "appRoleAssignments": { + "count": 0, + "collectionError": normalized_error, + "unresolvedResourceCount": 0, + "unresolvedRoleCount": 0, + "lookupErrors": [], + }, + }, + } + + previous_item = None + object_id = str(item.get("id") or "").strip() + previous_lookup = previous_lookup_by_title.get("Enterprise Applications") + needs_backfill = ( + _owners_need_backfill(owners) + or _app_role_assignments_need_backfill(role_assignments) + or not str(resolved["appOwnerOrganizationResolved"].get("displayName") or "").strip() + ) + if previous_lookup and object_id and needs_backfill: + previous_item = previous_lookup.get(object_id) + if isinstance(previous_item, dict): + owners = _merge_owner_resolution( + owners, + previous_item.get("ownersResolved") + if isinstance(previous_item.get("ownersResolved"), list) + else [], + ) + role_assignments = _merge_app_role_assignments_resolution( + role_assignments, + previous_item.get("appRoleAssignmentsResolved") + if isinstance(previous_item.get("appRoleAssignmentsResolved"), list) + else [], + ) + previous_owner_org = previous_item.get("appOwnerOrganizationResolved") + if ( + isinstance(previous_owner_org, dict) + and not str(resolved["appOwnerOrganizationResolved"].get("displayName") or "").strip() + ): + prev_owner_name = str(previous_owner_org.get("displayName") or "").strip() + if prev_owner_name: + resolved["appOwnerOrganizationResolved"]["displayName"] = prev_owner_name + unresolved_resources, unresolved_roles = _count_unresolved_app_role_assignments( + role_assignments + ) + app_role_status = resolved["resolutionStatus"].get("appRoleAssignments", {}) + if isinstance(app_role_status, dict): + app_role_status["count"] = len(role_assignments) + app_role_status["unresolvedResourceCount"] = unresolved_resources + app_role_status["unresolvedRoleCount"] = unresolved_roles + resolved["resolutionStatus"]["appRoleAssignments"] = app_role_status + + item["ownersResolved"] = owners + item["appRoleAssignmentsResolved"] = role_assignments + item["appOwnerOrganizationResolved"] = resolved["appOwnerOrganizationResolved"] + item["resolutionStatus"] = resolved["resolutionStatus"] + + completed += 1 + if completed == 1 or completed % 25 == 0 or completed == total: + log(f"Enterprise Applications progress: {completed}/{total}") + + if export["title"] == "App Registrations": + total = len(items) + log(f"Resolving App Registrations details for {total} objects...") + for idx, item in enumerate(items, start=1): + if not isinstance(item, dict): + continue + if idx == 1 or idx % 25 == 0 or idx == total: + log(f"App Registrations progress: {idx}/{total}") + object_id = str(item.get("id") or "").strip() + owners, owners_error = resolve_owners( + client=client, + object_kind="applications", + object_id=object_id, + ) + ( + required_resolved, + unresolved_resources, + unresolved_permissions, + required_lookup_errors, + ) = resolve_required_resource_access( + app=item, + client=client, + resource_sp_by_appid=resource_sp_by_appid, + ) + previous_item = None + previous_lookup = previous_lookup_by_title.get("App Registrations") + needs_backfill = ( + _owners_need_backfill(owners) + or _required_resource_access_needs_backfill(required_resolved) + ) + if previous_lookup and object_id and needs_backfill: + previous_item = previous_lookup.get(object_id) + if isinstance(previous_item, dict): + owners = _merge_owner_resolution( + owners, + previous_item.get("ownersResolved") + if isinstance(previous_item.get("ownersResolved"), list) + else [], + ) + 
required_resolved = _merge_required_resource_access_resolution( + required_resolved, + previous_item.get("requiredResourceAccessResolved") + if isinstance(previous_item.get("requiredResourceAccessResolved"), list) + else [], + ) + unresolved_resources, unresolved_permissions = _count_unresolved_required_permissions(required_resolved) + item["ownersResolved"] = owners + item["requiredResourceAccessResolved"] = required_resolved + item["resolutionStatus"] = { + "owners": { + "count": len(owners), + "error": normalize_resolution_error(owners_error), + }, + "requiredResourceAccess": { + "resourceCount": len(required_resolved), + "unresolvedResourceCount": unresolved_resources, + "unresolvedPermissionCount": unresolved_permissions, + "lookupErrors": normalize_resolution_lookup_errors(required_lookup_errors), + }, + } + + written = write_collection( + root=root, + rel_dir=export["rel_dir"], + title=export["title"], + items=items, + source_url=export["url"], + ) + total_written += written + log(f"Exported {written} objects: {export['title']}") + + if failed_exports and fail_on_export_error: + log("Entra baseline export failed because one or more requested categories could not be exported:") + for title, error in failed_exports: + log(f" - {title}: {error}") + log( + "Requested category failures are treated as fatal to avoid committing a partial or stale backup snapshot." + ) + return 2 + + log( + "Entra baseline export complete. " + + f"Total objects written: {total_written}. " + + f"Warnings: {warnings}." + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/filter_entra_enrichment_noise.py b/scripts/filter_entra_enrichment_noise.py new file mode 100644 index 0000000..68b8075 --- /dev/null +++ b/scripts/filter_entra_enrichment_noise.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +"""Revert Entra JSON file edits when only enrichment metadata changed.""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path, PurePosixPath +from typing import Any + + +ENRICHMENT_KEY_NAMES = { + "ownersresolved", + "approleassignmentsresolved", + "requiredresourceaccessresolved", + "appownerorganizationresolved", + "resolutionstatus", +} + + +def _to_bool(value: str) -> bool: + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + +def _run_git(repo_root: Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[bytes]: + proc = subprocess.run( + ["git", *args], + cwd=str(repo_root), + check=False, + capture_output=True, + ) + if check and proc.returncode != 0: + stderr = proc.stderr.decode("utf-8", errors="replace").strip() + raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}") + return proc + + +def _strip_enrichment(value: Any) -> Any: + if isinstance(value, dict): + cleaned: dict[str, Any] = {} + for key, child in value.items(): + if str(key).strip().lower() in ENRICHMENT_KEY_NAMES: + continue + cleaned[key] = _strip_enrichment(child) + return cleaned + if isinstance(value, list): + return [_strip_enrichment(item) for item in value] + return value + + +def _is_enrichment_only_change(old_text: str, new_text: str) -> bool: + if not old_text or not new_text: + return False + try: + old_payload = json.loads(old_text) + new_payload = json.loads(new_text) + except Exception: + return False + if not isinstance(old_payload, dict) or not isinstance(new_payload, dict): + return False + + old_stripped = _strip_enrichment(old_payload) + new_stripped = 
_strip_enrichment(new_payload) + if old_stripped != new_stripped: + return False + return old_payload != new_payload + + +def _modified_paths(repo_root: Path, workload_root: str) -> list[str]: + proc = _run_git( + repo_root, + ["diff", "--name-only", "-z", "--diff-filter=M", "--", workload_root], + check=True, + ) + raw = proc.stdout.split(b"\x00") + paths: list[str] = [] + for chunk in raw: + text = chunk.decode("utf-8", errors="replace").strip() + if text: + paths.append(text) + return paths + + +def _is_json_path(path: str) -> bool: + return PurePosixPath(path.replace("\\", "/")).suffix.lower() == ".json" + + +def filter_enrichment_only_files(repo_root: Path, workload_root: str) -> list[str]: + reverted: list[str] = [] + for rel_path in _modified_paths(repo_root, workload_root): + if not _is_json_path(rel_path): + continue + + head_proc = _run_git(repo_root, ["show", f"HEAD:{rel_path}"], check=False) + if head_proc.returncode != 0: + continue + old_text = head_proc.stdout.decode("utf-8", errors="replace") + + abs_path = repo_root / rel_path + if not abs_path.is_file(): + continue + new_text = abs_path.read_text(encoding="utf-8") + + if _is_enrichment_only_change(old_text, new_text): + _run_git(repo_root, ["checkout", "--quiet", "--", rel_path], check=True) + reverted.append(rel_path) + return reverted + + +def find_enrichment_only_modified_files(repo_root: Path, workload_root: str) -> list[str]: + matches: list[str] = [] + for rel_path in _modified_paths(repo_root, workload_root): + if not _is_json_path(rel_path): + continue + + head_proc = _run_git(repo_root, ["show", f"HEAD:{rel_path}"], check=False) + if head_proc.returncode != 0: + continue + old_text = head_proc.stdout.decode("utf-8", errors="replace") + + abs_path = repo_root / rel_path + if not abs_path.is_file(): + continue + new_text = abs_path.read_text(encoding="utf-8") + + if _is_enrichment_only_change(old_text, new_text): + matches.append(rel_path) + return matches + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo-root", required=True, help="Repository root path.") + parser.add_argument( + "--workload-root", + default="tenant-state/entra", + help="Path scope inside repo to inspect (default: tenant-state/entra).", + ) + parser.add_argument( + "--fail-on-residual-enrichment-drift", + default="true", + help="Exit non-zero when enrichment-only modified files remain after filtering (true/false).", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + repo_root = Path(args.repo_root).resolve() + reverted = filter_enrichment_only_files(repo_root=repo_root, workload_root=args.workload_root) + if reverted: + print(f"Reverted enrichment-only Entra file changes: {len(reverted)}") + for path in reverted: + print(f" - {path}") + else: + print("No enrichment-only Entra file changes detected.") + + residual = find_enrichment_only_modified_files(repo_root=repo_root, workload_root=args.workload_root) + if residual: + print(f"Residual enrichment-only Entra file changes still present: {len(residual)}") + for path in residual: + print(f" - {path}") + if _to_bool(args.fail_on_residual_enrichment_drift): + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/filter_intune_partial_settings_noise.py b/scripts/filter_intune_partial_settings_noise.py new file mode 100644 index 0000000..ea93886 --- /dev/null +++ b/scripts/filter_intune_partial_settings_noise.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 
+"""Revert Intune Settings Catalog partial exports where settings payload is missing.""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path +from typing import Any + + +def _to_bool(value: str) -> bool: + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + +def _run_git_show(repo_root: Path, ref: str, rel_path: str) -> str | None: + proc = subprocess.run( + ["git", "show", f"{ref}:{rel_path}"], + cwd=str(repo_root), + check=False, + capture_output=True, + ) + if proc.returncode != 0: + return None + return proc.stdout.decode("utf-8", errors="replace") + + +def _is_settings_catalog_json(file_path: Path, backup_root: Path) -> bool: + if file_path.suffix.lower() != ".json": + return False + rel = file_path.relative_to(backup_root).as_posix().lower() + return rel.startswith("settings catalog/") + + +def _is_partial_settings_payload(payload: Any) -> bool: + if not isinstance(payload, dict): + return False + setting_count = payload.get("settingCount") + if not isinstance(setting_count, int) or setting_count <= 0: + return False + settings = payload.get("settings") + if not isinstance(settings, list): + return True + return len(settings) == 0 + + +def restore_partial_settings_from_baseline( + repo_root: Path, + backup_root: Path, + baseline_ref: str, +) -> tuple[list[str], list[str]]: + restored: list[str] = [] + unresolved: list[str] = [] + + for file_path in sorted(backup_root.rglob("*.json")): + if not _is_settings_catalog_json(file_path, backup_root): + continue + + try: + current_payload = json.loads(file_path.read_text(encoding="utf-8")) + except Exception: + continue + + if not _is_partial_settings_payload(current_payload): + continue + + rel_path = file_path.relative_to(repo_root).as_posix() + baseline_text = _run_git_show(repo_root, baseline_ref, rel_path) + if not baseline_text: + unresolved.append(rel_path) + continue + + try: + baseline_payload = json.loads(baseline_text) + except Exception: + unresolved.append(rel_path) + continue + + baseline_settings = baseline_payload.get("settings") + if not isinstance(baseline_settings, list) or len(baseline_settings) == 0: + unresolved.append(rel_path) + continue + + current_payload["settings"] = baseline_settings + file_path.write_text(json.dumps(current_payload, indent=5, ensure_ascii=False), encoding="utf-8") + restored.append(rel_path) + + return restored, unresolved + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo-root", required=True, help="Repository root path.") + parser.add_argument( + "--backup-root", + default="tenant-state/intune", + help="Path to Intune backup root (default: tenant-state/intune).", + ) + parser.add_argument( + "--baseline-ref", + default="HEAD", + help="Git ref used as baseline for restoration (default: HEAD).", + ) + parser.add_argument( + "--fail-on-unresolved-partial-exports", + default="true", + help="Exit non-zero when partial exports cannot be restored from baseline (true/false).", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + repo_root = Path(args.repo_root).resolve() + backup_root_arg = Path(args.backup_root) + backup_root = backup_root_arg if backup_root_arg.is_absolute() else repo_root / backup_root_arg + backup_root = backup_root.resolve() + + restored, unresolved = restore_partial_settings_from_baseline( + repo_root=repo_root, + backup_root=backup_root, + baseline_ref=args.baseline_ref, + ) + 
+ if restored: + print(f"Restored partial Intune Settings Catalog exports from baseline: {len(restored)}") + for path in restored: + print(f" - {path}") + else: + print("No partial Intune Settings Catalog exports detected.") + + if unresolved: + print(f"Unresolved partial Intune Settings Catalog exports: {len(unresolved)}") + for path in unresolved: + print(f" - {path}") + if _to_bool(args.fail_on_unresolved_partial_exports): + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/generate_app_inventory_report.py b/scripts/generate_app_inventory_report.py new file mode 100644 index 0000000..25502b9 --- /dev/null +++ b/scripts/generate_app_inventory_report.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python3 +"""Generate a dedicated apps inventory CSV from Entra app exports.""" + +from __future__ import annotations + +import argparse +import csv +import json +from pathlib import Path +from typing import Any + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--root", required=True, help="Path to the Entra workload backup root (tenant-state/entra).") + parser.add_argument( + "--output-dir", + required=True, + help="Directory where apps inventory report files will be written.", + ) + parser.add_argument( + "--output-name", + default="apps-inventory.csv", + help="Output CSV filename (default: apps-inventory.csv).", + ) + return parser.parse_args() + + +def safe_text(value: object) -> str: + if value is None: + return "" + return str(value).strip() + + +def summarize_owners(owners: object) -> tuple[int, str]: + if not isinstance(owners, list): + return 0, "" + + labels: list[str] = [] + for owner in owners: + if not isinstance(owner, dict): + continue + label = ( + safe_text(owner.get("displayName")) + or safe_text(owner.get("userPrincipalName")) + or safe_text(owner.get("appId")) + or safe_text(owner.get("id")) + or "Unknown owner" + ) + labels.append(label) + + return len(labels), "; ".join(labels) + + +def summarize_required_resource_access(entries: object) -> tuple[int, str]: + if not isinstance(entries, list): + return 0, "" + + summary: list[str] = [] + total_permissions = 0 + for entry in entries: + if not isinstance(entry, dict): + continue + resource_name = safe_text(entry.get("resourceDisplayName")) or "Unresolved resource" + resource_app_id = safe_text(entry.get("resourceAppId")) + permissions = entry.get("permissions") + permission_labels: list[str] = [] + if isinstance(permissions, list): + for permission in permissions: + if not isinstance(permission, dict): + continue + total_permissions += 1 + perm_type = safe_text(permission.get("type")) or "UnknownType" + perm_label = ( + safe_text(permission.get("value")) + or safe_text(permission.get("displayName")) + or safe_text(permission.get("id")) + or "UnknownPermission" + ) + permission_labels.append(f"{perm_label} [{perm_type}]") + + resource_label = resource_name + if resource_app_id: + resource_label += f" ({resource_app_id})" + if permission_labels: + summary.append(f"{resource_label}: {', '.join(permission_labels)}") + else: + summary.append(resource_label) + + return total_permissions, "; ".join(summary) + + +def summarize_enterprise_app_role_assignments(entries: object) -> tuple[int, str]: + if not isinstance(entries, list): + return 0, "" + + summary: list[str] = [] + count = 0 + for entry in entries: + if not isinstance(entry, dict): + continue + count += 1 + resource_name = safe_text(entry.get("resourceDisplayName")) or 
"Unresolved resource" + resource_id = safe_text(entry.get("resourceId")) + role_name = ( + safe_text(entry.get("appRoleValue")) + or safe_text(entry.get("appRoleDisplayName")) + or safe_text(entry.get("appRoleId")) + or "Default access" + ) + label = resource_name + if resource_id: + label += f" ({resource_id})" + summary.append(f"{label}: {role_name}") + + return count, "; ".join(summary) + + +def verified_publisher_label(value: object) -> str: + if not isinstance(value, dict): + return "" + return ( + safe_text(value.get("displayName")) + or safe_text(value.get("verifiedPublisherId")) + or safe_text(value.get("addedDateTime")) + ) + + +def iter_exported_json(export_dir: Path) -> list[tuple[Path, dict[str, Any]]]: + if not export_dir.exists(): + return [] + items: list[tuple[Path, dict[str, Any]]] = [] + for path in sorted(export_dir.rglob("*.json")): + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + if isinstance(payload, dict): + items.append((path, payload)) + return items + + +def main() -> int: + args = parse_args() + root = Path(args.root).resolve() + output_dir = Path(args.output_dir).resolve() + output_path = output_dir / args.output_name + + if not root.exists(): + raise SystemExit(f"Backup path does not exist: {root}") + + app_reg_dir = root / "App Registrations" + ent_apps_dir = root / "Enterprise Applications" + + app_reg_items = iter_exported_json(app_reg_dir) + ent_app_items = iter_exported_json(ent_apps_dir) + + rows: list[dict[str, str]] = [] + + for source_path, payload in app_reg_items: + owner_count, owners = summarize_owners(payload.get("ownersResolved")) + perm_count, permissions = summarize_required_resource_access( + payload.get("requiredResourceAccessResolved") + ) + rows.append( + { + "AppType": "AppRegistration", + "DisplayName": safe_text(payload.get("displayName")) or source_path.stem, + "ObjectId": safe_text(payload.get("id")), + "AppId": safe_text(payload.get("appId")), + "SignInAudience": safe_text(payload.get("signInAudience")), + "ServicePrincipalType": "", + "AccountEnabled": "", + "PublisherDomain": safe_text(payload.get("publisherDomain")), + "PublisherName": "", + "VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")), + "CreatedDateTime": safe_text(payload.get("createdDateTime")), + "OwnersCount": str(owner_count), + "OwnersResolved": owners, + "ResolvedPermissionCount": str(perm_count), + "ResolvedPermissions": permissions, + "ResolvedAppRoleAssignmentCount": "0", + "ResolvedAppRoleAssignments": "", + "SourceFile": source_path.relative_to(root).as_posix(), + } + ) + + for source_path, payload in ent_app_items: + owner_count, owners = summarize_owners(payload.get("ownersResolved")) + assignment_count, assignments = summarize_enterprise_app_role_assignments( + payload.get("appRoleAssignmentsResolved") + ) + rows.append( + { + "AppType": "EnterpriseApplication", + "DisplayName": safe_text(payload.get("displayName")) or source_path.stem, + "ObjectId": safe_text(payload.get("id")), + "AppId": safe_text(payload.get("appId")), + "SignInAudience": "", + "ServicePrincipalType": safe_text(payload.get("servicePrincipalType")), + "AccountEnabled": safe_text(payload.get("accountEnabled")), + "PublisherDomain": "", + "PublisherName": safe_text(payload.get("publisherName")), + "VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")), + "CreatedDateTime": "", + "OwnersCount": str(owner_count), + "OwnersResolved": owners, + "ResolvedPermissionCount": "0", + 
"ResolvedPermissions": "", + "ResolvedAppRoleAssignmentCount": str(assignment_count), + "ResolvedAppRoleAssignments": assignments, + "SourceFile": source_path.relative_to(root).as_posix(), + } + ) + + rows.sort( + key=lambda row: ( + row["AppType"].lower(), + row["DisplayName"].lower(), + row["ObjectId"].lower(), + ) + ) + + output_dir.mkdir(parents=True, exist_ok=True) + fieldnames = [ + "AppType", + "DisplayName", + "ObjectId", + "AppId", + "SignInAudience", + "ServicePrincipalType", + "AccountEnabled", + "PublisherDomain", + "PublisherName", + "VerifiedPublisher", + "CreatedDateTime", + "OwnersCount", + "OwnersResolved", + "ResolvedPermissionCount", + "ResolvedPermissions", + "ResolvedAppRoleAssignmentCount", + "ResolvedAppRoleAssignments", + "SourceFile", + ] + with output_path.open("w", encoding="utf-8", newline="") as handle: + writer = csv.DictWriter(handle, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + print( + "Generated apps inventory report: " + + f"{output_path} " + + f"(rows={len(rows)}, appRegistrations={len(app_reg_items)}, enterpriseApps={len(ent_app_items)})" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/generate_assignment_report.py b/scripts/generate_assignment_report.py new file mode 100644 index 0000000..ea9fd68 --- /dev/null +++ b/scripts/generate_assignment_report.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python3 +"""Generate a policy assignment inventory report from Intune backup JSON files.""" + +from __future__ import annotations + +import argparse +import csv +import json +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Iterable + + +GROUP_TARGET_TYPES = { + "#microsoft.graph.groupAssignmentTarget", + "#microsoft.graph.exclusionGroupAssignmentTarget", +} + +DEFAULT_POLICY_TYPES = { + "app configuration", + "app protection", + "applications", + "compliance policies", + "conditional access", + "device configurations", + "enrollment configurations", + "enrollment profiles", + "filters", + "scripts", + "settings catalog", +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).") + parser.add_argument( + "--output-dir", + required=True, + help="Directory where report files will be written.", + ) + parser.add_argument( + "--policy-type", + action="append", + default=[], + help=( + "Optional filter for policy type (top-level backup folder name). " + "Repeat the flag or pass a comma-separated list." + ), + ) + parser.add_argument( + "--graph-type", + action="append", + default=[], + help=( + "Optional filter for Graph @odata.type values. " + "Repeat the flag or pass a comma-separated list." 
+ ), + ) + return parser.parse_args() + + +@dataclass +class AssignmentRow: + category: str + policy_type: str + object_name: str + object_type: str + assignment_state: str + assignment_count: int + intent: str + assignment_target: str + target_type: str + assignment_filter: str + filter_type: str + source_file: str + + +def safe_text(value: object) -> str: + if value is None: + return "" + return str(value).strip() + + +def normalize_intent(intent: str) -> str: + normalized = safe_text(intent).lower() + if normalized in {"apply", "include"}: + return "Include" + if normalized in {"exclude"}: + return "Exclude" + if not normalized: + return "Include" + return normalized.capitalize() + + +def infer_intent(assignment: dict, target_type: str) -> str: + target_type_lower = safe_text(target_type).lower() + if "exclusion" in target_type_lower: + return "Exclude" + explicit = safe_text(assignment.get("intent")) + if explicit: + return normalize_intent(explicit) + return "Include" + + +def resolve_assignment_target(target: dict) -> str: + target_type = safe_text(target.get("@odata.type")) + if target_type == "#microsoft.graph.allDevicesAssignmentTarget": + return "All devices" + if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget": + return "All users" + if target_type in GROUP_TARGET_TYPES: + return ( + safe_text(target.get("groupDisplayName")) + or safe_text(target.get("groupName")) + or safe_text(target.get("groupId")) + or "Unresolved group" + ) + return ( + safe_text(target.get("groupDisplayName")) + or safe_text(target.get("groupName")) + or safe_text(target.get("displayName")) + or safe_text(target.get("id")) + or "Unknown target" + ) + + +def escape_md_cell(value: str) -> str: + return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ").strip() + + +def parse_filter_values(raw_values: list[str]) -> set[str]: + values = set() + for raw in raw_values: + for item in safe_text(raw).split(","): + normalized = safe_text(item) + if normalized: + values.add(normalized.lower()) + return values + + +def iter_assignment_rows( + root: Path, + policy_type_filter: set[str], + graph_type_filter: set[str], +) -> Iterable[AssignmentRow]: + excluded_categories = { + "App Registrations", + "Enterprise Applications", + } + for path in sorted(root.rglob("*.json")): + try: + rel_path = path.relative_to(root) + except ValueError: + continue + + if rel_path.parts and rel_path.parts[0] in {"reports"}: + continue + if "__archive__" in rel_path.parts: + continue + + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + if not isinstance(payload, dict): + continue + + object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name")) + if not object_name: + object_name = path.stem.split("__")[0] + object_type = safe_text(payload.get("@odata.type")) + category = "/".join(rel_path.parent.parts) + policy_type = rel_path.parts[0] if rel_path.parts else "" + + if any( + category == excluded or category.startswith(f"{excluded}/") + for excluded in excluded_categories + ): + continue + + if policy_type_filter and policy_type.lower() not in policy_type_filter: + continue + if graph_type_filter and object_type.lower() not in graph_type_filter: + continue + + assignments = payload.get("assignments") + if not isinstance(assignments, list): + yield AssignmentRow( + category=category, + policy_type=policy_type, + object_name=object_name, + object_type=object_type, + assignment_state="NotExported", + assignment_count=0, + intent="None", + 
assignment_target="Not exported in backup", + target_type="", + assignment_filter="", + filter_type="", + source_file=rel_path.as_posix(), + ) + continue + + if not assignments: + yield AssignmentRow( + category=category, + policy_type=policy_type, + object_name=object_name, + object_type=object_type, + assignment_state="Unassigned", + assignment_count=0, + intent="None", + assignment_target="No assignments", + target_type="", + assignment_filter="", + filter_type="", + source_file=rel_path.as_posix(), + ) + continue + + assignment_count = len([item for item in assignments if isinstance(item, dict)]) + if assignment_count == 0: + yield AssignmentRow( + category=category, + policy_type=policy_type, + object_name=object_name, + object_type=object_type, + assignment_state="Unassigned", + assignment_count=0, + intent="None", + assignment_target="No assignments", + target_type="", + assignment_filter="", + filter_type="", + source_file=rel_path.as_posix(), + ) + continue + + for assignment in assignments: + if not isinstance(assignment, dict): + continue + target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {} + target_type = safe_text(target.get("@odata.type")) + intent = infer_intent(assignment, target_type) + assignment_target = resolve_assignment_target(target) + assignment_filter = safe_text(target.get("deviceAndAppManagementAssignmentFilterId")) + filter_type = safe_text(target.get("deviceAndAppManagementAssignmentFilterType")) + yield AssignmentRow( + category=category, + policy_type=policy_type, + object_name=object_name, + object_type=object_type, + assignment_state="Assigned", + assignment_count=assignment_count, + intent=intent, + assignment_target=assignment_target, + target_type=target_type, + assignment_filter=assignment_filter, + filter_type=filter_type, + source_file=rel_path.as_posix(), + ) + + +def write_csv(rows: list[AssignmentRow], output_path: Path) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("w", encoding="utf-8", newline="") as handle: + writer = csv.writer(handle) + writer.writerow( + [ + "Category", + "PolicyType", + "ObjectName", + "ObjectType", + "AssignmentState", + "AssignmentCount", + "Intent", + "AssignmentTarget", + "TargetType", + "AssignmentFilter", + "FilterType", + "SourceFile", + ] + ) + for row in rows: + writer.writerow( + [ + row.category, + row.policy_type, + row.object_name, + row.object_type, + row.assignment_state, + row.assignment_count, + row.intent, + row.assignment_target, + row.target_type, + row.assignment_filter, + row.filter_type, + row.source_file, + ] + ) + + +def write_markdown(rows: list[AssignmentRow], output_path: Path) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") + objects = {(row.category, row.object_name, row.source_file) for row in rows} + assigned_objects = { + (row.category, row.object_name, row.source_file) + for row in rows + if row.assignment_state == "Assigned" + } + unassigned_objects = { + (row.category, row.object_name, row.source_file) + for row in rows + if row.assignment_state == "Unassigned" + } + not_exported_objects = { + (row.category, row.object_name, row.source_file) + for row in rows + if row.assignment_state == "NotExported" + } + policy_type_counts = {} + for row in rows: + key = row.policy_type or "Unknown" + policy_type_counts[key] = policy_type_counts.get(key, 0) + 1 + + with output_path.open("w", encoding="utf-8") as handle: + 
handle.write("# Policy Assignment Inventory Report\n\n") + handle.write(f"Generated: `{generated}`\n\n") + handle.write(f"- Total objects in report: **{len(objects)}**\n") + handle.write(f"- Objects with assignments: **{len(assigned_objects)}**\n") + handle.write(f"- Objects without assignments: **{len(unassigned_objects)}**\n") + handle.write(f"- Objects with assignment field not exported: **{len(not_exported_objects)}**\n") + handle.write(f"- Total rows: **{len(rows)}**\n\n") + handle.write("## Rows by policy type\n\n") + handle.write("| Policy Type | Rows |\n") + handle.write("|---|---|\n") + for policy_type, count in sorted(policy_type_counts.items(), key=lambda item: item[0].lower()): + handle.write(f"| {escape_md_cell(policy_type)} | {count} |\n") + handle.write("\n") + handle.write( + "| Policy Type | Category | Object | Object Type | Assignment State | Assignment Count | Intent | Assignment Target | Target Type | Filter | Filter Type | Source |\n" + ) + handle.write("|---|---|---|---|---|---|---|---|---|---|---|---|\n") + for row in rows: + handle.write( + "| " + + " | ".join( + [ + escape_md_cell(row.policy_type), + escape_md_cell(row.category), + escape_md_cell(row.object_name), + escape_md_cell(row.object_type), + escape_md_cell(row.assignment_state), + escape_md_cell(str(row.assignment_count)), + escape_md_cell(row.intent), + escape_md_cell(row.assignment_target), + escape_md_cell(row.target_type), + escape_md_cell(row.assignment_filter), + escape_md_cell(row.filter_type), + escape_md_cell(row.source_file), + ] + ) + + " |\n" + ) + + +def main() -> int: + args = parse_args() + root = Path(args.root).resolve() + output_dir = Path(args.output_dir).resolve() + policy_type_filter = parse_filter_values(args.policy_type) + graph_type_filter = parse_filter_values(args.graph_type) + using_default_policy_scope = False + + if not policy_type_filter: + policy_type_filter = set(DEFAULT_POLICY_TYPES) + using_default_policy_scope = True + + if not root.exists(): + raise SystemExit(f"Backup path does not exist: {root}") + + rows = sorted( + iter_assignment_rows(root, policy_type_filter, graph_type_filter), + key=lambda x: ( + x.policy_type.lower(), + x.category.lower(), + x.object_name.lower(), + x.assignment_state, + x.intent.lower(), + x.assignment_target.lower(), + ), + ) + + markdown_path = output_dir / "policy-assignments.md" + csv_path = output_dir / "policy-assignments.csv" + write_markdown(rows, markdown_path) + write_csv(rows, csv_path) + + print( + f"Generated assignment report with {len(rows)} rows: " + f"{markdown_path} and {csv_path}" + ) + if using_default_policy_scope: + print( + "Applied default policy scope: " + + ", ".join(sorted(DEFAULT_POLICY_TYPES)) + ) + elif policy_type_filter: + print(f"Applied policy type filter: {', '.join(sorted(policy_type_filter))}") + if graph_type_filter: + print(f"Applied graph type filter: {', '.join(sorted(graph_type_filter))}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/generate_object_inventory_reports.py b/scripts/generate_object_inventory_reports.py new file mode 100644 index 0000000..caabe05 --- /dev/null +++ b/scripts/generate_object_inventory_reports.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +"""Generate broad object inventory CSV reports from backup JSON files.""" + +from __future__ import annotations + +import argparse +import csv +import json +import re +from pathlib import Path + + +GROUP_TARGET_TYPES = { + "#microsoft.graph.groupAssignmentTarget", + 
"#microsoft.graph.exclusionGroupAssignmentTarget", +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).") + parser.add_argument( + "--output-dir", + required=True, + help="Directory where report files will be written.", + ) + parser.add_argument( + "--per-type-dir", + default="Object Inventory", + help="Directory name under output-dir for per-policy-type CSVs.", + ) + return parser.parse_args() + + +def safe_text(value: object) -> str: + if value is None: + return "" + return str(value).strip() + + +def slugify(value: str) -> str: + text = safe_text(value).lower() + text = re.sub(r"[^a-z0-9]+", "-", text).strip("-") + return text or "unknown" + + +def infer_intent(assignment: dict, target_type: str) -> str: + if "exclusion" in target_type.lower(): + return "Exclude" + explicit = safe_text(assignment.get("intent")).lower() + if explicit in {"exclude"}: + return "Exclude" + return "Include" + + +def resolve_assignment_target(target: dict) -> str: + target_type = safe_text(target.get("@odata.type")) + if target_type == "#microsoft.graph.allDevicesAssignmentTarget": + return "All devices" + if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget": + return "All users" + if target_type in GROUP_TARGET_TYPES: + return ( + safe_text(target.get("groupDisplayName")) + or safe_text(target.get("groupName")) + or safe_text(target.get("groupId")) + or "Unresolved group" + ) + return ( + safe_text(target.get("groupDisplayName")) + or safe_text(target.get("groupName")) + or safe_text(target.get("displayName")) + or safe_text(target.get("id")) + or "Unknown target" + ) + + +def summarize_assignments(payload: dict) -> dict[str, object]: + assignments = payload.get("assignments") + if not isinstance(assignments, list): + return { + "state": "NotExported", + "total": 0, + "include_targets": "", + "exclude_targets": "", + "all_users_assigned": "false", + "all_devices_assigned": "false", + } + + include_targets: list[str] = [] + exclude_targets: list[str] = [] + all_users = False + all_devices = False + + valid = [item for item in assignments if isinstance(item, dict)] + for assignment in valid: + target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {} + target_type = safe_text(target.get("@odata.type")) + target_name = resolve_assignment_target(target) + intent = infer_intent(assignment, target_type) + if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget": + all_users = True + if target_type == "#microsoft.graph.allDevicesAssignmentTarget": + all_devices = True + if intent == "Exclude": + exclude_targets.append(target_name) + else: + include_targets.append(target_name) + + state = "Assigned" if valid else "Unassigned" + if assignments == []: + state = "Unassigned" + + return { + "state": state, + "total": len(valid), + "include_targets": "; ".join(sorted(set(include_targets))), + "exclude_targets": "; ".join(sorted(set(exclude_targets))), + "all_users_assigned": str(all_users).lower(), + "all_devices_assigned": str(all_devices).lower(), + } + + +def iter_rows(root: Path) -> list[dict[str, str]]: + rows: list[dict[str, str]] = [] + for path in sorted(root.rglob("*.json")): + rel = path.relative_to(root) + if rel.parts and rel.parts[0] in {"reports"}: + continue + if "__archive__" in rel.parts: + continue + + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except 
Exception: + continue + if not isinstance(payload, dict): + continue + + summary = summarize_assignments(payload) + policy_type = rel.parts[0] if rel.parts else "" + category = "/".join(rel.parent.parts) + object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name")) + if not object_name: + object_name = path.stem.split("__")[0] + + rows.append( + { + "PolicyType": policy_type, + "Category": category, + "ObjectName": object_name, + "ObjectType": safe_text(payload.get("@odata.type")), + "ObjectId": safe_text(payload.get("id")), + "AppId": safe_text(payload.get("appId")), + "Description": safe_text(payload.get("description")), + "AssignmentState": safe_text(summary["state"]), + "AssignmentCount": str(summary["total"]), + "IncludeTargets": safe_text(summary["include_targets"]), + "ExcludeTargets": safe_text(summary["exclude_targets"]), + "AllUsersAssigned": safe_text(summary["all_users_assigned"]), + "AllDevicesAssigned": safe_text(summary["all_devices_assigned"]), + "SourceFile": rel.as_posix(), + } + ) + + rows.sort( + key=lambda row: ( + row["PolicyType"].lower(), + row["Category"].lower(), + row["ObjectName"].lower(), + row["SourceFile"].lower(), + ) + ) + return rows + + +def write_csv(path: Path, rows: list[dict[str, str]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + headers = [ + "PolicyType", + "Category", + "ObjectName", + "ObjectType", + "ObjectId", + "AppId", + "Description", + "AssignmentState", + "AssignmentCount", + "IncludeTargets", + "ExcludeTargets", + "AllUsersAssigned", + "AllDevicesAssigned", + "SourceFile", + ] + with path.open("w", encoding="utf-8", newline="") as handle: + writer = csv.DictWriter(handle, fieldnames=headers) + writer.writeheader() + writer.writerows(rows) + + +def main() -> int: + args = parse_args() + root = Path(args.root).resolve() + output_dir = Path(args.output_dir).resolve() + per_type_root = output_dir / args.per_type_dir + + if not root.exists(): + raise SystemExit(f"Backup path does not exist: {root}") + + rows = iter_rows(root) + all_report = output_dir / "object-inventory-all.csv" + write_csv(all_report, rows) + + per_type_counts: dict[str, int] = {} + for policy_type in sorted({row["PolicyType"] for row in rows}): + type_rows = [row for row in rows if row["PolicyType"] == policy_type] + per_type_report = per_type_root / f"{slugify(policy_type)}-inventory.csv" + write_csv(per_type_report, type_rows) + per_type_counts[policy_type] = len(type_rows) + + print( + f"Generated object inventory reports: all={all_report}, " + f"perTypeCount={len(per_type_counts)}, rows={len(rows)}" + ) + for policy_type, count in sorted(per_type_counts.items(), key=lambda item: item[0].lower()): + print(f" - {policy_type}: {count} rows") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/queue_post_merge_restore.py b/scripts/queue_post_merge_restore.py new file mode 100644 index 0000000..428c1a5 --- /dev/null +++ b/scripts/queue_post_merge_restore.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python3 +"""Queue restore automatically after merged rolling PR that contains /reject decisions.""" + +from __future__ import annotations + +import argparse +import base64 +import datetime as dt +import json +import os +import re +import sys +import urllib.parse +from pathlib import Path +from typing import Any + +# common.py lives in the same directory; ensure it can be imported when the +# script is executed directly. 
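+# The path entry is only needed long enough to import common and is popped again right
+# after the import, so sys.path is left unchanged for anything imported later. The
+# module-level aliases below (_env_text, _env_bool, _request_json) are thin aliases for
+# the shared helpers exposed by common.py.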
+_sys_path_inserted = False
+if __file__:
+    _script_dir = str(Path(__file__).resolve().parent)
+    if _script_dir not in sys.path:
+        sys.path.insert(0, _script_dir)
+        _sys_path_inserted = True
+
+import common
+
+if _sys_path_inserted:
+    sys.path.pop(0)
+
+_env_text = common.env_text
+_env_bool = common.env_bool
+_request_json = common.request_json
+
+REJECT_CMD_RE = re.compile(r"(?im)^\s*(?:/|#)?reject\b")
+DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")
+AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
+MERGE_MARKER_PREFIX = "AUTO-RESTORE-AFTER-MERGE:"
+
+
+def _normalize_branch(branch: str) -> str:
+    b = branch.strip()
+    if b.startswith("refs/heads/"):
+        return b[len("refs/heads/") :]
+    return b
+
+
+def _ref_from_branch(branch: str) -> str:
+    return f"refs/heads/{_normalize_branch(branch)}"
+
+
+def _parse_iso_utc(value: str) -> dt.datetime | None:
+    text = (value or "").strip()
+    if not text:
+        return None
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        parsed = dt.datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=dt.timezone.utc)
+    return parsed.astimezone(dt.timezone.utc)
+
+
+def _query_completed_prs(
+    repo_api: str,
+    headers: dict[str, str],
+    source_ref: str,
+    target_ref: str,
+) -> list[dict[str, Any]]:
+    query = urllib.parse.urlencode(
+        {
+            "searchCriteria.status": "completed",
+            "searchCriteria.sourceRefName": source_ref,
+            "searchCriteria.targetRefName": target_ref,
+            "api-version": "7.1",
+        },
+        quote_via=urllib.parse.quote,
+        safe="/",
+    )
+    payload = _request_json(f"{repo_api}/pullrequests?{query}", headers=headers)
+    items = payload.get("value", []) if isinstance(payload, dict) else []
+    return sorted(items, key=lambda x: x.get("closedDate", ""), reverse=True)
+
+
+def _threads(repo_api: str, headers: dict[str, str], pr_id: int) -> list[dict[str, Any]]:
+    payload = _request_json(
+        f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1",
+        headers=headers,
+    )
+    return payload.get("value", []) if isinstance(payload, dict) else []
+
+
+def _thread_comment_contents(threads: list[dict[str, Any]]) -> list[str]:
+    out: list[str] = []
+    for thread in threads:
+        comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
+        for comment in comments:
+            out.append(str(comment.get("content", "") or ""))
+    return out
+
+
+def _ticket_path_from_content(content: str) -> str | None:
+    marker_re = re.compile(
+        r"(?:^|\n)\s*(?:Automation marker:\s*)?"
+        + re.escape(AUTO_TICKET_THREAD_PREFIX)
+        + r"(?P<id>[A-Za-z0-9_-]+)\s*(?:$|\n)"
+    )
+    match = marker_re.search(content or "")
+    if not match:
+        return None
+    encoded = match.group("id")
+    padding = "=" * ((4 - len(encoded) % 4) % 4)
+    try:
+        return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
+    except Exception:
+        return None
+
+
+def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None:
+    decision: str | None = None
+
+    def _comment_sort_key(comment: dict[str, Any]) -> tuple[int, int]:
+        try:
+            comment_id = int(comment.get("id", 0))
+        except Exception:
+            comment_id = 0
+        try:
+            parent_id = int(comment.get("parentCommentId", 0))
+        except Exception:
+            parent_id = 0
+        return (comment_id, parent_id)
+
+    for comment in sorted(comments, key=_comment_sort_key):
+        content = str(comment.get("content", "") or "")
+        match = DECISION_RE.search(content)
+        if match:
+            decision = match.group("decision").lower()
+    return decision
+
+
+def _rejected_ticket_paths(threads: list[dict[str, Any]]) -> list[str]:
+    rejected: set[str] = set()
+    for thread in threads:
+        comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
+        marker_path: str | None = None
+        for comment in comments:
+            marker_path = _ticket_path_from_content(str(comment.get("content", "") or ""))
+            if marker_path:
+                break
+        if not marker_path:
+            continue
+
+        decision = _latest_thread_decision(comments)
+        if decision == "reject":
+            rejected.add(marker_path)
+    return sorted(rejected)
+
+
+def _has_reject_signal(comments: list[str]) -> bool:
+    for content in comments:
+        if REJECT_CMD_RE.search(content):
+            return True
+        if "Auto-action: /reject detected." in content:
+            return True
+    return False
+
+
+def _has_merge_marker(comments: list[str], merge_commit: str) -> bool:
+    marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
+    return any(marker in content for content in comments)
+
+
+def _is_permission_error(exc: Exception) -> bool:
+    msg = str(exc).lower()
+    return "http 403" in msg or "forbidden" in msg
+
+
+def _normalize_exclude_csv(value: str) -> str:
+    normalized = str(value or "").strip()
+    if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}:
+        return ""
+    return normalized
+
+
+def _diagnose_queue_permission(
+    collection_uri: str,
+    project: str,
+    headers: dict[str, str],
+    definition_id: int,
+) -> None:
+    definition_url = (
+        f"{collection_uri}/{project}/_apis/build/definitions/{definition_id}"
+        "?api-version=7.1"
+    )
+    try:
+        payload = _request_json(definition_url, headers=headers)
+        definition_name = str(payload.get("name", "") or "").strip()
+        print(
+            "Diagnostic: restore pipeline definition is readable "
+            f"(id={definition_id}, name='{definition_name or 'n/a'}')."
+        )
+        print(
+            "Diagnostic: queue call was forbidden, so missing permission is likely "
+            "'Queue builds' on that restore pipeline (or pipeline is not authorized to use it)."
+        )
+    except Exception as diag_exc:
+        print(
+            "Diagnostic: unable to read restore pipeline definition "
+            f"id={definition_id}. Details: {diag_exc}"
+        )
+        print(
+            "Diagnostic: likely wrong definition ID, wrong project, or missing 'View builds' permission "
+            "for the calling pipeline identity."
+ ) + + +def _queue_restore_pipeline( + collection_uri: str, + project: str, + headers: dict[str, str], + definition_id: int, + baseline_branch: str, + include_entra_update: bool, + dry_run: bool, + update_assignments: bool, + remove_unmanaged: bool, + max_workers: int, + exclude_csv: str, + restore_mode: str = "full", + restore_paths_csv: str = "", +) -> dict[str, Any]: + build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1" + template_parameters = { + "dryRun": dry_run, + "updateAssignments": update_assignments, + "removeObjectsNotInBaseline": remove_unmanaged, + "includeEntraUpdate": include_entra_update, + "baselineBranch": baseline_branch, + "maxWorkers": max_workers, + "restoreMode": restore_mode, + } + if restore_mode == "selective" and restore_paths_csv.strip(): + template_parameters["restorePathsCsv"] = restore_paths_csv.strip() + exclude_csv = _normalize_exclude_csv(exclude_csv) + if exclude_csv: + template_parameters["excludeCsv"] = exclude_csv + body = { + "definition": {"id": definition_id}, + "sourceBranch": _ref_from_branch(baseline_branch), + "templateParameters": template_parameters, + } + return _request_json(build_api, headers=headers, method="POST", body=body) + + +def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None: + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", + headers=headers, + method="POST", + body={ + "comments": [ + { + "parentCommentId": 0, + "content": content, + "commentType": 1, + } + ], + "status": 1, + }, + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Queue restore after merged rolling PR with /reject decisions") + parser.add_argument("--workload", required=True, choices=["intune", "entra"]) + parser.add_argument("--drift-branch", required=True) + parser.add_argument("--baseline-branch", required=True) + args = parser.parse_args() + + if not _env_bool("AUTO_REMEDIATE_AFTER_MERGE", False): + print("Post-merge auto-remediation disabled (set AUTO_REMEDIATE_AFTER_MERGE=true).") + return 0 + + token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip() + if not token: + raise SystemExit("SYSTEM_ACCESSTOKEN is empty.") + + definition_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "") + if not definition_raw: + print( + "Post-merge auto-remediation queue skipped: " + "AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty." 
+ ) + return 0 + + try: + definition_id = int(definition_raw) + except ValueError as exc: + raise SystemExit(f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID: {definition_raw}") from exc + + max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10") + try: + max_workers = int(max_workers_raw) + except ValueError as exc: + raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS: {max_workers_raw}") from exc + + lookback_hours_raw = _env_text("AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS", "168") + try: + lookback_hours = int(lookback_hours_raw) + except ValueError as exc: + raise SystemExit(f"Invalid AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: {lookback_hours_raw}") from exc + + collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/") + project = os.environ["SYSTEM_TEAMPROJECT"] + repository_id = os.environ["BUILD_REPOSITORY_ID"] + + include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False) + dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False) + update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True) + remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False) + exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", "")) + + source_ref = _ref_from_branch(args.drift_branch) + target_ref = _ref_from_branch(args.baseline_branch) + repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}" + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(hours=lookback_hours) + completed = _query_completed_prs(repo_api, headers, source_ref, target_ref) + + candidate: dict[str, Any] | None = None + candidate_threads: list[dict[str, Any]] = [] + candidate_comments: list[str] = [] + + for pr in completed: + closed_at = _parse_iso_utc(str(pr.get("closedDate", "") or "")) + if closed_at and closed_at < cutoff: + continue + + merge_commit = (((pr.get("lastMergeCommit") or {}).get("commitId")) or "").strip() + if not merge_commit: + continue + + pr_id = int(pr.get("pullRequestId")) + threads = _threads(repo_api, headers, pr_id) + comments = _thread_comment_contents(threads) + + if not _has_reject_signal(comments): + continue + + if _has_merge_marker(comments, merge_commit): + continue + + candidate = pr + candidate_threads = threads + candidate_comments = comments + break + + if not candidate: + print("No merged rolling PR requiring post-merge remediation was found.") + return 0 + + pr_id = int(candidate.get("pullRequestId")) + merge_commit = (((candidate.get("lastMergeCommit") or {}).get("commitId")) or "").strip() + rejected_paths = _rejected_ticket_paths(candidate_threads) + + restore_mode = "full" + restore_paths_csv = "" + if args.workload == "intune" and rejected_paths: + restore_mode = "selective" + restore_paths_csv = ",".join(rejected_paths) + print(f"Post-merge remediation scope: selective ({len(rejected_paths)} rejected path(s)).") + for path in rejected_paths: + print(f" - {path}") + else: + print("Post-merge remediation scope: full.") + + try: + queued = _queue_restore_pipeline( + collection_uri=collection_uri, + project=project, + headers=headers, + definition_id=definition_id, + baseline_branch=args.baseline_branch, + include_entra_update=include_entra_update, + dry_run=dry_run, + update_assignments=update_assignments, + remove_unmanaged=remove_unmanaged, + max_workers=max_workers, + exclude_csv=exclude_csv, + restore_mode=restore_mode, + restore_paths_csv=restore_paths_csv, 
+        )
+    except Exception as exc:
+        if _is_permission_error(exc):
+            print(
+                "WARNING: Post-merge remediation queue skipped due to missing permissions. "
+                f"Definition={definition_id}. Details: {exc}"
+            )
+            _diagnose_queue_permission(collection_uri, project, headers, definition_id)
+            print(
+                "Grant 'Queue builds' permission for this pipeline identity on the restore pipeline "
+                "and ensure the pipeline has access to run it."
+            )
+            return 0
+        raise
+
+    build_id = queued.get("id")
+    build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "")
+    if not build_url and build_id:
+        build_url = f"{collection_uri}/{project}/_build/results?buildId={build_id}"
+
+    marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
+    comment = (
+        "Auto-remediation queued after merged rolling PR with reviewer /reject decision(s).\n\n"
+        f"Workload: {args.workload}\n"
+        f"Merged PR: #{pr_id}\n"
+        f"Merge commit: {merge_commit}\n"
+        f"Restore pipeline definition: {definition_id}\n"
+        f"Restore run: {build_url or '(queued)'}\n\n"
+        f"{marker}"
+    )
+
+    try:
+        _post_pr_thread(repo_api, headers, pr_id, comment)
+    except Exception as exc:
+        print(f"WARNING: Restore queued, but failed posting merge marker comment on PR #{pr_id}: {exc}")
+
+    print(
+        f"Queued post-merge remediation for PR #{pr_id} (merge_commit={merge_commit}, buildId={build_id})."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except Exception as exc:
+        print(f"WARNING: Failed post-merge remediation check: {exc}", file=sys.stderr)
+        raise
diff --git a/scripts/resolve_ca_references.py b/scripts/resolve_ca_references.py
new file mode 100644
index 0000000..3645161
--- /dev/null
+++ b/scripts/resolve_ca_references.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+"""Resolve Conditional Access GUID references to display names in backup JSON."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+import urllib.error
+import urllib.parse
+import urllib.request
+
+
+SPECIAL_APP_IDS = {
+    "All": "All applications",
+    "None": "None",
+    "Office365": "Office 365",
+    "MicrosoftAdminPortals": "Microsoft Admin Portals",
+}
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--root", required=True, help="Path to workload backup root (for Entra: tenant-state/entra).")
+    parser.add_argument("--token", required=True, help="Microsoft Graph access token.")
+    return parser.parse_args()
+
+
+class GraphResolver:
+    def __init__(self, token: str):
+        self.token = token.strip()
+        self.group_cache: dict[str, str | None] = {}
+        self.role_cache: dict[str, str | None] = {}
+        self.app_cache: dict[str, str | None] = {}
+        self.location_cache: dict[str, str | None] = {}
+        self.auth_strength_cache: dict[str, str | None] = {}
+        self._warned: set[str] = set()
+
+    def _warn_once(self, key: str, message: str) -> None:
+        if key in self._warned:
+            return
+        self._warned.add(key)
+        print(f"Warning: {message}")
+
+    def _get(self, url: str) -> dict | None:
+        req = urllib.request.Request(
+            url,
+            headers={
+                "Authorization": f"Bearer {self.token}",
+                "Accept": "application/json",
+            },
+            method="GET",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as exc:
+            if exc.code == 404:
+                return None
+            self._warn_once(url, f"Graph lookup failed for {url} (HTTP {exc.code})")
+            return None
+        except Exception as exc:  # noqa: BLE001
+            self._warn_once(url, f"Graph 
lookup failed for {url} ({exc})") + return None + + def group_name(self, group_id: str) -> str | None: + if group_id in self.group_cache: + return self.group_cache[group_id] + url = ( + "https://graph.microsoft.com/v1.0/groups/" + + urllib.parse.quote(group_id) + + "?$select=id,displayName" + ) + payload = self._get(url) + name = payload.get("displayName") if isinstance(payload, dict) else None + self.group_cache[group_id] = name + return name + + def role_name(self, role_template_id: str) -> str | None: + if role_template_id in self.role_cache: + return self.role_cache[role_template_id] + url = ( + "https://graph.microsoft.com/v1.0/directoryRoleTemplates/" + + urllib.parse.quote(role_template_id) + + "?$select=id,displayName" + ) + payload = self._get(url) + name = payload.get("displayName") if isinstance(payload, dict) else None + self.role_cache[role_template_id] = name + return name + + def app_name(self, app_or_object_id: str) -> str | None: + if app_or_object_id in SPECIAL_APP_IDS: + return SPECIAL_APP_IDS[app_or_object_id] + if app_or_object_id in self.app_cache: + return self.app_cache[app_or_object_id] + + # CA app conditions usually use appId; try appId lookup first. + url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals" + + "?$select=id,appId,displayName" + + "&$top=1" + + "&$filter=appId eq '" + + urllib.parse.quote(app_or_object_id) + + "'" + ) + payload = self._get(url) + name = None + if isinstance(payload, dict): + value = payload.get("value") + if isinstance(value, list) and value: + first = value[0] + if isinstance(first, dict): + name = first.get("displayName") + if not name: + # Fallback: treat value as service principal object id. + by_id_url = ( + "https://graph.microsoft.com/v1.0/servicePrincipals/" + + urllib.parse.quote(app_or_object_id) + + "?$select=id,appId,displayName" + ) + by_id = self._get(by_id_url) + if isinstance(by_id, dict): + name = by_id.get("displayName") + self.app_cache[app_or_object_id] = name + return name + + def location_name(self, location_id: str) -> str | None: + if location_id in self.location_cache: + return self.location_cache[location_id] + if location_id in {"All", "AllTrusted"}: + name = "All locations" if location_id == "All" else "All trusted locations" + self.location_cache[location_id] = name + return name + url = ( + "https://graph.microsoft.com/v1.0/identity/conditionalAccess/namedLocations/" + + urllib.parse.quote(location_id) + + "?$select=id,displayName" + ) + payload = self._get(url) + name = payload.get("displayName") if isinstance(payload, dict) else None + self.location_cache[location_id] = name + return name + + def auth_strength_name(self, auth_strength_id: str) -> str | None: + if auth_strength_id in self.auth_strength_cache: + return self.auth_strength_cache[auth_strength_id] + url = ( + "https://graph.microsoft.com/beta/identity/conditionalAccess/authenticationStrength/policies/" + + urllib.parse.quote(auth_strength_id) + + "?$select=id,displayName" + ) + payload = self._get(url) + name = payload.get("displayName") if isinstance(payload, dict) else None + self.auth_strength_cache[auth_strength_id] = name + return name + + +def resolve_id_list( + values: list, + lookup_fn, +) -> list[dict[str, str]]: + resolved: list[dict[str, str]] = [] + for raw in values: + if not isinstance(raw, str) or not raw: + continue + resolved.append( + { + "id": raw, + "displayName": lookup_fn(raw) or "Unresolved", + } + ) + return resolved + + +def main() -> int: + args = parse_args() + root = pathlib.Path(args.root).resolve() + 
token = args.token.strip() + + if not token: + print("No Graph token provided. Skipping Conditional Access reference enrichment.") + return 0 + + ca_dir = root / "Conditional Access" + if not ca_dir.exists(): + print(f"Conditional Access folder not found at {ca_dir}. Skipping.") + return 0 + + resolver = GraphResolver(token) + updated_files = 0 + processed_files = 0 + + for file_path in sorted(ca_dir.glob("*.json")): + try: + payload = json.loads(file_path.read_text(encoding="utf-8")) + except Exception: # noqa: BLE001 + continue + if not isinstance(payload, dict): + continue + processed_files += 1 + changed = False + + conditions = payload.get("conditions") + if not isinstance(conditions, dict): + conditions = {} + + users = conditions.get("users") + if isinstance(users, dict): + for key, lookup in ( + ("includeGroups", resolver.group_name), + ("excludeGroups", resolver.group_name), + ("includeRoles", resolver.role_name), + ("excludeRoles", resolver.role_name), + ): + value = users.get(key) + if isinstance(value, list): + resolved_key = f"{key}Resolved" + resolved_value = resolve_id_list(value, lookup) + if users.get(resolved_key) != resolved_value: + users[resolved_key] = resolved_value + changed = True + + apps = conditions.get("applications") + if isinstance(apps, dict): + for key in ("includeApplications", "excludeApplications"): + value = apps.get(key) + if isinstance(value, list): + resolved_key = f"{key}Resolved" + resolved_value = resolve_id_list(value, resolver.app_name) + if apps.get(resolved_key) != resolved_value: + apps[resolved_key] = resolved_value + changed = True + + locations = conditions.get("locations") + if isinstance(locations, dict): + for key in ("includeLocations", "excludeLocations"): + value = locations.get(key) + if isinstance(value, list): + resolved_key = f"{key}Resolved" + resolved_value = resolve_id_list(value, resolver.location_name) + if locations.get(resolved_key) != resolved_value: + locations[resolved_key] = resolved_value + changed = True + + grant_controls = payload.get("grantControls") + if isinstance(grant_controls, dict): + auth_strength = grant_controls.get("authenticationStrength") + if isinstance(auth_strength, dict): + auth_strength_id = auth_strength.get("id") + if isinstance(auth_strength_id, str) and auth_strength_id: + resolved = { + "id": auth_strength_id, + "displayName": resolver.auth_strength_name(auth_strength_id) or "Unresolved", + } + if grant_controls.get("authenticationStrengthResolved") != resolved: + grant_controls["authenticationStrengthResolved"] = resolved + changed = True + + if changed: + file_path.write_text(json.dumps(payload, indent=5, ensure_ascii=False) + "\n", encoding="utf-8") + updated_files += 1 + + print( + "Conditional Access GUID enrichment complete. " + + f"Processed files: {processed_files}. " + + f"Updated files: {updated_files}." + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/update_pr_review_summary.py b/scripts/update_pr_review_summary.py new file mode 100644 index 0000000..9a1c0aa --- /dev/null +++ b/scripts/update_pr_review_summary.py @@ -0,0 +1,2777 @@ +#!/usr/bin/env python3 +""" +Update rolling PR description with automated change summary. 
+
+The summary includes:
+- operation counts (added/modified/deleted/renamed)
+- deterministic risk assessment
+- optional Azure OpenAI reviewer narrative
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import json
+import os
+import re
+import subprocess
+import sys
+import hashlib
+import time
+from collections import Counter
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+from urllib.error import HTTPError, URLError
+from urllib.parse import quote, urlencode, urlsplit
+from urllib.request import Request, urlopen
+
+# common.py lives in the same directory; ensure it can be imported when the
+# script is executed directly.
+_sys_path_inserted = False
+if __file__:
+    _script_dir = str(Path(__file__).resolve().parent)
+    if _script_dir not in sys.path:
+        sys.path.insert(0, _script_dir)
+        _sys_path_inserted = True
+
+import common
+
+if _sys_path_inserted:
+    sys.path.pop(0)
+
+_env_bool = common.env_bool
+_run_git = common.run_git
+
+
+AUTO_BLOCK_START = ""
+AUTO_BLOCK_END = ""
+AUTO_SUMMARY_VERSION = "2026-04-08a"
+# Legacy marker retained for one-time cleanup in PR descriptions.
+TICKET_BLOCK_START = ""
+TICKET_BLOCK_END = ""
+AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
+AUTO_AI_REVIEW_THREAD_PREFIX = "AUTO-AI-REVIEW:"
+COMPACT_AI_THREAD_NOTE = "_Full AI reviewer narrative is posted in a dedicated PR thread due to PR description limits._"
+
+THREAD_STATUS_ACTIVE = 1
+THREAD_STATUS_FIXED = 2
+THREAD_STATUS_WONT_FIX = 3
+THREAD_STATUS_CLOSED = 4
+THREAD_STATUS_BY_DESIGN = 5
+THREAD_STATUS_PENDING = 6
+
+VOLATILE_KEY_NAMES = {
+    "id",
+    "displayname",
+    "assignments",
+    "createddatetime",
+    "lastmodifieddatetime",
+    "modifieddatetime",
+    "updateddatetime",
+    "generateddatetime",
+    "version",
+    "@odata.context",
+    "@odata.type",
+    "@odata.etag",
+    # Entra export enrichment metadata (operational, not desired config drift).
+    "ownersresolved",
+    "approleassignmentsresolved",
+    "requiredresourceaccessresolved",
+    "appownerorganizationresolved",
+    "resolutionstatus",
+}
+
+ENTRA_ENRICHMENT_KEY_NAMES = {
+    "ownersresolved",
+    "approleassignmentsresolved",
+    "requiredresourceaccessresolved",
+    "appownerorganizationresolved",
+    "resolutionstatus",
+}
+
+
+def _reviewer_system_prompt() -> str:
+    return (
+        "You analyze configuration drift pull requests for enterprise identity and endpoint "
+        "management systems such as Microsoft Intune and Entra ID. Your job is to help "
+        "reviewers quickly understand operational impact and security implications of "
+        "configuration changes, including whether the evidence suggests platform-managed "
+        "infrastructure drift, tenant-admin intent, or a mixed/uncertain source."
+    )
+
+
+def _reviewer_instruction() -> str:
+    return (
+        "Produce a concise PR reviewer summary for configuration changes. 
" + "Assume the reviewer may not be a deep technical expert.\n\n" + "Context signals are provided such as scope, posture change classification, " + "baseline alignment relative to the approved configuration branch and\n" + "the active security baseline profile (for example CIS benchmark derived),\n" + "and the top configuration areas affected.\n\n" + "Structure the response with sections:\n" + "Plain-language summary\n" + "Operational impact\n" + "Risk assessment rationale\n" + "Recommended reviewer checks\n" + "Rollback considerations\n\n" + "Rules:\n" + "- Use only the provided change list and facts.\n" + "- Do not assume settings not present in the input.\n" + "- Reference the affected policy paths or configuration areas.\n" + "- Highlight security-relevant changes if present.\n" + "- Mention major affected areas if provided in top_changed_areas.\n" + "- Prefer the semantic_change descriptions when explaining what changed.\n" + "- Use change_source_assessment as a deterministic heuristic, but validate it against the specific changed paths and semantic_change details.\n" + "- Distinguish probable platform-managed or vendor-driven infrastructure drift from probable tenant-admin changes.\n" + "- Treat Microsoft/platform-added objects, background service updates, auto-created integrations, and metadata churn as infrastructure changes unless the supplied facts show explicit admin intent.\n" + "- Treat policy logic, assignments, targeting, access scope, compliance, enrollment, automation, approval, and app configuration choices as admin changes unless the supplied facts suggest they are platform-managed.\n" + "- For App Registrations and Enterprise Applications: do not downgrade risk to LOW solely because the change source appears platform-driven. New or modified apps that expose requiredResourceAccess, appRoles, oauth2PermissionScopes, passwordCredentials, keyCredentials, redirectUris, or preAuthorizedApplications carry tenant security impact regardless of who created the object.\n" + "- When the highest deterministic risk is HIGH and the changed paths include app identity objects, the narrative risk rating must be at least MEDIUM unless the specific facts show purely cosmetic metadata changes.\n" + "- If the evidence is mixed or insufficient, say so explicitly instead of guessing.\n" + "- Keep PRs for both kinds of changes; explain which category dominates the drift and why.\n" + "- For assignment filters: treat filter removal as reverting to the base target scope, not 'no devices'.\n" + "- Only claim policy is unassigned/no devices when assignment targets are removed and none remain.\n" + "- Keep the summary under 200 words." + ) + + +def _minimal_reviewer_instruction() -> str: + return ( + "Write a concise reviewer narrative using only supplied data. " + "Use sections: Plain-language summary, Operational impact, Risk assessment rationale, " + "Recommended reviewer checks, Rollback considerations. " + "State whether the drift looks primarily infrastructure/platform-driven, " + "primarily admin-driven, or mixed/uncertain based on the evidence. " + "Keep under 170 words." 
+ ) + + +@dataclass +class ChangeItem: + operation: str + path: str + risk_score: int + risk_label: str + reason: str + policy_type: str + severity: str + old_path: str | None = None + + +def _is_doc_like(path: str) -> bool: + lp = path.lower() + doc_suffixes = (".md", ".html", ".htm", ".pdf", ".csv", ".txt") + if lp.endswith(doc_suffixes): + return True + if "/docs/" in lp or "/object inventory/" in lp: + return True + return False + + +def _is_report_like(path: str) -> bool: + lp = path.lower().replace("\\", "/") + return "/reports/" in lp or "/assignment report/" in lp + + +def _env(name: str, required: bool = True, default: str = "") -> str: + value = os.environ.get(name, "").strip() + if value: + return value + if required: + raise RuntimeError(f"Required environment variable is missing: {name}") + return default + + +def _env_int(name: str, default: int) -> int: + raw = os.environ.get(name, "").strip() + if not raw: + return default + try: + return int(raw) + except ValueError: + return default + + + +def _delay_reviewer_notifications_enabled() -> bool: + return _env_bool("ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS", False) + + +def _debug_enabled() -> bool: + return ( + _env_bool("DEBUG_CHANGE_TICKET_THREADS") + or _env_bool("DEBUG_CHANGE_TICKETS") + or _env_bool("SYSTEM_DEBUG") + ) + + +def _debug(msg: str) -> None: + if _debug_enabled(): + print(f"[change-ticket-debug] {msg}") + + +def _normalize_aoai_endpoint(endpoint: str) -> str: + cleaned = endpoint.strip().rstrip("/") + if not cleaned: + return cleaned + + parsed = urlsplit(cleaned) + if parsed.scheme and parsed.netloc: + cleaned = f"{parsed.scheme}://{parsed.netloc}" + + marker = "/openai" + idx = cleaned.lower().find(marker) + if idx != -1: + return cleaned[:idx] + return cleaned + + + +def _run_diff_name_status(repo_root: str, baseline_branch: str, drift_branch: str) -> str: + three_dot = f"origin/{baseline_branch}...origin/{drift_branch}" + two_dot = f"origin/{baseline_branch}..origin/{drift_branch}" + try: + return _run_git(repo_root, ["diff", "--name-status", "--find-renames", three_dot]) + except RuntimeError as exc: + err = str(exc).lower() + if "no merge base" not in err: + raise + print( + "WARNING: No merge base for rolling branches " + f"(origin/{baseline_branch}, origin/{drift_branch}); using direct diff." 
+ ) + return _run_git(repo_root, ["diff", "--name-status", "--find-renames", two_dot]) + + +def _retry_after_seconds(exc: HTTPError) -> float | None: + retry_after = exc.headers.get("Retry-After") + if not retry_after: + return None + try: + return max(0.0, float(retry_after)) + except ValueError: + return None + + +def _request_json(url: str, token: str, method: str = "GET", body: dict[str, Any] | None = None) -> dict[str, Any]: + try: + result = common.request_json( + url, + method=method, + body=body, + token=token, + timeout=45, + max_retries=3, + ) + except RuntimeError: + raise + except Exception as exc: + raise RuntimeError(f"{method} {url} failed: {exc}") from exc + if isinstance(result, dict): + return result + return {"value": result} if result is not None else {} + + +def _is_description_limit_error(exc: Exception) -> bool: + text = str(exc).strip().lower() + if "http 413" in text or "request entity too large" in text: + return True + size_terms = ( + "too large", + "too long", + "maximum length", + "max length", + "exceeds the maximum", + "exceeds maximum", + "exceeds the limit", + "payload too large", + "content length", + "description", + ) + limit_terms = ("limit", "length", "size", "large", "long") + return any(term in text for term in size_terms) and any(term in text for term in limit_terms) + + +def _risk_label(score: int) -> str: + if score >= 3: + return "HIGH" + if score == 2: + return "MEDIUM" + return "LOW" + + +def _classify_policy_type(path: str) -> str: + lp = path.lower() + if "conditional access" in lp: + return "conditional_access" + if "device configurations" in lp or "settings catalog" in lp: + return "device_configuration" + if "compliance policies" in lp: + return "compliance_policy" + if "scripts" in lp: + return "script" + if "app configuration" in lp: + return "app_configuration" + if "app protection" in lp: + return "app_protection" + if "roles" in lp or "identity" in lp or "authentication" in lp: + return "identity_security" + return "other" + + +def _severity_from_change(operation: str, risk_score: int, policy_type: str) -> str: + if operation == "Deleted" and risk_score >= 3: + return "CRITICAL" + if policy_type in ("conditional_access", "identity_security") and risk_score >= 3: + return "HIGH" + if risk_score == 3: + return "HIGH" + if risk_score == 2: + return "MEDIUM" + return "LOW" + + +def _classify_risk(path: str, operation: str, backup_folder: str, reports_subdir: str) -> tuple[int, str]: + p = path.replace("\\", "/") + lp = p.lower() + + if _is_doc_like(p): + return (1, "Documentation/report artifact") + + if lp.startswith(f"{backup_folder.lower()}/{reports_subdir.lower()}/") or "/assignment report/" in lp: + return (1, "Generated report/documentation output") + + high_markers = [ + "/conditional access/", + "/compliance policies/", + "/device configurations/", + "/settings catalog/", + "/scripts/", + "/entra/conditional-access/", + "/entra/authentication-strengths/", + "/entra/named-locations/", + "/entra/app-registrations/", + "/entra/enterprise-applications/", + "/authentication/", + "/identity protection/", + "/roles/", + "/privileged identity management/", + "/admin units/", + ] + medium_markers = [ + "/applications/", + "/app protection/", + "/app configuration/", + "/enrollment ", + "/enrollment/", + "/filters/", + "/scope tags/", + "/device management settings/", + "/apple vpp tokens/", + "/apple push notification/", + ] + + score = 1 + reason = "Metadata or lower-impact configuration area" + if any(marker in lp for marker in 
high_markers): + score = 3 + reason = "Security or broad policy area" + elif any(marker in lp for marker in medium_markers): + score = 2 + reason = "Workload configuration area" + + if operation == "Deleted": + score = min(3, score + 1) + reason = f"{reason}; deletion increases impact" + elif operation == "Renamed": + score = min(3, score + 1) + reason = f"{reason}; rename may hide semantic changes" + + script_suffixes = (".ps1", ".sh", ".mobileconfig", ".xml") + if p.lower().endswith(script_suffixes): + score = 3 + reason = "Script/payload change can have immediate device impact" + + return (score, reason) + + +def _parse_changes(diff_output: str, backup_folder: str, reports_subdir: str) -> list[ChangeItem]: + op_map = { + "A": "Added", + "M": "Modified", + "D": "Deleted", + "R": "Renamed", + "C": "Copied", + "T": "TypeChanged", + "U": "Unmerged", + "X": "Unknown", + "B": "Broken", + } + + backup_root = backup_folder.strip().strip("/").lower() + backup_prefix = f"{backup_root}/" if backup_root else "" + + def _is_policy_scope_path(path: str) -> bool: + normalized = path.replace("\\", "/").lstrip("/").lower() + if not backup_prefix: + return True + return normalized.startswith(backup_prefix) + + changes: list[ChangeItem] = [] + for raw_line in diff_output.splitlines(): + line = raw_line.strip() + if not line: + continue + parts = line.split("\t") + if not parts: + continue + status_token = parts[0] + status_code = status_token[0] + operation = op_map.get(status_code, "Modified") + old_path: str | None = None + if operation == "Renamed" and len(parts) >= 3: + old_path = parts[1] + path = parts[2] + else: + path = parts[-1] + in_scope = _is_policy_scope_path(path) or (old_path is not None and _is_policy_scope_path(old_path)) + if not in_scope: + continue + if _is_doc_like(path) or _is_report_like(path): + continue + risk_score, reason = _classify_risk(path, operation, backup_folder, reports_subdir) + if old_path: + old_risk_score, old_reason = _classify_risk(old_path, operation, backup_folder, reports_subdir) + if old_risk_score > risk_score: + risk_score = old_risk_score + reason = old_reason + policy_type = _classify_policy_type(path) + severity = _severity_from_change(operation, risk_score, policy_type) + changes.append( + ChangeItem( + operation=operation, + path=path, + risk_score=risk_score, + risk_label=_risk_label(risk_score), + reason=reason, + policy_type=policy_type, + severity=severity, + old_path=old_path, + ) + ) + return changes + + +def _normalize_branch_name(branch: str) -> str: + normalized = branch.strip() + for _ in range(2): + if normalized.startswith("origin/"): + normalized = normalized[len("origin/") :] + if normalized.startswith("refs/heads/"): + normalized = normalized[len("refs/heads/") :] + return normalized + + +def _changes_fingerprint(changes: list[ChangeItem]) -> str: + canonical = sorted( + f"{item.operation}\t{item.old_path or ''}\t{item.path}\t{item.risk_score}\t{item.policy_type}" + for item in changes + ) + payload = "\n".join(canonical) + return hashlib.sha256(payload.encode("utf-8")).hexdigest()[:12] + + +def _md_cell(text: str) -> str: + return text.replace("|", "\\|").replace("\n", "
") + + +def _ellipsize(text: str, max_len: int) -> str: + if max_len <= 0: + return "" + if len(text) <= max_len: + return text + if max_len <= 3: + return text[:max_len] + return text[: max_len - 3].rstrip() + "..." + + +def _ellipsize_path(path: str, max_len: int) -> str: + if max_len <= 0: + return "" + if len(path) <= max_len: + return path + if max_len <= 6: + return path[-max_len:] + tail = max_len - 4 + return ".../" + path[-tail:] + + +def _assignment_entries(payload: Any) -> list[dict[str, str]]: + if not isinstance(payload, dict): + return [] + assignments = payload.get("assignments") + if not isinstance(assignments, list): + return [] + + entries: list[dict[str, str]] = [] + for assignment in assignments: + if not isinstance(assignment, dict): + continue + target = assignment.get("target", {}) + if not isinstance(target, dict): + target = {} + group_name = ( + str(target.get("groupDisplayName", "") or "") + or str(target.get("groupName", "") or "") + or str(target.get("displayName", "") or "") + or str(assignment.get("targetDisplayName", "") or "") + ) + entries.append( + { + "source": str(assignment.get("source", "") or ""), + "intent": str(assignment.get("intent", "") or ""), + "target_type": str(target.get("@odata.type", "") or ""), + "group_id": str(target.get("groupId", "") or ""), + "group_name": group_name, + "collection_id": str(target.get("collectionId", "") or ""), + "filter_type": str(target.get("deviceAndAppManagementAssignmentFilterType", "") or ""), + "filter_id": str(target.get("deviceAndAppManagementAssignmentFilterId", "") or ""), + } + ) + return entries + + +def _assignment_group_label(entry: dict[str, str]) -> str: + group_name = str(entry.get("group_name", "") or "").strip() + group_id = str(entry.get("group_id", "") or "").strip() + if group_name and group_id: + return f"{group_name} ({group_id})" + if group_name: + return group_name + if group_id: + return group_id + return "all" + + +def _assignment_signature(entry: dict[str, str]) -> str: + parts = [ + f"type={entry.get('target_type', '') or 'n/a'}", + f"group={_assignment_group_label(entry)}", + f"collection={entry.get('collection_id', '') or 'n/a'}", + f"intent={entry.get('intent', '') or 'n/a'}", + f"source={entry.get('source', '') or 'n/a'}", + ] + filter_type = entry.get("filter_type", "") or "none" + filter_id = entry.get("filter_id", "") or "none" + parts.append(f"filter={filter_type}/{filter_id}") + return "; ".join(parts) + + +def _is_exclusion_target_type(target_type: str) -> bool: + lowered = str(target_type or "").strip().lower() + return "exclusion" in lowered + + +def _normalized_assignment_signatures(payload: Any) -> list[str]: + signatures = [_assignment_signature(entry) for entry in _assignment_entries(payload)] + return sorted(signatures) + + +def _describe_assignment_changes(old_payload: Any, new_payload: Any) -> list[str]: + old_entries = _assignment_entries(old_payload) + new_entries = _assignment_entries(new_payload) + if old_entries == new_entries: + return [] + + def _base_key(entry: dict[str, str]) -> tuple[str, str, str, str, str]: + group_identity = str(entry.get("group_id", "") or "").strip() + if not group_identity: + group_identity = str(entry.get("group_name", "") or "").strip().casefold() + return ( + entry.get("target_type", ""), + group_identity, + entry.get("collection_id", ""), + entry.get("intent", ""), + entry.get("source", ""), + ) + + old_map = {_base_key(entry): entry for entry in old_entries} + new_map = {_base_key(entry): entry for entry in new_entries} + 
changes: list[str] = [] + + def _has_filter(entry: dict[str, str]) -> bool: + return bool((entry.get("filter_type", "") or "").strip()) or bool((entry.get("filter_id", "") or "").strip()) + + def _filter_scope_hint(old_entry: dict[str, str], new_entry: dict[str, str]) -> str: + old_has = _has_filter(old_entry) + new_has = _has_filter(new_entry) + if old_has and not new_has: + return "scope likely broader (unfiltered base target)" + if not old_has and new_has: + return "scope likely narrower (filtered subset of base target)" + + old_type = (old_entry.get("filter_type", "") or "").strip().lower() + new_type = (new_entry.get("filter_type", "") or "").strip().lower() + old_id = (old_entry.get("filter_id", "") or "").strip().lower() + new_id = (new_entry.get("filter_id", "") or "").strip().lower() + if old_type != new_type: + return "scope impact ambiguous (include/exclude semantics changed)" + if old_id != new_id: + return "scope impact ambiguous (different filter population)" + return "scope impact ambiguous" + + for key in sorted(set(old_map.keys()) & set(new_map.keys())): + old_entry = old_map[key] + new_entry = new_map[key] + old_filter = (old_entry.get("filter_type", ""), old_entry.get("filter_id", "")) + new_filter = (new_entry.get("filter_type", ""), new_entry.get("filter_id", "")) + if old_filter != new_filter: + target_label = old_entry.get("target_type", "") or "assignment" + old_filter_text = f"{old_filter[0] or 'none'}/{old_filter[1] or 'none'}" + new_filter_text = f"{new_filter[0] or 'none'}/{new_filter[1] or 'none'}" + scope_hint = _filter_scope_hint(old_entry, new_entry) + changes.append( + f"assignment filter ({target_label}): {old_filter_text} -> {new_filter_text} [{scope_hint}]" + ) + + # If only filter values changed on the same assignment targets, the + # explicit filter diff is clearer than additional added/removed noise. 
+ if changes and set(old_map.keys()) == set(new_map.keys()): + return changes + + old_set = set(_normalized_assignment_signatures(old_payload)) + new_set = set(_normalized_assignment_signatures(new_payload)) + added = sorted(new_set - old_set) + removed = sorted(old_set - new_set) + + old_by_sig = {_assignment_signature(entry): entry for entry in old_entries} + new_by_sig = {_assignment_signature(entry): entry for entry in new_entries} + added_entries = [new_by_sig[sig] for sig in added if sig in new_by_sig] + removed_entries = [old_by_sig[sig] for sig in removed if sig in old_by_sig] + + if added: + changes.append(f"assignment targets added: {'; '.join(added[:2])}") + if removed: + changes.append(f"assignment targets removed: {'; '.join(removed[:2])}") + + if old_entries and not new_entries: + changes.append("assignment scope: likely unassigned (all assignment targets removed)") + elif not old_entries and new_entries: + changes.append("assignment scope: newly assigned (targets added)") + elif added and not removed: + if added_entries and all(_is_exclusion_target_type(entry.get("target_type", "")) for entry in added_entries): + changes.append("assignment scope: likely narrower (more exclusion targets)") + elif added_entries and all(not _is_exclusion_target_type(entry.get("target_type", "")) for entry in added_entries): + changes.append("assignment scope: likely broader (more include/all targets)") + else: + changes.append("assignment scope: ambiguous (mixed target semantics)") + elif removed and not added: + if removed_entries and all(_is_exclusion_target_type(entry.get("target_type", "")) for entry in removed_entries): + changes.append("assignment scope: likely broader (fewer exclusion targets)") + elif removed_entries and all(not _is_exclusion_target_type(entry.get("target_type", "")) for entry in removed_entries): + changes.append("assignment scope: likely narrower (fewer include/all targets)") + else: + changes.append("assignment scope: ambiguous (mixed target semantics)") + elif added and removed: + changes.append("assignment scope: ambiguous (target mix changed)") + return changes + + +def _build_deterministic_summary( + changes: list[ChangeItem], + drift_branch: str, + baseline_branch: str, + ignored_operational_count: int = 0, +) -> str: + op_counter = Counter(item.operation for item in changes) + risk_counter = Counter(item.risk_label for item in changes) + overall_risk = _risk_label(max((item.risk_score for item in changes), default=1)) + changes_fingerprint = _changes_fingerprint(changes) + + lines: list[str] = [] + lines.append("### Change Metrics") + lines.append(f"- **Scope:** `{drift_branch}` -> `{baseline_branch}`") + lines.append(f"- **Changed Files:** **{len(changes)}**") + if ignored_operational_count > 0: + lines.append(f"- **Operational-Only Changes Ignored:** **{ignored_operational_count}**") + lines.append(f"- **Change Fingerprint:** `{changes_fingerprint}`") + lines.append("") + lines.append("| **Operation** | **Count** |") + lines.append("|---|---:|") + wrote_operation_row = False + for op in ("Added", "Modified", "Deleted", "Renamed", "Copied", "TypeChanged"): + count = op_counter.get(op, 0) + if count: + lines.append(f"| {op} | {count} |") + wrote_operation_row = True + if not wrote_operation_row: + lines.append("| (none) | 0 |") + lines.append("") + lines.append("| **Risk Level** | **Count** |") + lines.append("|---|---:|") + lines.append(f"| HIGH | {risk_counter.get('HIGH', 0)} |") + lines.append(f"| MEDIUM | {risk_counter.get('MEDIUM', 0)} |") + 
lines.append(f"| LOW | {risk_counter.get('LOW', 0)} |") + lines.append("") + lines.append(f"> **Overall Risk:** **{overall_risk}**") + + highlights = sorted( + [item for item in changes if item.risk_score >= 2 and not _is_doc_like(item.path)], + key=lambda item: ( + -item.risk_score, + 0 if item.operation == "Deleted" else 1, + item.path.lower(), + item.operation, + ), + )[:8] + + if highlights: + lines.append("") + lines.append("### Top Risk Items") + lines.append("| **Severity** | **Operation** | **Area** | **File** | **Why It Matters** |") + lines.append("|---|---|---|---|---|") + for item in highlights: + severity = _md_cell(f"{item.severity} / {item.risk_label}") + operation = _md_cell(item.operation) + area = _md_cell(item.policy_type) + file_path = _md_cell(_ellipsize_path(item.path, 120)) + reason = _md_cell(_ellipsize(item.reason, 80)) + lines.append( + f"| {severity} | `{operation}` | `{area}` | `{file_path}` | {reason} |" + ) + + return "\n".join(lines) + + +def _strip_entra_enrichment_fields(value: Any) -> Any: + if isinstance(value, dict): + cleaned: dict[str, Any] = {} + for key, child in value.items(): + if str(key).strip().lower() in ENTRA_ENRICHMENT_KEY_NAMES: + continue + cleaned[key] = _strip_entra_enrichment_fields(child) + return cleaned + if isinstance(value, list): + return [_strip_entra_enrichment_fields(item) for item in value] + return value + + +def _is_entra_enrichment_only_json_change(old_excerpt: str, new_excerpt: str) -> bool: + if not old_excerpt or not new_excerpt: + return False + try: + old_payload = json.loads(old_excerpt) + new_payload = json.loads(new_excerpt) + except Exception: + return False + if not isinstance(old_payload, dict) or not isinstance(new_payload, dict): + return False + + old_stripped = _strip_entra_enrichment_fields(old_payload) + new_stripped = _strip_entra_enrichment_fields(new_payload) + if old_stripped != new_stripped: + return False + return old_payload != new_payload + + +def _filter_operational_noise_changes( + repo_root: str, + baseline_branch: str, + drift_branch: str, + workload: str, + changes: list[ChangeItem], +) -> tuple[list[ChangeItem], int]: + if workload.strip().lower() != "entra": + return (changes, 0) + + filtered: list[ChangeItem] = [] + skipped = 0 + for item in changes: + if item.operation != "Modified": + filtered.append(item) + continue + if not item.path.lower().endswith(".json"): + filtered.append(item) + continue + old_excerpt = _load_policy_excerpt(repo_root, baseline_branch, item.path) + new_excerpt = _load_policy_excerpt(repo_root, drift_branch, item.path) + if _is_entra_enrichment_only_json_change(old_excerpt, new_excerpt): + skipped += 1 + _debug(f"Ignoring enrichment-only Entra drift noise for path={item.path}") + continue + filtered.append(item) + return (filtered, skipped) + + +def _fit_payload_budget( + payload: dict[str, Any], + sampled_changes: list[dict[str, Any]], + max_bytes: int, +) -> tuple[list[dict[str, Any]], bool]: + trimmed = list(sampled_changes) + truncated = False + while True: + probe = dict(payload) + probe["sampled_changes"] = trimmed + if len(json.dumps(probe, ensure_ascii=True)) <= max_bytes: + return (trimmed, truncated) + if not trimmed: + return (trimmed, True) + truncated = True + # Drop in chunks to converge quickly on very large payloads. 
+ drop = max(1, len(trimmed) // 8) + trimmed = trimmed[:-drop] + + +def _change_scope(changes: list[ChangeItem]) -> str: + count = len(changes) + if count <= 3: + return "SMALL" + if count <= 15: + return "MODERATE" + return "LARGE" + + +def _build_change_facts(changes: list[ChangeItem]) -> dict[str, Any]: + op_counter = Counter(item.operation for item in changes) + risk_counter = Counter(item.risk_label for item in changes) + highest_risk = _risk_label(max((item.risk_score for item in changes), default=1)) + return { + "total_changes": len(changes), + "operations": dict(op_counter), + "risk_counts": dict(risk_counter), + "highest_risk": highest_risk, + } + + +def _detect_hotspots(changes: list[ChangeItem]) -> list[str]: + area_counter: Counter[str] = Counter() + for item in changes: + parts = [part for part in item.path.replace("\\", "/").split("/") if part] + if not parts: + continue + + area = parts[0] + if len(parts) >= 3 and parts[1].lower() in {"intune", "entra"}: + # tenant-state///... + area = parts[2] + elif len(parts) >= 2: + area = parts[1] + + area_counter[area.lower()] += 1 + return [area for area, _ in area_counter.most_common(5)] + + +def _classify_change_source(item: ChangeItem, semantic_change: str = "") -> dict[str, Any]: + lp = item.path.lower().replace("\\", "/") + semantic = (semantic_change or "").strip() + semantic_lower = semantic.lower() + + admin_score = 0 + infrastructure_score = 0 + admin_reasons: list[str] = [] + infrastructure_reasons: list[str] = [] + + def _add_admin(score: int, reason: str) -> None: + nonlocal admin_score + admin_score += score + if reason not in admin_reasons: + admin_reasons.append(reason) + + def _add_infrastructure(score: int, reason: str) -> None: + nonlocal infrastructure_score + infrastructure_score += score + if reason not in infrastructure_reasons: + infrastructure_reasons.append(reason) + + if item.policy_type in { + "conditional_access", + "device_configuration", + "compliance_policy", + "script", + "app_configuration", + "app_protection", + "identity_security", + }: + _add_admin(3, "Policy/control workload usually reflects tenant-admin intent") + + admin_path_markers = ( + "/conditional access/", + "/named locations/", + "/authentication strengths/", + "/device configurations/", + "/settings catalog/", + "/compliance policies/", + "/scripts/", + "/app configuration/", + "/app protection/", + "/filters/", + "/scope tags/", + "/device management settings/", + "/apple vpp tokens/", + "/apple push notification/", + "/roles/", + "/identity protection/", + "/privileged identity management/", + "/admin units/", + "/intune/applications/", + ) + if any(marker in lp for marker in admin_path_markers): + _add_admin(2, "Changed path is typically tenant-managed configuration") + + if "/enrollment " in lp or "/enrollment/" in lp: + _add_admin(2, "Enrollment settings are typically administered intentionally") + + if any( + token in semantic_lower + for token in ( + "assignment filter", + "assignment scope", + "assignment targets", + "newly assigned", + "unassigned", + "scope likely broader", + "scope likely narrower", + "include targets", + "exclude targets", + ) + ): + _add_admin(3, "Assignment/targeting semantics changed") + + if any(marker in lp for marker in ("/enterprise applications/", "/app registrations/")): + if "/enterprise applications/" in lp: + _add_infrastructure(3, "Enterprise application inventory often contains platform-managed object churn") + else: + _add_infrastructure(2, "App registration inventory can include 
service/platform object churn") + + if item.operation == "Added" and any(marker in lp for marker in ("/enterprise applications/", "/app registrations/")): + _add_infrastructure(1, "New application identity objects may be introduced automatically by the platform") + + if semantic_lower == "no semantic key changes detected": + _add_infrastructure(2, "No semantic setting delta detected despite file drift") + + if any( + token in semantic_lower + for token in ( + "resolutionstatus", + "ownersresolved", + "approleassignmentsresolved", + "requiredresourceaccessresolved", + "appownerorganizationresolved", + "unresolved", + ) + ): + _add_infrastructure(2, "Semantic diff resembles resolver/metadata churn") + + if semantic in {"New configuration object added", "Configuration object removed"} and any( + marker in lp for marker in ("/enterprise applications/", "/app registrations/") + ): + _add_infrastructure(1, "Object lifecycle changes in identity app inventory may be platform-driven") + + # Security-relevant app identity changes should not be treated as pure infrastructure drift. + if any(marker in lp for marker in ("/enterprise applications/", "/app registrations/")): + security_semantic_tokens = ( + "requiredresourceaccess", + "approles", + "oauth2permissionscopes", + "passwordcredentials", + "keycredentials", + "redirecturis", + "identifieruris", + "preauthorizedapplications", + "signinaudience", + ) + if any(token in semantic_lower for token in security_semantic_tokens): + _add_admin(4, "Application identity shows permission, credential, or scope changes indicating tenant security impact") + + if admin_score == 0 and infrastructure_score == 0: + label = "mixed_or_uncertain" + reasons = ["Insufficient deterministic evidence to attribute source"] + elif admin_score >= infrastructure_score + 2: + label = "likely_admin_driven" + reasons = admin_reasons[:3] or ["Deterministic signals favor tenant-admin intent"] + elif infrastructure_score >= admin_score + 2: + label = "likely_infrastructure_driven" + reasons = infrastructure_reasons[:3] or ["Deterministic signals favor platform-managed drift"] + else: + label = "mixed_or_uncertain" + reasons = (admin_reasons[:2] + infrastructure_reasons[:2])[:4] + if not reasons: + reasons = ["Signals conflict or are too weak to attribute source confidently"] + + return { + "label": label, + "admin_score": admin_score, + "infrastructure_score": infrastructure_score, + "reasons": reasons, + } + + +def _build_change_source_assessment(compact_changes: list[dict[str, Any]]) -> dict[str, Any]: + counts: Counter[str] = Counter() + signal_counter: Counter[str] = Counter() + admin_total = 0 + infrastructure_total = 0 + + for item in compact_changes: + label = str(item.get("change_source") or "mixed_or_uncertain") + counts[label] += 1 + scores = item.get("change_source_scores", {}) + if isinstance(scores, dict): + admin_total += int(scores.get("admin", 0) or 0) + infrastructure_total += int(scores.get("infrastructure", 0) or 0) + reasons = item.get("change_source_reasons", []) + if isinstance(reasons, list): + for reason in reasons[:2]: + if isinstance(reason, str) and reason.strip(): + signal_counter[reason.strip()] += 1 + + if not compact_changes: + dominant_source = "mixed_or_uncertain" + elif admin_total >= infrastructure_total + 3 and counts.get("likely_admin_driven", 0) >= counts.get("likely_infrastructure_driven", 0): + dominant_source = "primarily_admin_driven" + elif infrastructure_total >= admin_total + 3 and counts.get("likely_infrastructure_driven", 0) >= 
counts.get("likely_admin_driven", 0): + dominant_source = "primarily_infrastructure_driven" + else: + dominant_source = "mixed_or_uncertain" + + score_gap = abs(admin_total - infrastructure_total) + if dominant_source == "mixed_or_uncertain": + confidence = "medium" if score_gap >= 3 else "low" + else: + confidence = "high" if score_gap >= 6 else "medium" + + return { + "dominant_source": dominant_source, + "confidence": confidence, + "counts": { + "likely_admin_driven": counts.get("likely_admin_driven", 0), + "likely_infrastructure_driven": counts.get("likely_infrastructure_driven", 0), + "mixed_or_uncertain": counts.get("mixed_or_uncertain", 0), + }, + "score_totals": { + "admin": admin_total, + "infrastructure": infrastructure_total, + }, + "top_signals": [reason for reason, _ in signal_counter.most_common(5)], + } + + +def _format_change_source_label(label: str) -> str: + mapping = { + "likely_admin_driven": "likely admin-driven", + "likely_infrastructure_driven": "likely infrastructure/platform-driven", + "primarily_admin_driven": "primarily admin-driven", + "primarily_infrastructure_driven": "primarily infrastructure/platform-driven", + "mixed_or_uncertain": "mixed or uncertain", + } + return mapping.get(label, label.replace("_", " ").strip()) + + +def _classify_posture(changes: list[ChangeItem]) -> str: + """ + Classify overall security posture effect of the change set. + """ + high_risk_changes = [c for c in changes if c.risk_score >= 3] + + if not changes: + return "cosmetic_change" + + if any(c.operation == "Deleted" and c.risk_score >= 3 for c in changes): + return "potential_security_weakening" + + if high_risk_changes: + return "security_relevant_change" + + if all(c.risk_score == 1 for c in changes): + return "cosmetic_change" + + return "functional_configuration_change" + + +def _load_policy_excerpt(repo_root: str, branch: str, path: str, max_chars: int = 0) -> str: + """ + Load a small excerpt of a file from a given git branch for AI context. 
+ """ + try: + normalized_branch = _normalize_branch_name(branch) + result = subprocess.run( + ["git", "show", f"origin/{normalized_branch}:{path}"], + cwd=repo_root, + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + return "" + content = result.stdout.strip() + if max_chars and len(content) > max_chars: + return content[:max_chars] + "...(truncated)" + return content + except Exception: + return "" + + +def _summarize_app_security_fields(excerpt: str) -> str: + """Summarize security-relevant fields in an App Registration or Enterprise Application.""" + signals: list[str] = [] + try: + payload = json.loads(excerpt) + except Exception: + return "" + if not isinstance(payload, dict): + return "" + if payload.get("requiredResourceAccess"): + signals.append("requiredResourceAccess present") + if payload.get("appRoles"): + signals.append("appRoles present") + if payload.get("passwordCredentials"): + signals.append("passwordCredentials present") + if payload.get("keyCredentials"): + signals.append("keyCredentials present") + web = payload.get("web", {}) if isinstance(payload.get("web"), dict) else {} + if web.get("redirectUris"): + signals.append("redirectUris present") + if payload.get("identifierUris"): + signals.append("identifierUris present") + api = payload.get("api", {}) if isinstance(payload.get("api"), dict) else {} + if api.get("preAuthorizedApplications"): + signals.append("preAuthorizedApplications present") + sign_in_audience = payload.get("signInAudience") + if sign_in_audience and str(sign_in_audience) != "AzureADMyOrg": + signals.append(f"signInAudience={sign_in_audience}") + return "; ".join(signals) + + +def _extract_semantic_change(old_excerpt: str, new_excerpt: str, path: str = "") -> str: + """ + Derive a semantic description of configuration changes by flattening JSON + structures and comparing dotted-key paths. This gives more precise signals + for nested policy settings such as Conditional Access conditions or + Defender settings. + """ + is_app_path = any( + marker in path.lower() for marker in ("/app registrations/", "/enterprise applications/") + ) + + if not old_excerpt and new_excerpt: + if is_app_path: + app_signals = _summarize_app_security_fields(new_excerpt) + if app_signals: + return f"New configuration object added ({app_signals})" + return "New configuration object added" + if old_excerpt and not new_excerpt: + return "Configuration object removed" + + def _is_volatile_key_name(key: str) -> bool: + lowered = key.lower() + if lowered in VOLATILE_KEY_NAMES: + return True + # Common metadata suffix/prefix variants. + if lowered.endswith("datetime") or lowered.startswith("@odata."): + return True + return False + + def _format_scalar(value: Any) -> str: + if value is None: + return "null" + text = str(value).strip() + if not text: + return '""' + if len(text) > 80: + return text[:77] + "..." + return text + + def _flatten(obj: Any, prefix: str = "") -> dict[str, Any]: + flat: dict[str, Any] = {} + if isinstance(obj, dict): + for k, v in obj.items(): + if _is_volatile_key_name(k): + continue + new_key = f"{prefix}.{k}" if prefix else k + flat.update(_flatten(v, new_key)) + elif isinstance(obj, list): + # Include a lightweight content hash so list item edits (for example + # assignment filters) are detected even when list length is unchanged. 
+ preview = json.dumps(obj, sort_keys=True, ensure_ascii=True, separators=(",", ":")) + flat[prefix] = f"list[{len(obj)}]#{hashlib.sha256(preview.encode('utf-8')).hexdigest()[:8]}" + else: + flat[prefix] = obj + return flat + + try: + old_json = json.loads(old_excerpt) if old_excerpt else {} + new_json = json.loads(new_excerpt) if new_excerpt else {} + + old_flat = _flatten(old_json) + new_flat = _flatten(new_json) + + keys = set(old_flat.keys()) | set(new_flat.keys()) + state_changes: list[str] = [] + value_changes: list[str] = [] + structure_changes: list[str] = [] + assignment_changes = _describe_assignment_changes(old_json, new_json) + + for key in sorted(keys): + old_val = old_flat.get(key) + new_val = new_flat.get(key) + + if old_val != new_val: + if key not in old_flat: + structure_changes.append(f"{key} added") + elif key not in new_flat: + structure_changes.append(f"{key} removed") + else: + change_text = f"{key}: {_format_scalar(old_val)} -> {_format_scalar(new_val)}" + if key.lower().endswith(".state") or key.lower() == "state": + state_changes.append(change_text) + else: + value_changes.append(change_text) + + ordered_changes = assignment_changes + state_changes + value_changes + structure_changes + if not ordered_changes: + return "No semantic key changes detected" + + return "; ".join(ordered_changes[:8]) + except Exception: + if old_excerpt != new_excerpt: + return "Configuration content modified" + return "" + + +def _policy_fingerprint(json_excerpt: str) -> str: + """ + Generate a stable fingerprint for a policy configuration. + + Intune exports contain metadata (IDs, assignments, timestamps) + that differ across tenants and exports. This function removes + those fields and fingerprints only the configuration content. + """ + + if not json_excerpt: + return "" + + def _sanitize(obj: Any): + if isinstance(obj, dict): + cleaned = {} + for k, v in obj.items(): + lowered = k.lower() + if lowered == "assignments": + cleaned[k] = _normalized_assignment_signatures(obj) + continue + if lowered in VOLATILE_KEY_NAMES or lowered.endswith("datetime") or lowered.startswith("@odata."): + continue + cleaned[k] = _sanitize(v) + return cleaned + + if isinstance(obj, list): + return [_sanitize(v) for v in obj] + + return obj + + try: + parsed = json.loads(json_excerpt) + + # Prefer settings-like sections if present (common in Intune exports) + for key in ("settings", "settingsDelta", "settingDefinitions", "configuration"): + if isinstance(parsed, dict) and key in parsed: + parsed = parsed[key] + break + + sanitized = _sanitize(parsed) + + normalized = json.dumps( + sanitized, + sort_keys=True, + separators=(",", ":"), + ) + + except Exception: + normalized = json_excerpt.strip() + + return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] + + +def _extract_ai_text_from_payload(payload: dict[str, Any]) -> tuple[str, str]: + choices = payload.get("choices", []) + if not choices: + return ("", "no choices") + + first = choices[0] if isinstance(choices[0], dict) else {} + finish_reason = str(first.get("finish_reason") or "").strip() + message = first.get("message", {}) if isinstance(first.get("message"), dict) else {} + content = message.get("content", "") + text = "" + + if isinstance(content, str): + text = content.strip() + elif isinstance(content, list): + parts: list[str] = [] + for item in content: + if isinstance(item, str): + if item.strip(): + parts.append(item.strip()) + continue + if not isinstance(item, dict): + continue + for key in ("text", "content", "value"): + value = 
item.get(key) + if isinstance(value, str) and value.strip(): + parts.append(value.strip()) + break + text = "\n".join(parts).strip() + + normalized_finish_reason = finish_reason.lower() + if normalized_finish_reason and normalized_finish_reason != "stop": + detail = f"finish_reason={finish_reason}" + if text: + detail += " (partial content suppressed)" + return ("", detail) + + if text: + return (text, "") + + refusal = message.get("refusal") + if isinstance(refusal, str) and refusal.strip(): + return ("", f"model refusal: {refusal.strip()}") + + if finish_reason: + return ("", f"finish_reason={finish_reason}") + return ("", "empty message content") + + +def _build_fallback_narrative( + workload: str, + changes: list[ChangeItem], + compact_changes: list[dict[str, Any]], + top_changed_areas: list[str], + change_source_assessment: dict[str, Any], + fallback_reason: str, +) -> str: + highest_risk = _risk_label(max((item.risk_score for item in changes), default=1)) + change_scope = _change_scope(changes) + security_sensitive = any(item.risk_score >= 3 for item in changes) + source_text = _format_change_source_label( + str(change_source_assessment.get("dominant_source") or "mixed_or_uncertain") + ) + source_confidence = str(change_source_assessment.get("confidence") or "low") + + top_items = compact_changes[:3] + bullet_lines: list[str] = [] + for item in top_items: + path = str(item.get("path") or "") + op = str(item.get("operation") or "Modified") + sem = str(item.get("semantic_change") or "Configuration content modified") + bullet_lines.append(f"- `{op}` `{path}`: {sem}") + + areas_text = ", ".join(top_changed_areas[:3]) if top_changed_areas else "n/a" + sensitivity_text = "security-relevant" if security_sensitive else "configuration-impacting" + + lines = [ + "#### Plain-Language Summary", + ( + f"{workload.upper()} drift includes {len(changes)} {change_scope.lower()} " + f"{sensitivity_text} change(s), with overall risk {highest_risk}. " + f"Deterministic source assessment: {source_text} ({source_confidence} confidence)." + ), + "", + "#### Operational Impact", + f"Most affected areas: {areas_text}. 
Validate expected behavior for impacted policies after merge.", + "", + "#### Risk Assessment Rationale", + f"Risk is driven by changed policy areas and operations (especially deletes/renames in security-critical paths).", + "", + "#### Recommended Reviewer Checks", + ] + lines.extend(bullet_lines if bullet_lines else ["- Validate changed policy intent against expected baseline behavior."]) + lines.extend( + [ + "", + "#### Rollback Considerations", + "If behavior is not expected, revert the drift commit/PR to restore baseline state.", + "", + f"_AI fallback used: {fallback_reason}_", + ] + ) + return "\n".join(lines) + + +def _format_ai_narrative_markdown(text: str) -> str: + if not text: + return "" + + section_map = { + "plain-language summary": "#### Plain-Language Summary", + "plain language summary": "#### Plain-Language Summary", + "operational impact": "#### Operational Impact", + "risk assessment rationale": "#### Risk Assessment Rationale", + "recommended reviewer checks": "#### Recommended Reviewer Checks", + "rollback considerations": "#### Rollback Considerations", + } + + normalized_lines: list[str] = [] + for raw in text.replace("\r\n", "\n").split("\n"): + line = raw.rstrip() + stripped = line.strip() + lowered = stripped.lower().rstrip(":") + + if lowered in section_map: + normalized_lines.append(section_map[lowered]) + continue + + if stripped.startswith("•"): + normalized_lines.append("- " + stripped.lstrip("• ").strip()) + continue + + normalized_lines.append(line) + + compact: list[str] = [] + blank_count = 0 + for line in normalized_lines: + if not line.strip(): + blank_count += 1 + if blank_count > 1: + continue + else: + blank_count = 0 + compact.append(line) + + return "\n".join(compact).strip() + + +def _compact_ai_narrative_markdown(text: str, max_chars: int) -> str: + formatted = _format_ai_narrative_markdown(text) + if not formatted or max_chars <= 0: + return "" + if len(formatted) <= max_chars: + return formatted + + sections: list[tuple[str, str]] = [] + current_heading = "" + current_lines: list[str] = [] + for raw in formatted.split("\n"): + line = raw.rstrip() + if line.startswith("#### "): + if current_heading: + sections.append((current_heading, "\n".join(current_lines).strip())) + current_heading = line + current_lines = [] + continue + current_lines.append(line) + if current_heading: + sections.append((current_heading, "\n".join(current_lines).strip())) + + if not sections: + return _ellipsize(formatted, max_chars) + + weights = { + "#### Plain-Language Summary": 1.3, + "#### Operational Impact": 1.2, + "#### Risk Assessment Rationale": 1.2, + "#### Recommended Reviewer Checks": 1.0, + "#### Rollback Considerations": 0.9, + } + min_body_chars = 24 + total_weight = sum(weights.get(heading, 1.0) for heading, _ in sections) or 1.0 + body_room = max(len(sections) * min_body_chars, max_chars // 2) + budgets: list[int] = [] + for heading, body in sections: + target = int(body_room * (weights.get(heading, 1.0) / total_weight)) + budgets.append(max(min_body_chars, min(len(body), max(min_body_chars, target)))) + + def _render(current_budgets: list[int]) -> str: + parts: list[str] = [] + for idx, (heading, body) in enumerate(sections): + parts.append(heading) + if body: + parts.append(_ellipsize(body, current_budgets[idx])) + if idx < len(sections) - 1: + parts.append("") + return "\n".join(parts).strip() + + compact = _render(budgets) + while len(compact) > max_chars: + candidates = [idx for idx, budget in enumerate(budgets) if budget > min_body_chars] + if 
not candidates: + break + idx = max(candidates, key=lambda i: budgets[i]) + budgets[idx] = max(min_body_chars, budgets[idx] - 20) + compact = _render(budgets) + + if len(compact) <= max_chars: + return compact + return _ellipsize(compact, max_chars) + + +def _is_timeout_like_error(exc: Exception) -> bool: + text = str(exc).strip().lower() + if "timed out" in text or "timeout" in text: + return True + if isinstance(exc, TimeoutError): + return True + reason = getattr(exc, "reason", None) + if isinstance(reason, Exception): + reason_text = str(reason).strip().lower() + if "timed out" in reason_text or "timeout" in reason_text: + return True + if isinstance(reason, TimeoutError): + return True + return False + + +def _preferred_aoai_token_param(deployment_name: str) -> str: + override = os.environ.get("AZURE_OPENAI_TOKEN_PARAM", "").strip().lower() + if override in {"max_tokens", "max_completion_tokens"}: + return override + if deployment_name.strip().lower().startswith("gpt-5"): + return "max_completion_tokens" + return "max_tokens" + + +def _aoai_token_param_candidates(deployment_name: str) -> list[str]: + preferred = _preferred_aoai_token_param(deployment_name) + alternate = "max_completion_tokens" if preferred == "max_tokens" else "max_tokens" + return [preferred, alternate] + + +def _preferred_aoai_temperature(deployment_name: str) -> float | None: + override = os.environ.get("AZURE_OPENAI_TEMPERATURE", "").strip().lower() + if override in {"default", "none", "omit"}: + return None + if override: + try: + return float(override) + except ValueError: + return None + if deployment_name.strip().lower().startswith("gpt-5"): + return None + return 0.0 + + +def _aoai_temperature_candidates(deployment_name: str) -> list[float | None]: + preferred = _preferred_aoai_temperature(deployment_name) + if preferred is None: + return [None] + return [preferred, None] + + +def _call_azure_openai( + changes: list[ChangeItem], + deterministic_summary: str, + workload: str, + repo_root: str, + baseline_branch: str, + drift_branch: str, +) -> tuple[str | None, str | None]: + enabled = _env("ENABLE_PR_AI_SUMMARY", required=False, default="true").lower() == "true" + if not enabled: + return (None, None) + + endpoint = _env("AZURE_OPENAI_ENDPOINT", required=False) + deployment = _env("AZURE_OPENAI_DEPLOYMENT", required=False) + api_key = _env("AZURE_OPENAI_API_KEY", required=False) + api_version = _env("AZURE_OPENAI_API_VERSION", required=False, default="2024-12-01-preview") + max_ai_tokens = max(256, _env_int("PR_AI_MAX_TOKENS", 1200)) + ai_timeout_seconds = max(10, _env_int("PR_AI_REQUEST_TIMEOUT_SECONDS", 60)) + compact_timeout_seconds = max( + ai_timeout_seconds, + _env_int("PR_AI_COMPACT_TIMEOUT_SECONDS", max(90, ai_timeout_seconds)), + ) + minimal_timeout_seconds = max( + compact_timeout_seconds, + _env_int("PR_AI_MINIMAL_TIMEOUT_SECONDS", max(120, compact_timeout_seconds)), + ) + max_route_attempts = max(1, _env_int("PR_AI_REQUEST_MAX_ATTEMPTS", 3)) + + if not endpoint or not deployment or not api_key: + return (None, "Azure OpenAI is not configured (endpoint/deployment/api-key missing)") + + ai_candidates = [ + item for item in changes + if not _is_doc_like(item.path) and not _is_report_like(item.path) + ] + if not ai_candidates: + return (None, "AI summary skipped: only report/documentation changes detected") + + max_items = min(80, len(ai_candidates)) + shortlist = sorted( + ai_candidates, + key=lambda item: ( + -item.risk_score, + 0 if item.operation == "Deleted" else 1, + 0 if "conditional access" in 
item.path.lower() else 1, + item.path.lower(), + ), + )[:max_items] + compact_changes = [] + for item in shortlist: + baseline_path = item.old_path if item.operation == "Renamed" and item.old_path else item.path + old_excerpt = _load_policy_excerpt(repo_root, baseline_branch, baseline_path) + new_excerpt = _load_policy_excerpt(repo_root, drift_branch, item.path) + + semantic_change = _extract_semantic_change(old_excerpt, new_excerpt, item.path) + old_fingerprint = _policy_fingerprint(old_excerpt) + new_fingerprint = _policy_fingerprint(new_excerpt) + + compact_changes.append( + { + "operation": item.operation, + "path": item.path, + "old_path": item.old_path or "", + "policy_type": item.policy_type, + "severity": item.severity, + "risk": item.risk_label, + "reason": item.reason, + "semantic_change": semantic_change, + "old_fingerprint": old_fingerprint, + "new_fingerprint": new_fingerprint, + "fingerprint_changed": old_fingerprint != new_fingerprint, + } + ) + source = _classify_change_source(item, semantic_change) + compact_changes[-1]["change_source"] = source["label"] + compact_changes[-1]["change_source_reasons"] = source["reasons"] + compact_changes[-1]["change_source_scores"] = { + "admin": source["admin_score"], + "infrastructure": source["infrastructure_score"], + } + + change_facts = _build_change_facts(changes) + change_scope = _change_scope(changes) + security_sensitive = any(item.risk_score >= 3 for item in changes) + top_changed_areas = _detect_hotspots(changes) + posture_change = _classify_posture(changes) + change_source_assessment = _build_change_source_assessment(compact_changes) + baseline_alignment = "proposed_change_to_authoritative_config" + baseline_profile = os.environ.get("SECURITY_BASELINE_PROFILE", "authoritative-main") + + user_payload = { + "workload": workload, + "change_scope": change_scope, + "security_sensitive": security_sensitive, + "baseline_alignment": baseline_alignment, + "baseline_profile": baseline_profile, + "posture_change": posture_change, + "top_changed_areas": top_changed_areas, + "change_facts": change_facts, + "change_source_assessment": change_source_assessment, + "deterministic_summary": deterministic_summary, + "instruction": _reviewer_instruction(), + } + max_payload_bytes = max(16000, _env_int("PR_AI_PAYLOAD_MAX_BYTES", 120000)) + sampled_changes, payload_truncated = _fit_payload_budget( + payload=user_payload, + sampled_changes=compact_changes, + max_bytes=max_payload_bytes, + ) + user_payload["sampled_changes"] = sampled_changes + user_payload["sampled_changes_total"] = len(compact_changes) + user_payload["sampled_changes_used"] = len(sampled_changes) + user_payload["sampled_changes_truncated"] = payload_truncated + + endpoint = _normalize_aoai_endpoint(endpoint) + prefer_v1 = endpoint.lower().endswith(".cognitiveservices.azure.com") + + retry_http_codes = {408, 429, 500, 502, 503, 504} + + def _routes_for( + messages: list[dict[str, str]], + token_limit: int, + token_param: str, + temperature: float | None, + ) -> list[dict[str, Any]]: + deployment_body: dict[str, Any] = { + "messages": messages, + token_param: token_limit, + } + v1_body: dict[str, Any] = { + "model": deployment, + "messages": messages, + token_param: token_limit, + } + if temperature is not None: + deployment_body["temperature"] = temperature + v1_body["temperature"] = temperature + deployment_route = { + "name": "deployments", + "url": ( + endpoint.rstrip("/") + + f"/openai/deployments/{quote(deployment)}/chat/completions?api-version={quote(api_version)}" + ), + 
"body": deployment_body, + } + v1_route = { + "name": "v1", + "url": endpoint.rstrip("/") + "/openai/v1/chat/completions", + "body": v1_body, + } + return [v1_route, deployment_route] if prefer_v1 else [deployment_route, v1_route] + + def _run_ai_request( + messages: list[dict[str, str]], + token_limit: int, + timeout_seconds: int, + ) -> tuple[str, str]: + all_errors: list[str] = [] + token_params = _aoai_token_param_candidates(deployment) + temperature_candidates = _aoai_temperature_candidates(deployment) + for temperature in temperature_candidates: + temperature_unsupported = False + stop_after_route = False + for token_param in token_params: + route_errors: list[str] = [] + token_param_unsupported = False + for route in _routes_for(messages, token_limit, token_param, temperature): + route_error = "" + for attempt in range(1, max_route_attempts + 1): + request = Request( + url=route["url"], + method="POST", + data=json.dumps(route["body"]).encode("utf-8"), + headers={ + "Content-Type": "application/json", + "api-key": api_key, + }, + ) + try: + with urlopen(request, timeout=timeout_seconds) as response: + payload = json.loads(response.read().decode("utf-8")) + content, content_error = _extract_ai_text_from_payload(payload) + if content: + return (content, "") + route_error = f"{route['name']}: {content_error}" + break + except HTTPError as exc: # pragma: no cover + raw_body = "" + try: + raw_body = exc.read().decode("utf-8", errors="replace") + except Exception: + raw_body = "" + combined = f"{exc} {raw_body}".strip() + route_error = f"{route['name']}: {combined}" + if exc.code == 400: + raw_lower = raw_body.lower() + if "unsupported parameter" in raw_lower and f"'{token_param}'" in raw_lower: + token_param_unsupported = True + break + if "unsupported value" in raw_lower and "'temperature'" in raw_lower and temperature is not None: + temperature_unsupported = True + break + # Try alternate route when the endpoint style doesn't match. + if exc.code == 404: + break + # Invalid credentials/authorization should fail fast. + if exc.code in (401, 403): + stop_after_route = True + break + if exc.code in retry_http_codes and attempt < max_route_attempts: + delay = _retry_after_seconds(exc) + if delay is None: + delay = min(2 ** (attempt - 1), 8) + time.sleep(delay) + continue + break + except URLError as exc: # pragma: no cover + if _is_timeout_like_error(exc) and attempt < max_route_attempts: + time.sleep(min(2 ** (attempt - 1), 8)) + continue + if _is_timeout_like_error(exc): + route_error = ( + f"{route['name']}: timed out after {max_route_attempts} attempts ({exc})" + ) + else: + route_error = f"{route['name']}: {exc}" + break + except Exception as exc: # pragma: no cover + if _is_timeout_like_error(exc) and attempt < max_route_attempts: + time.sleep(min(2 ** (attempt - 1), 8)) + continue + if _is_timeout_like_error(exc): + route_error = ( + f"{route['name']}: timed out after {max_route_attempts} attempts ({exc})" + ) + else: + route_error = f"{route['name']}: {exc}" + break + + if route_error: + route_errors.append(route_error) + if token_param_unsupported or stop_after_route or temperature_unsupported: + break + all_errors.extend(route_errors) + if stop_after_route: + break + if temperature_unsupported: + break + # Only try alternate token parameter when the current one is unsupported. + if not token_param_unsupported: + break + if stop_after_route: + break + # Continue to next temperature candidate only when current one is unsupported. 
+ if not temperature_unsupported: + break + return ("", " | ".join(all_errors).strip()) + + base_messages = [ + { + "role": "system", + "content": _reviewer_system_prompt(), + }, + {"role": "user", "content": json.dumps(user_payload, ensure_ascii=True)}, + ] + content, last_error = _run_ai_request(base_messages, max_ai_tokens, ai_timeout_seconds) + if content: + return (content, None) + + # If full payload times out, retry once with a compact payload. + timed_out = "timed out" in last_error.lower() or "timeout" in last_error.lower() + if timed_out and len(sampled_changes) > 12: + compact_limit = max(8, _env_int("PR_AI_COMPACT_CHANGE_LIMIT", 12)) + compact_token_limit = max(256, min(max_ai_tokens, _env_int("PR_AI_COMPACT_MAX_TOKENS", 600))) + compact_payload = dict(user_payload) + compact_payload["sampled_changes"] = sampled_changes[:compact_limit] + compact_payload["sampled_changes_used"] = len(compact_payload["sampled_changes"]) + compact_payload["sampled_changes_truncated"] = True + compact_messages = [ + { + "role": "system", + "content": _reviewer_system_prompt(), + }, + {"role": "user", "content": json.dumps(compact_payload, ensure_ascii=True)}, + ] + compact_content, compact_error = _run_ai_request( + compact_messages, + compact_token_limit, + compact_timeout_seconds, + ) + if compact_content: + return (compact_content, None) + last_error = f"{last_error} | compact-retry: {compact_error}".strip(" |") + + # Last-resort minimal prompt for reliability when richer payloads fail. + auth_like_error = any(marker in last_error.lower() for marker in ("401", "403", "unauthorized", "forbidden")) + if not auth_like_error: + minimal_change_limit = max(3, _env_int("PR_AI_MINIMAL_CHANGE_LIMIT", 5)) + minimal_token_limit = max(256, min(max_ai_tokens, _env_int("PR_AI_MINIMAL_MAX_TOKENS", 400))) + minimal_changes = [] + for item in sampled_changes[:minimal_change_limit]: + minimal_changes.append( + { + "operation": item.get("operation"), + "path": item.get("path"), + "risk": item.get("risk"), + "semantic_change": item.get("semantic_change"), + "change_source": item.get("change_source"), + "change_source_reasons": item.get("change_source_reasons"), + } + ) + minimal_payload = { + "workload": workload, + "change_scope": change_scope, + "security_sensitive": security_sensitive, + "posture_change": posture_change, + "top_changed_areas": top_changed_areas, + "change_source_assessment": change_source_assessment, + "deterministic_summary": _compact_deterministic_summary(deterministic_summary), + "changes": minimal_changes, + "instruction": _minimal_reviewer_instruction(), + } + minimal_messages = [ + { + "role": "system", + "content": _reviewer_system_prompt() + " Prioritize clarity, practical risk framing, and reviewer actionability.", + }, + {"role": "user", "content": json.dumps(minimal_payload, ensure_ascii=True)}, + ] + minimal_content, minimal_error = _run_ai_request( + minimal_messages, + minimal_token_limit, + minimal_timeout_seconds, + ) + if minimal_content: + return (minimal_content, None) + if minimal_error: + last_error = f"{last_error} | minimal-retry: {minimal_error}".strip(" |") + + print(f"WARNING: Azure OpenAI summary fallback triggered: {last_error}") + if "finish_reason=length" in last_error: + fallback_reason = ( + f"Azure OpenAI response was cut by token limit ({last_error}); " + "consider increasing PR_AI_MAX_TOKENS" + ) + elif not last_error: + fallback_reason = "Azure OpenAI returned no usable text output" + else: + fallback_reason = f"Azure OpenAI unavailable ({last_error})" + 
fallback = _build_fallback_narrative(
+        workload=workload,
+        changes=changes,
+        compact_changes=compact_changes,
+        top_changed_areas=top_changed_areas,
+        change_source_assessment=change_source_assessment,
+        fallback_reason=fallback_reason,
+    )
+    return (fallback, None)
+
+
+def _upsert_marked_block(description: str, block: str, start_marker: str, end_marker: str) -> str:
+    description = description or ""
+    pattern = re.compile(
+        re.escape(start_marker) + r".*?" + re.escape(end_marker),
+        flags=re.DOTALL,
+    )
+    if pattern.search(description):
+        return pattern.sub(block, description)
+    if description.endswith("\n"):
+        return description + "\n" + block + "\n"
+    if description:
+        return description + "\n\n" + block + "\n"
+    return block + "\n"
+
+
+def _upsert_auto_block(description: str, auto_block: str) -> str:
+    description = description or ""
+    cleaned = _remove_marked_block(description, AUTO_BLOCK_START, AUTO_BLOCK_END)
+    marker = "## Reviewer Quick Actions"
+    idx = cleaned.find(marker)
+    block = auto_block.strip()
+    if idx == -1:
+        if not cleaned:
+            return block + "\n"
+        if cleaned.endswith("\n"):
+            return cleaned + "\n" + block + "\n"
+        return cleaned + "\n\n" + block + "\n"
+
+    prefix = cleaned[:idx].rstrip()
+    suffix = cleaned[idx:].lstrip()
+    parts: list[str] = []
+    if prefix:
+        parts.append(prefix)
+    parts.append(block)
+    if suffix:
+        parts.append(suffix)
+    return "\n\n".join(parts).strip() + "\n"
+
+
+def _publish_draft_pr(
+    repo_api: str,
+    token: str,
+    pr_id: int,
+    title: str,
+    description: str,
+    is_draft: bool,
+) -> bool:
+    if not is_draft or not _delay_reviewer_notifications_enabled():
+        return False
+    _request_json(
+        f"{repo_api}/pullrequests/{pr_id}?api-version=7.1",
+        token=token,
+        method="PATCH",
+        body={
+            "title": title,
+            "description": description,
+            "isDraft": False,
+        },
+    )
+    return True
+
+
+def _existing_change_fingerprint(description: str) -> str:
+    description = description or ""
+    block_pattern = re.compile(
+        re.escape(AUTO_BLOCK_START) + r"(?P<body>.*?)" + re.escape(AUTO_BLOCK_END),
+        flags=re.DOTALL,
+    )
+    match = block_pattern.search(description)
+    if not match:
+        return ""
+
+    body = match.group("body")
+    fingerprint_pattern = re.compile(r"\*\*Change Fingerprint:\*\*\s*`(?P<fp>[a-fA-F0-9]+)`")
+    fingerprint_match = fingerprint_pattern.search(body)
+    if not fingerprint_match:
+        return ""
+    return fingerprint_match.group("fp").strip().lower()
+
+
+def _existing_summary_version(description: str) -> str:
+    body = _auto_block_body(description)
+    if not body:
+        return ""
+    version_pattern = re.compile(r"\*\*Summary Version:\*\*\s*`(?P<version>[^`]+)`")
+    version_match = version_pattern.search(body)
+    if not version_match:
+        return ""
+    return version_match.group("version").strip()
+
+
+def _auto_block_body(description: str) -> str:
+    description = description or ""
+    block_pattern = re.compile(
+        re.escape(AUTO_BLOCK_START) + r"(?P<body>.*?)" + re.escape(AUTO_BLOCK_END),
+        flags=re.DOTALL,
+    )
+    match = block_pattern.search(description)
+    if not match:
+        return ""
+    return match.group("body")
+
+
+def _auto_block_contains_ai_fallback(body: str) -> bool:
+    if not body:
+        return False
+    lowered = body.lower()
+    return ("ai fallback used:" in lowered) or ("ai summary unavailable:" in lowered)
+
+
+def _compact_deterministic_summary(deterministic_summary: str) -> str:
+    marker = "\n### Top Risk Items"
+    idx = deterministic_summary.find(marker)
+    if idx == -1:
+        return deterministic_summary.strip()
+    return deterministic_summary[:idx].strip()
+
+
+def 
_remove_marked_block(description: str, start_marker: str, end_marker: str) -> str:
+    description = description or ""
+    pattern = re.compile(
+        r"\n*"
+        + re.escape(start_marker)
+        + r".*?"
+        + re.escape(end_marker)
+        + r"\n*",
+        flags=re.DOTALL,
+    )
+    cleaned = pattern.sub("\n\n", description)
+    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
+    return cleaned.strip("\n")
+
+
+def _ticket_marker_for_path(path: str) -> str:
+    encoded = base64.urlsafe_b64encode(path.encode("utf-8")).decode("ascii").rstrip("=")
+    return f"Automation marker: {AUTO_TICKET_THREAD_PREFIX}{encoded}"
+
+
+def _path_from_ticket_marker(content: str) -> str | None:
+    marker_re = re.compile(
+        r"(?:^|\n)\s*(?:Automation marker:\s*)?"
+        + re.escape(AUTO_TICKET_THREAD_PREFIX)
+        + r"(?P<id>[A-Za-z0-9_-]+)\s*(?:$|\n)"
+    )
+    match = marker_re.search(content or "")
+    if not match:
+        return None
+    encoded = match.group("id")
+    padding = "=" * ((4 - len(encoded) % 4) % 4)
+    try:
+        return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
+    except Exception:
+        return None
+
+
+def _ai_review_thread_marker(workload: str) -> str:
+    return f"Automation marker: {AUTO_AI_REVIEW_THREAD_PREFIX}{workload.strip().lower()}"
+
+
+def _thread_has_matching_comment(comments: list[dict[str, Any]], content: str) -> bool:
+    expected = content.strip()
+    if not expected:
+        return False
+    for comment in comments:
+        current = str(comment.get("content", "") or "").strip()
+        if current == expected:
+            return True
+    return False
+
+
+def _find_marked_thread(threads: list[dict[str, Any]], marker: str) -> dict[str, Any] | None:
+    for thread in threads:
+        comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
+        for comment in comments:
+            content = str(comment.get("content", "") or "")
+            if marker in content:
+                return thread
+    return None
+
+
+def _thread_status_code(thread: dict[str, Any]) -> int:
+    status = thread.get("status")
+    if isinstance(status, int):
+        return status
+    if isinstance(status, str):
+        lowered = status.strip().lower()
+        mapping = {
+            "active": THREAD_STATUS_ACTIVE,
+            "fixed": THREAD_STATUS_FIXED,
+            "wontfix": THREAD_STATUS_WONT_FIX,
+            "closed": THREAD_STATUS_CLOSED,
+            "bydesign": THREAD_STATUS_BY_DESIGN,
+            "pending": THREAD_STATUS_PENDING,
+        }
+        return mapping.get(lowered, THREAD_STATUS_ACTIVE)
+    return THREAD_STATUS_ACTIVE
+
+
+def _is_thread_resolved(thread: dict[str, Any]) -> bool:
+    status = _thread_status_code(thread)
+    return status in (THREAD_STATUS_FIXED, THREAD_STATUS_WONT_FIX, THREAD_STATUS_CLOSED, THREAD_STATUS_BY_DESIGN)
+
+
+def _extract_thread_ticket(comments: list[dict[str, Any]], ticket_re: re.Pattern[str]) -> str:
+    for comment in comments:
+        content = str(comment.get("content", "") or "")
+        match = ticket_re.search(content)
+        if match:
+            return match.group(0)
+    return ""
+
+
+def _create_ticket_thread(
+    repo_api: str,
+    pr_id: int,
+    token: str,
+    path: str,
+    ticket_pattern: str,
+    change_summary: str,
+    risk_summary: str,
+) -> None:
+    marker = _ticket_marker_for_path(path)
+    _debug(f"Creating ticket thread for path: {path}")
+    content = (
+        "Change needed\n\n"
+        f"Policy file: {path}\n\n"
+        f"Detected change (auto): {change_summary}\n\n"
+        f"Risk context: {risk_summary}\n\n"
+        "Please reply with the related change ticket ID in this thread.\n"
+        "Use /reject if this specific policy change should be excluded from the rolling PR.\n"
+        "Use /accept to keep it in PR scope.\n"
+        "Resolve this thread after reviewer confirmation.\n\n"
+        f"Suggested ticket format: 
{ticket_pattern}\n\n" + f"{marker}" + ) + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", + token=token, + method="POST", + body={ + "comments": [ + { + "parentCommentId": 0, + "content": content, + "commentType": 1, + } + ], + "status": THREAD_STATUS_ACTIVE, + }, + ) + + +def _plain_text_ai_narrative(text: str) -> str: + formatted = _format_ai_narrative_markdown(text) + if not formatted: + return "" + lines: list[str] = [] + for raw in formatted.splitlines(): + line = raw.rstrip() + if line.startswith("#### "): + lines.append(line[5:] + ":") + continue + lines.append(line) + return "\n".join(lines).strip() + + +def _build_full_ai_review_thread_content(workload: str, ai_summary: str) -> str: + marker = _ai_review_thread_marker(workload) + plain_ai = _plain_text_ai_narrative(ai_summary) + return ( + "AI reviewer narrative (full)\n\n" + "PR description uses a compact review summary because of Azure DevOps description size limits.\n\n" + f"{plain_ai}\n\n" + f"{marker}" + ).strip() + + +def _create_ai_review_thread( + repo_api: str, + pr_id: int, + token: str, + workload: str, + ai_summary: str, +) -> None: + content = _build_full_ai_review_thread_content(workload, ai_summary) + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", + token=token, + method="POST", + body={ + "comments": [ + { + "parentCommentId": 0, + "content": content, + "commentType": 1, + } + ], + "status": THREAD_STATUS_ACTIVE, + }, + ) + + +def _add_thread_comment( + repo_api: str, + pr_id: int, + thread_id: int, + token: str, + content: str, +) -> None: + _debug(f"Adding comment to thread_id={thread_id}") + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads/{thread_id}/comments?api-version=7.1", + token=token, + method="POST", + body={ + "parentCommentId": 0, + "content": content, + "commentType": 1, + }, + ) + + +def _sync_full_ai_review_thread( + repo_api: str, + pr_id: int, + token: str, + workload: str, + ai_summary: str, +) -> bool: + marker = _ai_review_thread_marker(workload) + desired_content = _build_full_ai_review_thread_content(workload, ai_summary) + threads_payload = _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", + token=token, + ) + threads = threads_payload.get("value", []) if isinstance(threads_payload, dict) else [] + thread = _find_marked_thread(threads, marker) + if thread is None: + _create_ai_review_thread(repo_api, pr_id, token, workload, ai_summary) + return True + + comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else [] + if _thread_has_matching_comment(comments, desired_content): + return False + + thread_id = _thread_id(thread) + if thread_id <= 0: + _create_ai_review_thread(repo_api, pr_id, token, workload, ai_summary) + return True + + if _is_thread_resolved(thread): + _set_thread_status(repo_api, pr_id, thread_id, token, THREAD_STATUS_ACTIVE) + _add_thread_comment(repo_api, pr_id, thread_id, token, desired_content) + return True + + +def _set_thread_status( + repo_api: str, + pr_id: int, + thread_id: int, + token: str, + status: int, +) -> None: + _debug(f"Updating thread status: thread_id={thread_id}, status={status}") + _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads/{thread_id}?api-version=7.1", + token=token, + method="PATCH", + body={"status": status}, + ) + + +def _has_matching_detected_change_comment( + comments: list[dict[str, Any]], + change_summary: str, + risk_summary: str, +) -> bool: + expected_change = f"Detected change (auto): {change_summary}" 
+ expected_risk = f"Risk context: {risk_summary}" + for comment in comments: + content = str(comment.get("content", "") or "") + if expected_change in content and expected_risk in content: + return True + return False + + +def _thread_id(thread: dict[str, Any]) -> int: + try: + tid = thread.get("id") + if tid is None: + return -1 + return int(tid) + except Exception: + return -1 + + +def _build_ticket_change_context( + repo_root: str, + baseline_branch: str, + drift_branch: str, + changes: list[ChangeItem], +) -> dict[str, tuple[str, str]]: + context: dict[str, tuple[str, str]] = {} + + for item in changes: + if _is_doc_like(item.path) or _is_report_like(item.path): + continue + baseline_path = item.old_path if item.operation == "Renamed" and item.old_path else item.path + old_excerpt = _load_policy_excerpt(repo_root, baseline_branch, baseline_path) + new_excerpt = _load_policy_excerpt(repo_root, drift_branch, item.path) + semantic = _extract_semantic_change(old_excerpt, new_excerpt, item.path).strip() + if not semantic or semantic == "No semantic key changes detected": + semantic = "configuration content modified" + if len(semantic) > 320: + semantic = semantic[:317] + "..." + change_summary = f"{item.operation}: {semantic}" + risk_summary = f"{item.risk_label} ({item.policy_type}): {item.reason}" + context[item.path] = (change_summary, risk_summary) + + return context + + +def _enforce_change_ticket_threads( + repo_api: str, + pr_id: int, + token: str, + changes: list[ChangeItem], + ticket_pattern: str, + change_context: dict[str, tuple[str, str]], +) -> tuple[int, int]: + tracked_paths = sorted( + {item.path for item in changes if not _is_doc_like(item.path) and not _is_report_like(item.path)} + ) + tracked_set = set(tracked_paths) + _debug(f"Tracked changed paths: count={len(tracked_paths)}") + if tracked_paths: + preview = "; ".join(tracked_paths[:10]) + _debug(f"Tracked paths preview: {preview}") + + threads_payload = _request_json( + f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", + token=token, + ) + threads = threads_payload.get("value", []) + _debug(f"Fetched PR threads: total={len(threads)}") + + auto_threads_by_path: dict[str, dict[str, Any]] = {} + for thread in threads: + comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else [] + marker_path: str | None = None + for comment in comments: + content = str(comment.get("content", "") or "") + marker_path = _path_from_ticket_marker(content) + if marker_path: + break + if marker_path: + auto_threads_by_path[marker_path] = thread + _debug(f"Detected auto ticket threads: total={len(auto_threads_by_path)}") + + # Close stale auto-generated threads when file is no longer part of drift. 
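+    # Note: matching relies on the base64 path marker that _create_ticket_thread embeds in each auto thread.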
+ closed_count = 0 + for path, thread in auto_threads_by_path.items(): + if path in tracked_set: + continue + if not _is_thread_resolved(thread): + thread_id = _thread_id(thread) + if thread_id > 0: + _debug(f"Closing stale auto thread: thread_id={thread_id}, path={path}") + _set_thread_status(repo_api, pr_id, thread_id, token, THREAD_STATUS_CLOSED) + closed_count += 1 + + created_count = 0 + for path in tracked_paths: + thread = auto_threads_by_path.get(path) + summary_tuple = change_context.get(path, ("`Modified` configuration content modified", "n/a")) + change_summary, risk_summary = summary_tuple + if not thread: + _create_ticket_thread( + repo_api=repo_api, + pr_id=pr_id, + token=token, + path=path, + ticket_pattern=ticket_pattern, + change_summary=change_summary, + risk_summary=risk_summary, + ) + created_count += 1 + else: + if _is_thread_resolved(thread): + thread_id = _thread_id(thread) + if thread_id > 0: + _debug(f"Re-opening resolved auto thread for changed path: thread_id={thread_id}, path={path}") + _set_thread_status(repo_api, pr_id, thread_id, token, THREAD_STATUS_ACTIVE) + _add_thread_comment( + repo_api=repo_api, + pr_id=pr_id, + thread_id=thread_id, + token=token, + content=( + "Policy drift changed again for this path; re-opening thread for fresh review. " + "Use `/accept` or `/reject` and resolve after decision." + ), + ) + comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else [] + has_matching_change = _has_matching_detected_change_comment( + comments=comments, + change_summary=change_summary, + risk_summary=risk_summary, + ) + if not has_matching_change: + thread_id = _thread_id(thread) + if thread_id > 0: + _add_thread_comment( + repo_api=repo_api, + pr_id=pr_id, + thread_id=thread_id, + token=token, + content=( + f"Detected change (auto): {change_summary}\n\n" + f"Risk context: {risk_summary}" + ), + ) + _debug( + f"Auto thread already exists for path={path} " + f"(thread_id={_thread_id(thread)}, resolved={_is_thread_resolved(thread)})" + ) + + return (created_count, closed_count) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Update rolling PR reviewer summary") + parser.add_argument("--repo-root", required=True) + parser.add_argument("--workload", required=True) + parser.add_argument("--backup-folder", required=True) + parser.add_argument("--reports-subdir", required=True) + parser.add_argument("--drift-branch", required=True) + parser.add_argument("--baseline-branch", required=True) + args = parser.parse_args() + require_ticket_gate = _env_bool("REQUIRE_CHANGE_TICKETS") + _debug( + "Ticket gate flags: " + f"REQUIRE_CHANGE_TICKETS={os.environ.get('REQUIRE_CHANGE_TICKETS', '')!r}, " + f"effective={require_ticket_gate}" + ) + + try: + baseline_branch = _normalize_branch_name(args.baseline_branch) + drift_branch = _normalize_branch_name(args.drift_branch) + + token = _env("SYSTEM_ACCESSTOKEN") + collection_uri = _env("SYSTEM_COLLECTIONURI").rstrip("/") + project = _env("SYSTEM_TEAMPROJECT") + repository_id = _env("BUILD_REPOSITORY_ID") + + source_ref = f"refs/heads/{drift_branch}" + target_ref = f"refs/heads/{baseline_branch}" + repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}" + + query = urlencode( + { + "searchCriteria.status": "active", + "searchCriteria.sourceRefName": source_ref, + "searchCriteria.targetRefName": target_ref, + "api-version": "7.1", + }, + safe="/", + ) + existing = _request_json(f"{repo_api}/pullrequests?{query}", token=token) + items = 
existing.get("value", []) + if not items: + print("No active rolling PR found; skipping summary update.") + return 0 + + pr = items[0] + pr_id = pr.get("pullRequestId") + if not pr_id: + print("Active PR without pullRequestId; skipping summary update.") + return 0 + _debug(f"Active rolling PR detected: pr_id={pr_id}, source={source_ref}, target={target_ref}") + + _run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch]) + diff_output = _run_diff_name_status(args.repo_root, baseline_branch, drift_branch) + changes = _parse_changes(diff_output, args.backup_folder, args.reports_subdir) + _debug(f"Parsed non-doc/report changes for summary: count={len(changes)}") + changes, ignored_operational_count = _filter_operational_noise_changes( + repo_root=args.repo_root, + baseline_branch=baseline_branch, + drift_branch=drift_branch, + workload=args.workload, + changes=changes, + ) + if ignored_operational_count > 0: + print( + "Ignored operational-only drift changes from summary/ticket scope: " + f"{ignored_operational_count}" + ) + changes_fingerprint = _changes_fingerprint(changes).lower() + deterministic = _build_deterministic_summary( + changes, + drift_branch, + baseline_branch, + ignored_operational_count=ignored_operational_count, + ) + + full_pr = _request_json(f"{repo_api}/pullrequests/{pr_id}?api-version=7.1", token=token) + current_description = full_pr.get("description", "") + pr_is_draft = bool(full_pr.get("isDraft")) + existing_fingerprint = _existing_change_fingerprint(current_description) + existing_summary_version = _existing_summary_version(current_description) + current_auto_body = _auto_block_body(current_description) + deterministic_already_present = deterministic in current_auto_body if current_auto_body else False + ai_fallback_in_current_block = _auto_block_contains_ai_fallback(current_auto_body) + refresh_on_fallback = _env_bool("PR_AI_FORCE_REFRESH_ON_FALLBACK", default=True) + if existing_fingerprint and existing_fingerprint == changes_fingerprint: + summary_version_matches = existing_summary_version == AUTO_SUMMARY_VERSION + should_skip = deterministic_already_present and summary_version_matches + if refresh_on_fallback and ai_fallback_in_current_block: + should_skip = False + + if should_skip: + published = _publish_draft_pr( + repo_api=repo_api, + token=token, + pr_id=int(pr_id), + title=full_pr.get("title", pr.get("title", f"{args.workload} drift review (rolling)")), + description=current_description, + is_draft=pr_is_draft, + ) + if published: + print(f"Published draft PR #{pr_id} after confirming summary was already up to date.") + print( + f"Automated review summary fingerprint unchanged for PR #{pr_id} " + f"({args.workload}); skipping description update." + ) + if require_ticket_gate: + ticket_pattern = _env("CHANGE_TICKET_REGEX", required=False, default=r"[A-Z][A-Z0-9]+-\d+") + change_context = _build_ticket_change_context( + repo_root=args.repo_root, + baseline_branch=baseline_branch, + drift_branch=drift_branch, + changes=changes, + ) + created_count, closed_count = _enforce_change_ticket_threads( + repo_api=repo_api, + pr_id=pr_id, + token=token, + changes=changes, + ticket_pattern=ticket_pattern, + change_context=change_context, + ) + print( + "Change-needed thread sync complete: " + f"created={created_count}, closed_stale={closed_count}. " + "Merge policy should enforce unresolved thread handling." 
+ ) + else: + print("Change-needed thread sync disabled (set REQUIRE_CHANGE_TICKETS=true).") + return 0 + if deterministic_already_present and refresh_on_fallback and ai_fallback_in_current_block: + print( + f"Automated review summary fingerprint unchanged for PR #{pr_id} ({args.workload}), " + "but prior AI fallback marker detected; retrying AI narrative refresh." + ) + elif not summary_version_matches: + print( + f"Automated review summary fingerprint unchanged for PR #{pr_id} ({args.workload}), " + f"but summary version changed ({existing_summary_version or 'unversioned'} -> {AUTO_SUMMARY_VERSION}); refreshing description." + ) + else: + print( + f"Automated review summary fingerprint unchanged for PR #{pr_id} ({args.workload}), " + "but summary format/content changed; refreshing description." + ) + + ai_summary, ai_error = _call_azure_openai( + changes, + deterministic, + args.workload, + args.repo_root, + baseline_branch, + drift_branch, + ) + + auto_lines = [ + AUTO_BLOCK_START, + f"## Automated Review Summary ({args.workload})", + "", + f"- **Summary Version:** `{AUTO_SUMMARY_VERSION}`", + deterministic, + ] + if ai_summary: + formatted_ai = _format_ai_narrative_markdown(ai_summary) + auto_lines.extend(["", "### AI Reviewer Narrative", formatted_ai]) + elif ai_error: + auto_lines.extend(["", f"_AI summary unavailable: {ai_error}_"]) + auto_lines.append(AUTO_BLOCK_END) + auto_block = "\n".join(auto_lines) + updated_description = _upsert_auto_block(current_description, auto_block) + # Cleanup legacy description-based ticket checklist if present. + updated_description = _remove_marked_block(updated_description, TICKET_BLOCK_START, TICKET_BLOCK_END) + + patch_url = f"{repo_api}/pullrequests/{pr_id}?api-version=7.1" + patch_title = full_pr.get("title", pr.get("title", f"{args.workload} drift review (rolling)")) + summary_updated = False + final_description = current_description + description_compacted = False + if updated_description != current_description: + try: + _request_json( + patch_url, + token=token, + method="PATCH", + body={ + "title": patch_title, + "description": updated_description, + }, + ) + summary_updated = True + final_description = updated_description + except RuntimeError as exc: + if not _is_description_limit_error(exc): + raise + description_compacted = True + compact_ai_block = "" + if ai_summary: + compact_ai_block = "\n### AI Reviewer Narrative\n" + COMPACT_AI_THREAD_NOTE + elif ai_error: + compact_ai_block = f"\n_AI summary unavailable: {ai_error}_" + compact_auto_block = "\n".join( + [ + AUTO_BLOCK_START, + f"## Automated Review Summary ({args.workload})", + "", + f"- **Summary Version:** `{AUTO_SUMMARY_VERSION}`", + _compact_deterministic_summary(deterministic), + compact_ai_block, + AUTO_BLOCK_END, + ] + ) + compact_description = _upsert_auto_block(current_description, compact_auto_block) + compact_description = _remove_marked_block( + compact_description, TICKET_BLOCK_START, TICKET_BLOCK_END + ) + if compact_description == updated_description: + raise + print( + "WARNING: Full PR summary update failed; retrying with compact summary block. 
" + f"Reason: {exc}" + ) + try: + _request_json( + patch_url, + token=token, + method="PATCH", + body={ + "title": patch_title, + "description": compact_description, + }, + ) + summary_updated = True + final_description = compact_description + except RuntimeError as compact_exc: + if not _is_description_limit_error(compact_exc): + raise + ultra_compact_block = "\n".join( + [ + AUTO_BLOCK_START, + f"## Automated Review Summary ({args.workload})", + "", + f"- **Summary Version:** `{AUTO_SUMMARY_VERSION}`", + _compact_deterministic_summary(deterministic), + "", + COMPACT_AI_THREAD_NOTE, + AUTO_BLOCK_END, + ] + ) + ultra_compact_description = _upsert_auto_block(current_description, ultra_compact_block) + ultra_compact_description = _remove_marked_block( + ultra_compact_description, TICKET_BLOCK_START, TICKET_BLOCK_END + ) + print("WARNING: Compact summary still too large; retrying with ultra-compact block.") + _request_json( + patch_url, + token=token, + method="PATCH", + body={ + "title": patch_title, + "description": ultra_compact_description, + }, + ) + summary_updated = True + final_description = ultra_compact_description + else: + final_description = updated_description + + if summary_updated: + print(f"Updated automated review summary for PR #{pr_id} ({args.workload}).") + else: + print(f"Automated review summary already up to date for PR #{pr_id} ({args.workload}).") + if ai_summary and description_compacted: + try: + thread_updated = _sync_full_ai_review_thread( + repo_api=repo_api, + pr_id=int(pr_id), + token=token, + workload=args.workload, + ai_summary=ai_summary, + ) + if thread_updated: + print(f"Updated full AI reviewer narrative thread for PR #{pr_id} ({args.workload}).") + else: + print(f"Full AI reviewer narrative thread already up to date for PR #{pr_id} ({args.workload}).") + except Exception as exc: + print(f"WARNING: Failed to sync full AI reviewer narrative thread for PR #{pr_id}: {exc}") + if _publish_draft_pr( + repo_api=repo_api, + token=token, + pr_id=int(pr_id), + title=patch_title, + description=final_description, + is_draft=pr_is_draft, + ): + print(f"Published draft PR #{pr_id} after automated review summary update.") + if require_ticket_gate: + ticket_pattern = _env("CHANGE_TICKET_REGEX", required=False, default=r"[A-Z][A-Z0-9]+-\d+") + change_context = _build_ticket_change_context( + repo_root=args.repo_root, + baseline_branch=baseline_branch, + drift_branch=drift_branch, + changes=changes, + ) + created_count, closed_count = _enforce_change_ticket_threads( + repo_api=repo_api, + pr_id=pr_id, + token=token, + changes=changes, + ticket_pattern=ticket_pattern, + change_context=change_context, + ) + print( + "Change-needed thread sync complete: " + f"created={created_count}, closed_stale={closed_count}. " + "Merge policy should enforce unresolved thread handling." + ) + else: + print("Change-needed thread sync disabled (set REQUIRE_CHANGE_TICKETS=true).") + return 0 + except Exception as exc: + # Non-fatal on purpose: backup and PR flow should continue even if summary generation fails. 
+ print(f"WARNING: Failed to update automated review summary: {exc}", file=sys.stderr) + if require_ticket_gate: + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/validate_backup_outputs.py b/scripts/validate_backup_outputs.py new file mode 100644 index 0000000..fd47071 --- /dev/null +++ b/scripts/validate_backup_outputs.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +"""Validate backup outputs for Intune and Entra workloads.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + + +def to_bool(value: str) -> bool: + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--workload", required=True, choices=["intune", "entra"]) + parser.add_argument("--mode", default="light", choices=["light", "full"]) + parser.add_argument("--root", required=True, help="Workload backup root path.") + parser.add_argument("--reports-root", required=True, help="Workload reports root path.") + parser.add_argument("--include-named-locations", default="false") + parser.add_argument("--include-authentication-strengths", default="false") + parser.add_argument("--include-conditional-access", default="false") + parser.add_argument("--include-enterprise-applications", default="false") + parser.add_argument("--include-enterprise-applications-effective", default="false") + parser.add_argument("--include-app-registrations", default="false") + parser.add_argument("--include-app-registrations-effective", default="false") + return parser.parse_args() + + +def _require_file(path: Path, label: str, errors: list[str]) -> None: + if not path.is_file(): + errors.append(f"Missing {label}: {path}") + + +def _json_count(root: Path) -> int: + if not root.exists(): + return 0 + return sum(1 for _ in root.rglob("*.json")) + + +def _validate_intune(root: Path, reports_root: Path, errors: list[str]) -> None: + if not root.exists(): + errors.append(f"Missing Intune backup root: {root}") + return + + json_count = _json_count(root) + if json_count == 0: + errors.append(f"Intune backup root has no JSON exports: {root}") + + _require_file(reports_root / "policy-assignments.md", "Intune assignment markdown report", errors) + _require_file(reports_root / "policy-assignments.csv", "Intune assignment CSV report", errors) + _require_file(reports_root / "object-inventory-all.csv", "Intune object inventory CSV", errors) + + if errors: + return + print(f"Intune output validation passed: jsonFiles={json_count}") + + +def _validate_entra(root: Path, reports_root: Path, args: argparse.Namespace, errors: list[str]) -> None: + if not root.exists(): + errors.append(f"Missing Entra backup root: {root}") + return + + include_named_locations = to_bool(args.include_named_locations) + include_auth_strengths = to_bool(args.include_authentication_strengths) + include_conditional_access = to_bool(args.include_conditional_access) + include_enterprise_apps = to_bool(args.include_enterprise_applications) + include_enterprise_apps_effective = to_bool(args.include_enterprise_applications_effective) + include_app_registrations = to_bool(args.include_app_registrations) + include_app_registrations_effective = to_bool(args.include_app_registrations_effective) + + expected_category_indexes: list[tuple[str, bool]] = [ + ("Named Locations", include_named_locations), + ("Authentication Strengths", include_auth_strengths), + ("Conditional Access", 
include_conditional_access), + ("App Registrations", include_app_registrations_effective), + ("Enterprise Applications", include_enterprise_apps_effective), + ] + + for category_name, is_required in expected_category_indexes: + if not is_required: + continue + index_path = root / category_name / f"{category_name}.md" + _require_file(index_path, f"Entra export index for '{category_name}'", errors) + + _require_file(reports_root / "object-inventory-all.csv", "Entra object inventory CSV", errors) + + if include_conditional_access: + _require_file(reports_root / "policy-assignments.md", "Entra assignment markdown report", errors) + _require_file(reports_root / "policy-assignments.csv", "Entra assignment CSV report", errors) + + if include_app_registrations_effective or include_enterprise_apps_effective: + _require_file(reports_root / "apps-inventory.csv", "Entra apps inventory CSV", errors) + + if errors: + return + + json_count = _json_count(root) + print( + "Entra output validation passed: " + f"jsonFiles={json_count}, " + f"mode={args.mode}, " + f"enterpriseAppsConfigured={str(include_enterprise_apps).lower()}, " + f"enterpriseAppsEffective={str(include_enterprise_apps_effective).lower()}, " + f"appRegistrationsConfigured={str(include_app_registrations).lower()}, " + f"appRegistrationsEffective={str(include_app_registrations_effective).lower()}" + ) + + +def main() -> int: + args = parse_args() + root = Path(args.root).resolve() + reports_root = Path(args.reports_root).resolve() + errors: list[str] = [] + + if args.workload == "intune": + _validate_intune(root=root, reports_root=reports_root, errors=errors) + else: + _validate_entra(root=root, reports_root=reports_root, args=args, errors=errors) + + if errors: + print("Backup output validation failed:") + for item in errors: + print(f" - {item}") + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/templates/variables-common.yml b/templates/variables-common.yml new file mode 100644 index 0000000..b5b193a --- /dev/null +++ b/templates/variables-common.yml @@ -0,0 +1,48 @@ +# Common variables shared across backup and review-sync pipelines. 
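+#
+# Typical usage (group name illustrative; see templates/variables-tenant.yml):
+#   variables:
+#     - group: vg-astral-tenant
+#     - template: templates/variables-common.yml
+#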
+# Include with: variables: [ template: templates/variables-common.yml ] + +variables: + - name: BASELINE_BRANCH + value: main + - name: DRIFT_BRANCH_INTUNE + value: drift/intune + - name: DRIFT_BRANCH_ENTRA + value: drift/entra + - name: BACKUP_FOLDER + value: tenant-state + - name: REPORTS_SUBDIR + value: reports + - name: ENABLE_WORKLOAD_INTUNE + value: true + - name: ENABLE_WORKLOAD_ENTRA + value: true + - name: ENABLE_PR_REVIEW_SUMMARY + value: true + - name: ENABLE_PR_REVIEWER_DECISIONS + value: true + - name: ENABLE_PR_AI_SUMMARY + value: true + - name: ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS + value: true + - name: REQUIRE_CHANGE_TICKETS + value: false + - name: CHANGE_TICKET_REGEX + value: "[A-Z][A-Z0-9]+-[0-9]+" + - name: DEBUG_CHANGE_TICKET_THREADS + value: false + - name: AZURE_OPENAI_API_VERSION + value: "2024-12-01-preview" + - name: AUTO_REMEDIATE_AFTER_MERGE + value: true + - name: AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS + value: 168 + - name: AUTO_REMEDIATE_DRY_RUN + value: false + - name: AUTO_REMEDIATE_UPDATE_ASSIGNMENTS + value: true + - name: AUTO_REMEDIATE_REMOVE_OBJECTS + value: false + - name: AUTO_REMEDIATE_MAX_WORKERS + value: 10 + - name: AUTO_REMEDIATE_EXCLUDE_CSV + value: "" diff --git a/templates/variables-tenant.yml b/templates/variables-tenant.yml new file mode 100644 index 0000000..ede6f33 --- /dev/null +++ b/templates/variables-tenant.yml @@ -0,0 +1,59 @@ +# Tenant-specific variables for ASTRAL +# +# Copy these variables into an Azure DevOps Variable Group (e.g. vg-astral-tenant) +# and reference that group in your pipeline YAMLs. Do not commit secrets to Git. +# +# Example pipeline reference: +# variables: +# - group: vg-astral-tenant +# - template: templates/variables-common.yml + +variables: + # Required: Microsoft 365 tenant domain + - name: TENANT_NAME + value: contoso.onmicrosoft.com + + # Required: Azure DevOps service connection name (workload federated credential) + - name: SERVICE_CONNECTION_NAME + value: sc-astral-backup + + # Required: Git commit identity used by the pipeline + - name: USER_NAME + value: ASTRAL Backup Service + + # Required: Git commit email used by the pipeline + - name: USER_EMAIL + value: astral-backup@contoso.com + + # Optional: Agent pool name. Default uses Azure-hosted agents. + - name: AGENT_POOL_NAME + value: Azure Pipelines + + # Optional: Timezone for light/full run decisions. Must be a valid tz database name. + - name: BACKUP_TIMEZONE + value: Europe/Prague + + # Optional: Full-run hour in BACKUP_TIMEZONE (24h format, zero-padded). + # The main pipeline runs hourly; only this hour triggers a full export. + - name: FULL_RUN_HOUR + value: "00" + + # Optional: Cron schedule for the main backup pipeline. + - name: SCHEDULE_CRON + value: "0 * * * *" + + # Optional but recommended: pipeline definition ID of azure-pipelines-restore.yml. + # Set this after you have imported the restore pipeline into Azure DevOps. + - name: AUTO_REMEDIATE_RESTORE_PIPELINE_ID + value: "" + + # Optional: Azure OpenAI settings for AI-assisted PR summaries. + # Store AZURE_OPENAI_API_KEY as a secret variable. + - name: ENABLE_PR_AI_SUMMARY + value: false + - name: AZURE_OPENAI_ENDPOINT + value: "" + - name: AZURE_OPENAI_DEPLOYMENT + value: "" + - name: AZURE_OPENAI_API_KEY + value: "" diff --git a/tenant-state/README.md b/tenant-state/README.md new file mode 100644 index 0000000..9cb4a67 --- /dev/null +++ b/tenant-state/README.md @@ -0,0 +1,4 @@ +# tenant-state + +This directory is populated automatically by the ASTRAL pipeline. 
+Do not place manual files here; they will be overwritten on the next export. diff --git a/tenant-state/entra/.gitkeep b/tenant-state/entra/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tenant-state/intune/.gitkeep b/tenant-state/intune/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tenant-state/reports/entra/.gitkeep b/tenant-state/reports/entra/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tenant-state/reports/intune/.gitkeep b/tenant-state/reports/intune/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_ensure_rolling_pr.py b/tests/test_ensure_rolling_pr.py new file mode 100644 index 0000000..5cc0ec1 --- /dev/null +++ b/tests/test_ensure_rolling_pr.py @@ -0,0 +1,342 @@ +from __future__ import annotations + +import importlib.util +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "ensure_rolling_pr.py" + + +def load_module(): + # Preload common helper so the script can import it. + common_path = MODULE_PATH.parent / "common.py" + common_spec = importlib.util.spec_from_file_location("common", common_path) + if common_spec is not None and common_spec.loader is not None: + common_mod = importlib.util.module_from_spec(common_spec) + sys.modules["common"] = common_mod + common_spec.loader.exec_module(common_mod) + + module_name = "ensure_rolling_pr" + spec = importlib.util.spec_from_file_location(module_name, MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _run(cmd: list[str], cwd: Path) -> None: + subprocess.run(cmd, cwd=cwd, check=True, capture_output=True, text=True) + + +class EnsureRollingPrTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def test_is_workload_config_path_filters_docs_and_reports(self) -> None: + is_path = self.module._is_workload_config_path + + self.assertTrue( + is_path( + "tenant-state/entra/Conditional Access/policy.json", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + ) + self.assertFalse( + is_path( + "tenant-state/entra/Conditional Access/policy.md", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + ) + self.assertFalse( + is_path( + "tenant-state/reports/entra/assignment_report.md", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + ) + + def test_config_fingerprint_ignores_docs_and_reports(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) + _run(["git", "init"], repo) + _run(["git", "config", "user.name", "Test"], repo) + _run(["git", "config", "user.email", "test@example.com"], repo) + + config_file = repo / "tenant-state" / "entra" / "Conditional Access" / "policy.json" + report_file = repo / "tenant-state" / "reports" / "entra" / "summary.md" + doc_file = repo / "tenant-state" / "entra" / "README.md" + config_file.parent.mkdir(parents=True, exist_ok=True) + report_file.parent.mkdir(parents=True, exist_ok=True) + doc_file.parent.mkdir(parents=True, exist_ok=True) + config_file.write_text('{"state":"enabled"}\n', encoding="utf-8") + report_file.write_text("report v1\n", encoding="utf-8") + 
doc_file.write_text("doc v1\n", encoding="utf-8") + + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "initial"], repo) + + fp1 = self.module._config_fingerprint_from_local_tree( + repo_root=str(repo), + commitish="HEAD", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + + report_file.write_text("report v2\n", encoding="utf-8") + doc_file.write_text("doc v2\n", encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "doc/report only"], repo) + fp2 = self.module._config_fingerprint_from_local_tree( + repo_root=str(repo), + commitish="HEAD", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + + config_file.write_text('{"state":"disabled"}\n', encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "config change"], repo) + fp3 = self.module._config_fingerprint_from_local_tree( + repo_root=str(repo), + commitish="HEAD", + workload_dir="entra", + backup_folder="tenant-state", + reports_subdir="reports", + ) + + self.assertTrue(fp1) + self.assertEqual(fp1, fp2) + self.assertNotEqual(fp2, fp3) + + def test_ref_has_commit_for_local_and_missing_ref(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) + _run(["git", "init"], repo) + _run(["git", "config", "user.name", "Test"], repo) + _run(["git", "config", "user.email", "test@example.com"], repo) + (repo / "README.md").write_text("x\n", encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "init"], repo) + + self.assertTrue(self.module._ref_has_commit(str(repo), "HEAD")) + self.assertFalse(self.module._ref_has_commit(str(repo), "origin/does-not-exist")) + + def test_workload_config_diff_exists_ignores_docs_and_reports(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) + _run(["git", "init"], repo) + _run(["git", "config", "user.name", "Test"], repo) + _run(["git", "config", "user.email", "test@example.com"], repo) + + config_file = repo / "tenant-state" / "intune" / "Device Configurations" / "policy.json" + report_file = repo / "tenant-state" / "reports" / "intune" / "summary.md" + doc_file = repo / "tenant-state" / "intune" / "README.md" + config_file.parent.mkdir(parents=True, exist_ok=True) + report_file.parent.mkdir(parents=True, exist_ok=True) + doc_file.parent.mkdir(parents=True, exist_ok=True) + config_file.write_text('{"setting":"enabled"}\n', encoding="utf-8") + report_file.write_text("report v1\n", encoding="utf-8") + doc_file.write_text("doc v1\n", encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "baseline"], repo) + baseline_commit = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + + report_file.write_text("report v2\n", encoding="utf-8") + doc_file.write_text("doc v2\n", encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "doc only"], repo) + doc_only_commit = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + + config_file.write_text('{"setting":"disabled"}\n', encoding="utf-8") + _run(["git", "add", "."], repo) + _run(["git", "commit", "-m", "config change"], repo) + config_change_commit = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + + self.assertFalse( + self.module._workload_config_diff_exists( + 
repo_root=str(repo), + baseline_commitish=baseline_commit, + drift_commitish=doc_only_commit, + workload_dir="intune", + backup_folder="tenant-state", + reports_subdir="reports", + ) + ) + self.assertTrue( + self.module._workload_config_diff_exists( + repo_root=str(repo), + baseline_commitish=baseline_commit, + drift_commitish=config_change_commit, + workload_dir="intune", + backup_folder="tenant-state", + reports_subdir="reports", + ) + ) + + def test_main_suppresses_pr_creation_when_drift_matches_baseline_config(self) -> None: + env = { + "SYSTEM_ACCESSTOKEN": "token", + "SYSTEM_COLLECTIONURI": "https://dev.azure.com/example", + "SYSTEM_TEAMPROJECT": "Project", + "BUILD_REPOSITORY_ID": "repo-id", + } + + with patch.dict(os.environ, env, clear=False): + with patch.object( + sys, + "argv", + [ + "ensure_rolling_pr.py", + "--repo-root", + "/tmp/repo", + "--workload", + "intune", + "--drift-branch", + "drift/intune", + "--baseline-branch", + "main", + "--pr-title", + "Intune drift review (rolling)", + ], + ): + with patch.object(self.module, "_query_prs", return_value=[]): + with patch.object(self.module, "_run_git"): + with patch.object(self.module, "_ref_has_commit", return_value=True): + with patch.object(self.module, "_workload_config_diff_exists", return_value=False): + with patch.object(self.module, "_request_json") as request_json: + result = self.module.main() + + self.assertEqual(result, 0) + request_json.assert_not_called() + + def test_main_creates_pr_as_draft_when_notification_delay_enabled(self) -> None: + env = { + "SYSTEM_ACCESSTOKEN": "token", + "SYSTEM_COLLECTIONURI": "https://dev.azure.com/example", + "SYSTEM_TEAMPROJECT": "Project", + "BUILD_REPOSITORY_ID": "repo-id", + "BUILD_BUILDNUMBER": "42", + "BUILD_BUILDID": "1001", + "ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS": "true", + } + created_bodies: list[dict[str, object]] = [] + + def request_json(url: str, headers: dict[str, str], method: str = "GET", body: dict[str, object] | None = None): + if method == "POST" and url.endswith("/pullrequests?api-version=7.1"): + created_bodies.append(body or {}) + return {"pullRequestId": 123} + raise AssertionError(f"Unexpected request: {method} {url}") + + with patch.dict(os.environ, env, clear=False): + with patch.object( + sys, + "argv", + [ + "ensure_rolling_pr.py", + "--repo-root", + "/tmp/repo", + "--workload", + "intune", + "--drift-branch", + "drift/intune", + "--baseline-branch", + "main", + "--pr-title", + "Intune drift review (rolling)", + ], + ): + with patch.object(self.module, "_query_prs", side_effect=[[], []]): + with patch.object(self.module, "_run_git"): + with patch.object(self.module, "_ref_has_commit", return_value=True): + with patch.object(self.module, "_workload_config_diff_exists", return_value=True): + with patch.object(self.module, "_tree_id_for_commitish", return_value="tree123"): + with patch.object(self.module, "_find_matching_abandoned_pr", return_value=(None, "")): + with patch.object(self.module, "_request_json", side_effect=request_json): + result = self.module.main() + + self.assertEqual(result, 0) + self.assertEqual(len(created_bodies), 1) + self.assertTrue(created_bodies[0]["isDraft"]) + + def test_main_skips_active_pr_patch_when_already_up_to_date(self) -> None: + env = { + "SYSTEM_ACCESSTOKEN": "token", + "SYSTEM_COLLECTIONURI": "https://dev.azure.com/example", + "SYSTEM_TEAMPROJECT": "Project", + "BUILD_REPOSITORY_ID": "repo-id", + } + + with patch.dict(os.environ, env, clear=False): + with patch.object( + sys, + "argv", + [ + 
"ensure_rolling_pr.py", + "--repo-root", + "/tmp/repo", + "--workload", + "intune", + "--drift-branch", + "drift/intune", + "--baseline-branch", + "main", + "--pr-title", + "Intune drift review (rolling)", + ], + ): + with patch.object( + self.module, + "_query_prs", + return_value=[ + { + "pullRequestId": 123, + "title": "Intune drift review (rolling)", + "description": "Existing description with summary", + "completionOptions": {"mergeStrategy": "rebase"}, + "url": "https://dev.azure.com/example/_apis/git/repositories/repo/pullRequests/123", + } + ], + ): + with patch.object(self.module, "_request_json") as request_json: + result = self.module.main() + + self.assertEqual(result, 0) + request_json.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_export_entra_baseline.py b/tests/test_export_entra_baseline.py new file mode 100644 index 0000000..128f915 --- /dev/null +++ b/tests/test_export_entra_baseline.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "export_entra_baseline.py" + + +def load_module(): + spec = importlib.util.spec_from_file_location("export_entra_baseline", MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +class ExportEntraBaselineTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def _namespace(self, root: Path, fail_on_export_error: str) -> SimpleNamespace: + return SimpleNamespace( + root=str(root), + token="token-value", + include_named_locations="true", + include_authentication_strengths="false", + include_conditional_access="false", + include_enterprise_applications="false", + include_app_registrations="false", + enterprise_app_workers=1, + fail_on_export_error=fail_on_export_error, + previous_snapshot_ref="", + ) + + def test_requested_export_error_is_fatal_by_default(self) -> None: + with tempfile.TemporaryDirectory() as td: + root = Path(td) / "entra" + root.mkdir(parents=True, exist_ok=True) + args = self._namespace(root=root, fail_on_export_error="true") + + with ( + patch.object(self.module, "parse_args", return_value=args), + patch.object(self.module, "GraphClient") as graph_client_cls, + ): + graph_client = MagicMock() + graph_client.get_object.return_value = ({"value": []}, None) + graph_client.get_collection.return_value = ([], "HTTP 500") + graph_client_cls.return_value = graph_client + + result = self.module.main() + self.assertEqual(result, 2) + + def test_requested_export_error_can_be_non_fatal_when_disabled(self) -> None: + with tempfile.TemporaryDirectory() as td: + root = Path(td) / "entra" + root.mkdir(parents=True, exist_ok=True) + args = self._namespace(root=root, fail_on_export_error="false") + + with ( + patch.object(self.module, "parse_args", return_value=args), + patch.object(self.module, "GraphClient") as graph_client_cls, + ): + graph_client = MagicMock() + graph_client.get_object.return_value = ({"value": []}, None) + graph_client.get_collection.return_value = ([], "HTTP 500") + graph_client_cls.return_value = graph_client + + result = self.module.main() + self.assertEqual(result, 0) + + def 
test_normalize_resolution_error_suppresses_transient_dns_variants(self) -> None: + transient_samples = [ + "", + "Temporary failure resolving 'graph.microsoft.com'", + "Failed to resolve host graph.microsoft.com", + "getaddrinfo failed", + ] + for sample in transient_samples: + with self.subTest(sample=sample): + self.assertEqual(self.module.normalize_resolution_error(sample), "") + + def test_normalize_resolution_error_keeps_non_transient_http_error(self) -> None: + self.assertEqual(self.module.normalize_resolution_error("HTTP 403"), "HTTP 403") + + def test_normalize_branch_name_ignores_unresolved_macro(self) -> None: + self.assertEqual(self.module._normalize_branch_name("$(DRIFT_BRANCH_ENTRA)"), "") + + def test_required_resource_resolution_backfills_unresolved_from_previous(self) -> None: + current = [ + { + "resourceAppId": "00000003-0000-0000-c000-000000000000", + "resourceDisplayName": "Unresolved", + "permissions": [ + { + "id": "perm-id-1", + "type": "Scope", + "value": "", + "displayName": "", + "description": "", + } + ], + } + ] + previous = [ + { + "resourceAppId": "00000003-0000-0000-c000-000000000000", + "resourceDisplayName": "Microsoft Graph", + "permissions": [ + { + "id": "perm-id-1", + "type": "Scope", + "value": "User.Read.All", + "displayName": "Read all users' full profiles", + "description": "Allows the app to read full profiles.", + } + ], + } + ] + + merged = self.module._merge_required_resource_access_resolution(current, previous) + self.assertEqual(merged[0]["resourceDisplayName"], "Microsoft Graph") + self.assertEqual(merged[0]["permissions"][0]["value"], "User.Read.All") + self.assertEqual(merged[0]["permissions"][0]["displayName"], "Read all users' full profiles") + + unresolved_resources, unresolved_permissions = self.module._count_unresolved_required_permissions(merged) + self.assertEqual(unresolved_resources, 0) + self.assertEqual(unresolved_permissions, 0) + + def test_app_role_resolution_backfills_unresolved_from_previous(self) -> None: + current = [ + { + "resourceId": "resource-1", + "resourceDisplayName": "Unresolved", + "appRoleId": "role-1", + "appRoleValue": "", + "appRoleDisplayName": "", + "principalType": "ServicePrincipal", + } + ] + previous = [ + { + "resourceId": "resource-1", + "resourceDisplayName": "Office 365 Exchange Online", + "appRoleId": "role-1", + "appRoleValue": "Exchange.ManageAsApp", + "appRoleDisplayName": "Manage Exchange as application", + "principalType": "ServicePrincipal", + } + ] + + merged = self.module._merge_app_role_assignments_resolution(current, previous) + self.assertEqual(merged[0]["resourceDisplayName"], "Office 365 Exchange Online") + self.assertEqual(merged[0]["appRoleValue"], "Exchange.ManageAsApp") + self.assertEqual(merged[0]["appRoleDisplayName"], "Manage Exchange as application") + + unresolved_resources, unresolved_roles = self.module._count_unresolved_app_role_assignments(merged) + self.assertEqual(unresolved_resources, 0) + self.assertEqual(unresolved_roles, 0) + + def test_required_resource_access_uses_direct_appid_fallback_when_filter_returns_empty(self) -> None: + app = { + "requiredResourceAccess": [ + { + "resourceAppId": "00000003-0000-0000-c000-000000000000", + "resourceAccess": [ + { + "id": "e1fe6dd8-ba31-4d61-89e7-88639da4683d", + "type": "Scope", + } + ], + } + ] + } + client = MagicMock() + client.get_object.side_effect = [ + ({"value": []}, None), + ( + { + "id": "sp-graph", + "appId": "00000003-0000-0000-c000-000000000000", + "displayName": "Microsoft Graph", + "appRoles": [], + 
"oauth2PermissionScopes": [ + { + "id": "e1fe6dd8-ba31-4d61-89e7-88639da4683d", + "value": "User.Read", + "adminConsentDisplayName": "Sign in and read user profile", + "adminConsentDescription": "Allows sign-in and profile read.", + } + ], + }, + None, + ), + ] + + resolved, unresolved_resources, unresolved_permissions, lookup_errors = self.module.resolve_required_resource_access( + app=app, + client=client, + resource_sp_by_appid={}, + ) + + self.assertEqual(unresolved_resources, 0) + self.assertEqual(unresolved_permissions, 0) + self.assertEqual(lookup_errors, []) + self.assertEqual(resolved[0]["resourceDisplayName"], "Microsoft Graph") + self.assertEqual(resolved[0]["permissions"][0]["value"], "User.Read") + + def test_load_resource_sp_cache_from_export_reads_enterprise_apps(self) -> None: + with tempfile.TemporaryDirectory() as td: + root = Path(td) / "entra" + export_dir = root / "Enterprise Applications" + export_dir.mkdir(parents=True, exist_ok=True) + payload = { + "id": "sp-graph", + "appId": "00000003-0000-0000-c000-000000000000", + "displayName": "Microsoft Graph", + "appRoles": [{"id": "role-1", "value": "Directory.Read.All"}], + "oauth2PermissionScopes": [{"id": "scope-1", "value": "User.Read"}], + } + (export_dir / "Microsoft Graph__sp-graph.json").write_text(json.dumps(payload), encoding="utf-8") + + cache = self.module._load_resource_sp_cache_from_export(root) + + self.assertIn("00000003-0000-0000-c000-000000000000", cache) + graph = cache["00000003-0000-0000-c000-000000000000"] + self.assertEqual(graph["displayName"], "Microsoft Graph") + self.assertEqual(graph["appRoles"][0]["value"], "Directory.Read.All") + self.assertEqual(graph["oauth2PermissionScopes"][0]["value"], "User.Read") + + def test_load_resource_sp_cache_from_export_ignores_invalid_files(self) -> None: + with tempfile.TemporaryDirectory() as td: + root = Path(td) / "entra" + export_dir = root / "Enterprise Applications" + export_dir.mkdir(parents=True, exist_ok=True) + (export_dir / "invalid.json").write_text("{", encoding="utf-8") + (export_dir / "missing-appid.json").write_text(json.dumps({"id": "sp-only"}), encoding="utf-8") + + cache = self.module._load_resource_sp_cache_from_export(root) + + self.assertEqual(cache, {}) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_filter_entra_enrichment_noise.py b/tests/test_filter_entra_enrichment_noise.py new file mode 100644 index 0000000..40051ca --- /dev/null +++ b/tests/test_filter_entra_enrichment_noise.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import importlib.util +import json +import subprocess +import tempfile +import unittest +from pathlib import Path + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "filter_entra_enrichment_noise.py" + + +def load_module(): + spec = importlib.util.spec_from_file_location("filter_entra_enrichment_noise", MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _git(repo: Path, *args: str) -> None: + subprocess.run( + ["git", *args], + cwd=str(repo), + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +class FilterEntraEnrichmentNoiseTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def test_is_enrichment_only_change_true(self) -> None: + old_text = json.dumps( + { + "displayName": "App", + 
"requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Microsoft Graph"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 0}}, + } + ) + new_text = json.dumps( + { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Unresolved"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 6}}, + } + ) + self.assertTrue(self.module._is_enrichment_only_change(old_text, new_text)) + + def test_is_enrichment_only_change_false_when_config_changes(self) -> None: + old_text = json.dumps( + { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + } + ) + new_text = json.dumps( + { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "11111111-0000-0000-c000-000000000000"}], + } + ) + self.assertFalse(self.module._is_enrichment_only_change(old_text, new_text)) + + def test_filter_reverts_only_enrichment_changes(self) -> None: + with tempfile.TemporaryDirectory() as td: + repo = Path(td) + _git(repo, "init") + _git(repo, "config", "user.email", "tester@example.com") + _git(repo, "config", "user.name", "Tester") + + workload_dir = repo / "tenant-state" / "entra" / "App Registrations" + workload_dir.mkdir(parents=True, exist_ok=True) + file_path = workload_dir / "Test App__id.json" + baseline = { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Microsoft Graph"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 0}}, + } + file_path.write_text(json.dumps(baseline, indent=2) + "\n", encoding="utf-8") + _git(repo, "add", ".") + _git(repo, "commit", "-m", "baseline") + + enrichment_only = { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Unresolved"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 6}}, + } + file_path.write_text(json.dumps(enrichment_only, indent=2) + "\n", encoding="utf-8") + + residual_before = self.module.find_enrichment_only_modified_files( + repo_root=repo, + workload_root="tenant-state/entra", + ) + self.assertEqual(residual_before, ["tenant-state/entra/App Registrations/Test App__id.json"]) + + reverted = self.module.filter_enrichment_only_files(repo_root=repo, workload_root="tenant-state/entra") + + self.assertEqual(reverted, ["tenant-state/entra/App Registrations/Test App__id.json"]) + residual_after = self.module.find_enrichment_only_modified_files( + repo_root=repo, + workload_root="tenant-state/entra", + ) + self.assertEqual(residual_after, []) + status = subprocess.run( + ["git", "status", "--short"], + cwd=str(repo), + check=True, + capture_output=True, + text=True, + ).stdout.strip() + self.assertEqual(status, "") + + def test_filter_keeps_real_config_changes(self) -> None: + with tempfile.TemporaryDirectory() as td: + repo = Path(td) + _git(repo, "init") + _git(repo, "config", "user.email", "tester@example.com") + _git(repo, "config", "user.name", "Tester") + + workload_dir = repo / "tenant-state" / "entra" / "App Registrations" + workload_dir.mkdir(parents=True, exist_ok=True) + file_path = workload_dir / "Test 
App__id.json" + baseline = { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Microsoft Graph"}], + } + file_path.write_text(json.dumps(baseline, indent=2) + "\n", encoding="utf-8") + _git(repo, "add", ".") + _git(repo, "commit", "-m", "baseline") + + config_changed = { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "11111111-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Unresolved"}], + } + file_path.write_text(json.dumps(config_changed, indent=2) + "\n", encoding="utf-8") + + reverted = self.module.filter_enrichment_only_files(repo_root=repo, workload_root="tenant-state/entra") + + self.assertEqual(reverted, []) + status = subprocess.run( + ["git", "status", "--short"], + cwd=str(repo), + check=True, + capture_output=True, + text=True, + ).stdout + self.assertIn("Test App__id.json", status) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_filter_intune_partial_settings_noise.py b/tests/test_filter_intune_partial_settings_noise.py new file mode 100644 index 0000000..23b353e --- /dev/null +++ b/tests/test_filter_intune_partial_settings_noise.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import importlib.util +import json +import subprocess +import tempfile +import unittest +from pathlib import Path + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "filter_intune_partial_settings_noise.py" + + +def load_module(): + spec = importlib.util.spec_from_file_location("filter_intune_partial_settings_noise", MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _git(repo: Path, *args: str) -> None: + subprocess.run( + ["git", *args], + cwd=str(repo), + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +class FilterIntunePartialSettingsNoiseTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def test_partial_payload_detection(self) -> None: + self.assertTrue(self.module._is_partial_settings_payload({"settingCount": 1})) + self.assertTrue(self.module._is_partial_settings_payload({"settingCount": 2, "settings": []})) + self.assertFalse(self.module._is_partial_settings_payload({"settingCount": 0, "settings": []})) + self.assertFalse(self.module._is_partial_settings_payload({"settingCount": 2, "settings": [{"id": "0"}]})) + + def test_restore_partial_settings_from_baseline(self) -> None: + with tempfile.TemporaryDirectory() as td: + repo = Path(td) + _git(repo, "init") + _git(repo, "config", "user.email", "tester@example.com") + _git(repo, "config", "user.name", "Tester") + + workload_dir = repo / "tenant-state" / "intune" / "Settings Catalog" + workload_dir.mkdir(parents=True, exist_ok=True) + file_path = workload_dir / "Policy__abc.json" + + baseline = { + "name": "Policy", + "settingCount": 2, + "settings": [{"id": "0"}, {"id": "1"}], + } + file_path.write_text(json.dumps(baseline, indent=2) + "\n", encoding="utf-8") + _git(repo, "add", ".") + _git(repo, "commit", "-m", "baseline") + + partial = { + "name": "Policy", + "settingCount": 2, + } + file_path.write_text(json.dumps(partial, indent=2) + "\n", encoding="utf-8") + + restored, unresolved = 
self.module.restore_partial_settings_from_baseline( + repo_root=repo, + backup_root=repo / "tenant-state" / "intune", + baseline_ref="HEAD", + ) + + self.assertEqual(restored, ["tenant-state/intune/Settings Catalog/Policy__abc.json"]) + self.assertEqual(unresolved, []) + payload = json.loads(file_path.read_text(encoding="utf-8")) + self.assertEqual(payload["settings"], [{"id": "0"}, {"id": "1"}]) + + def test_partial_settings_unresolved_without_baseline(self) -> None: + with tempfile.TemporaryDirectory() as td: + repo = Path(td) + _git(repo, "init") + _git(repo, "config", "user.email", "tester@example.com") + _git(repo, "config", "user.name", "Tester") + + (repo / "README.md").write_text("test\n", encoding="utf-8") + _git(repo, "add", ".") + _git(repo, "commit", "-m", "init") + + workload_dir = repo / "tenant-state" / "intune" / "Settings Catalog" + workload_dir.mkdir(parents=True, exist_ok=True) + file_path = workload_dir / "Policy__missing.json" + file_path.write_text(json.dumps({"settingCount": 4}, indent=2) + "\n", encoding="utf-8") + + restored, unresolved = self.module.restore_partial_settings_from_baseline( + repo_root=repo, + backup_root=repo / "tenant-state" / "intune", + baseline_ref="HEAD", + ) + + self.assertEqual(restored, []) + self.assertEqual(unresolved, ["tenant-state/intune/Settings Catalog/Policy__missing.json"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_queue_post_merge_restore.py b/tests/test_queue_post_merge_restore.py new file mode 100644 index 0000000..0ec860b --- /dev/null +++ b/tests/test_queue_post_merge_restore.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import base64 +import importlib.util +import sys +import unittest +from pathlib import Path +from unittest.mock import patch + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "queue_post_merge_restore.py" + + +def load_module(): + # Preload common helper so the script can import it. 
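+    # scripts/ is not on sys.path, so the shared helper is registered in sys.modules before the module under test imports it.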
+ common_path = MODULE_PATH.parent / "common.py" + common_spec = importlib.util.spec_from_file_location("common", common_path) + if common_spec is not None and common_spec.loader is not None: + common_mod = importlib.util.module_from_spec(common_spec) + sys.modules["common"] = common_mod + common_spec.loader.exec_module(common_mod) + + module_name = "queue_post_merge_restore" + spec = importlib.util.spec_from_file_location(module_name, MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _marker(path: str) -> str: + encoded = base64.urlsafe_b64encode(path.encode("utf-8")).decode("ascii").rstrip("=") + return f"Automation marker: AUTO-CHANGE-TICKET:{encoded}" + + +class QueuePostMergeRestoreTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def test_ticket_path_from_content_decodes_marker(self) -> None: + path = "tenant-state/intune/Device Configurations/macOS - WiFi TEST_macOSWiFiConfiguration__id.json" + content = f"Header\n{_marker(path)}\nBody" + self.assertEqual(self.module._ticket_path_from_content(content), path) + + def test_rejected_ticket_paths_uses_latest_decision(self) -> None: + accepted_path = "tenant-state/intune/Settings Catalog/A.json" + rejected_path = "tenant-state/intune/Settings Catalog/B.json" + threads = [ + { + "comments": [ + {"id": 1, "parentCommentId": 0, "content": _marker(accepted_path)}, + {"id": 2, "parentCommentId": 0, "content": "/reject"}, + {"id": 3, "parentCommentId": 0, "content": "/accept"}, + ] + }, + { + "comments": [ + {"id": 1, "parentCommentId": 0, "content": _marker(rejected_path)}, + {"id": 2, "parentCommentId": 0, "content": "/accept"}, + {"id": 3, "parentCommentId": 0, "content": "/reject"}, + ] + }, + ] + self.assertEqual(self.module._rejected_ticket_paths(threads), [rejected_path]) + + def test_queue_restore_pipeline_includes_selective_params(self) -> None: + captured: dict[str, object] = {} + + def _fake_request(url: str, headers: dict[str, str], method: str = "GET", body: dict | None = None): + captured["url"] = url + captured["method"] = method + captured["body"] = body or {} + return {"id": 123} + + with patch.object(self.module, "_request_json", side_effect=_fake_request): + self.module._queue_restore_pipeline( + collection_uri="https://dev.azure.com/org", + project="proj", + headers={"Authorization": "Bearer x"}, + definition_id=42, + baseline_branch="main", + include_entra_update=False, + dry_run=False, + update_assignments=True, + remove_unmanaged=False, + max_workers=10, + exclude_csv="", + restore_mode="selective", + restore_paths_csv="tenant-state/intune/Device Configurations/macOS - WiFi TEST.json", + ) + + body = captured["body"] + self.assertIsInstance(body, dict) + template = body["templateParameters"] + self.assertEqual(template["restoreMode"], "selective") + self.assertIn("restorePathsCsv", template) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_update_pr_review_summary.py b/tests/test_update_pr_review_summary.py new file mode 100644 index 0000000..909ab65 --- /dev/null +++ b/tests/test_update_pr_review_summary.py @@ -0,0 +1,1146 @@ +from __future__ import annotations + +import io +import importlib.util +import json +import os +import sys +import unittest +from pathlib import Path +from urllib.error import HTTPError +from unittest.mock 
import patch + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "update_pr_review_summary.py" + + +def load_module(): + # Preload common helper so the script can import it. + common_path = MODULE_PATH.parent / "common.py" + common_spec = importlib.util.spec_from_file_location("common", common_path) + if common_spec is not None and common_spec.loader is not None: + common_mod = importlib.util.module_from_spec(common_spec) + sys.modules["common"] = common_mod + common_spec.loader.exec_module(common_mod) + + module_name = "update_pr_review_summary" + spec = importlib.util.spec_from_file_location(module_name, MODULE_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +class UpdatePrReviewSummaryTests(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.module = load_module() + + def test_semantic_change_ignores_resolution_status_noise(self) -> None: + old_excerpt = '{"displayName":"App","resolutionStatus":{"owners":{"error":"Temporary failure resolving"}}}' + new_excerpt = '{"displayName":"App","resolutionStatus":{"owners":{"error":""}}}' + semantic = self.module._extract_semantic_change(old_excerpt, new_excerpt) + self.assertEqual(semantic, "No semantic key changes detected") + + def test_existing_change_fingerprint_parses_auto_block(self) -> None: + description = ( + "Intro text\n\n" + "\n" + "## Automated Review Summary (entra)\n\n" + "- **Change Fingerprint:** `A1B2c3D4e5F6`\n" + "\n" + ) + fingerprint = self.module._existing_change_fingerprint(description) + self.assertEqual(fingerprint, "a1b2c3d4e5f6") + + def test_existing_change_fingerprint_returns_empty_when_missing(self) -> None: + description = "## Automated Review Summary\n- **Change Fingerprint:** `abcdef012345`" + fingerprint = self.module._existing_change_fingerprint(description) + self.assertEqual(fingerprint, "") + + def test_existing_summary_version_parses_auto_block(self) -> None: + description = ( + "Intro text\n\n" + "\n" + "## Automated Review Summary (entra)\n\n" + "- **Summary Version:** `2026-03-19b`\n" + "- **Change Fingerprint:** `A1B2c3D4e5F6`\n" + "\n" + ) + version = self.module._existing_summary_version(description) + self.assertEqual(version, "2026-03-19b") + + def test_existing_summary_version_returns_empty_when_missing(self) -> None: + description = ( + "\n" + "## Automated Review Summary (intune)\n" + "\n" + ) + self.assertEqual(self.module._existing_summary_version(description), "") + + def test_auto_block_body_extracts_marked_content(self) -> None: + description = ( + "Header\n\n" + "\n" + "Line A\nLine B\n" + "\n" + ) + body = self.module._auto_block_body(description) + self.assertIn("Line A", body) + self.assertIn("Line B", body) + + def test_auto_block_body_empty_when_markers_missing(self) -> None: + self.assertEqual(self.module._auto_block_body("no markers"), "") + + def test_upsert_auto_block_places_summary_before_reviewer_actions(self) -> None: + description = ( + "Rolling drift PR created by backup pipeline.\n\n" + "- Source branch: `drift/intune`\n" + "- Target branch: `main`\n" + "- Last pipeline run: `1` (BuildId: 1)\n\n" + "## Reviewer Quick Actions\n\n" + "### 1) Accept all changes\n" + ) + block = ( + "\n" + "## Automated Review Summary (intune)\n" + "" + ) + updated = self.module._upsert_auto_block(description, block) + summary_pos = updated.find("## Automated 
Review Summary") + actions_pos = updated.find("## Reviewer Quick Actions") + self.assertGreaterEqual(summary_pos, 0) + self.assertGreaterEqual(actions_pos, 0) + self.assertLess(summary_pos, actions_pos) + + def test_upsert_auto_block_repositions_existing_summary_before_reviewer_actions(self) -> None: + description = ( + "Rolling drift PR created by backup pipeline.\n\n" + "- Source branch: `drift/intune`\n" + "- Target branch: `main`\n" + "- Last pipeline run: `1` (BuildId: 1)\n\n" + "## Reviewer Quick Actions\n\n" + "### 1) Accept all changes\n\n" + "\n" + "Old summary\n" + "\n" + ) + block = ( + "\n" + "## Automated Review Summary (intune)\n" + "" + ) + updated = self.module._upsert_auto_block(description, block) + summary_pos = updated.find("## Automated Review Summary") + actions_pos = updated.find("## Reviewer Quick Actions") + self.assertLess(summary_pos, actions_pos) + self.assertEqual(updated.count(""), 1) + + def test_publish_draft_pr_updates_is_draft_when_delay_enabled(self) -> None: + calls: list[dict[str, object]] = [] + + def request_json(url: str, token: str, method: str = "GET", body: dict[str, object] | None = None): + calls.append({"url": url, "token": token, "method": method, "body": body or {}}) + return {} + + with patch.dict(os.environ, {"ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS": "true"}, clear=False): + with patch.object(self.module, "_request_json", side_effect=request_json): + published = self.module._publish_draft_pr( + repo_api="https://dev.azure.com/org/project/_apis/git/repositories/repo", + token="token", + pr_id=77, + title="PR title", + description="PR description", + is_draft=True, + ) + + self.assertTrue(published) + self.assertEqual(len(calls), 1) + self.assertEqual(calls[0]["method"], "PATCH") + self.assertEqual(calls[0]["body"]["isDraft"], False) + + def test_publish_draft_pr_skips_when_delay_disabled(self) -> None: + with patch.dict(os.environ, {"ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS": "false"}, clear=False): + with patch.object(self.module, "_request_json") as request_json: + published = self.module._publish_draft_pr( + repo_api="https://dev.azure.com/org/project/_apis/git/repositories/repo", + token="token", + pr_id=77, + title="PR title", + description="PR description", + is_draft=True, + ) + + self.assertFalse(published) + request_json.assert_not_called() + + def test_auto_block_contains_ai_fallback_true_for_fallback_marker(self) -> None: + body = "...\n_AI fallback used: Azure OpenAI unavailable (timeout)_\n..." + self.assertTrue(self.module._auto_block_contains_ai_fallback(body)) + + def test_auto_block_contains_ai_fallback_true_for_unavailable_marker(self) -> None: + body = "...\n_AI summary unavailable: Azure OpenAI is not configured_\n..." + self.assertTrue(self.module._auto_block_contains_ai_fallback(body)) + + def test_auto_block_contains_ai_fallback_false_for_normal_ai_text(self) -> None: + body = "### AI Reviewer Narrative\nEverything looks consistent." 
+ self.assertFalse(self.module._auto_block_contains_ai_fallback(body)) + + def test_preferred_aoai_token_param_uses_max_completion_tokens_for_gpt5(self) -> None: + with patch.dict(os.environ, {}, clear=False): + token_param = self.module._preferred_aoai_token_param("gpt-5.3-chat") + self.assertEqual(token_param, "max_completion_tokens") + + def test_preferred_aoai_token_param_uses_max_tokens_for_non_gpt5(self) -> None: + with patch.dict(os.environ, {}, clear=False): + token_param = self.module._preferred_aoai_token_param("gpt-4.1") + self.assertEqual(token_param, "max_tokens") + + def test_preferred_aoai_token_param_honors_override(self) -> None: + with patch.dict(os.environ, {"AZURE_OPENAI_TOKEN_PARAM": "max_tokens"}, clear=False): + token_param = self.module._preferred_aoai_token_param("gpt-5.3-chat") + self.assertEqual(token_param, "max_tokens") + + def test_preferred_aoai_temperature_omits_for_gpt5(self) -> None: + with patch.dict(os.environ, {}, clear=False): + temperature = self.module._preferred_aoai_temperature("gpt-5.3-chat") + self.assertEqual(temperature, None) + + def test_preferred_aoai_temperature_defaults_to_zero_for_non_gpt5(self) -> None: + with patch.dict(os.environ, {}, clear=False): + temperature = self.module._preferred_aoai_temperature("gpt-4.1") + self.assertEqual(temperature, 0.0) + + def test_preferred_aoai_temperature_honors_override(self) -> None: + with patch.dict(os.environ, {"AZURE_OPENAI_TEMPERATURE": "0.7"}, clear=False): + temperature = self.module._preferred_aoai_temperature("gpt-5.3-chat") + self.assertEqual(temperature, 0.7) + + def test_reviewer_instruction_requests_infrastructure_vs_admin_distinction(self) -> None: + instruction = self.module._reviewer_instruction() + self.assertIn("platform-managed or vendor-driven infrastructure drift", instruction) + self.assertIn("tenant-admin changes", instruction) + self.assertIn("mixed or insufficient", instruction) + + def test_minimal_reviewer_instruction_requests_change_source_classification(self) -> None: + instruction = self.module._minimal_reviewer_instruction() + self.assertIn("infrastructure/platform-driven", instruction) + self.assertIn("admin-driven", instruction) + self.assertIn("mixed/uncertain", instruction) + + def test_compact_ai_narrative_markdown_preserves_all_reviewer_sections(self) -> None: + text = ( + "Plain-language summary\n" + + ("Summary text. " * 20) + + "\n\nOperational impact\n" + + ("Operational impact text. " * 20) + + "\n\nRisk assessment rationale\n" + + ("Risk rationale text. " * 20) + + "\n\nRecommended reviewer checks\n" + + "- Check one\n- Check two\n- Check three\n" + + "\nRollback considerations\n" + + ("Rollback text. 
" * 20) + ) + compact = self.module._compact_ai_narrative_markdown(text, 420) + self.assertLessEqual(len(compact), 420) + self.assertIn("#### Plain-Language Summary", compact) + self.assertIn("#### Operational Impact", compact) + self.assertIn("#### Risk Assessment Rationale", compact) + self.assertIn("#### Recommended Reviewer Checks", compact) + self.assertIn("#### Rollback Considerations", compact) + + def test_extract_ai_text_from_payload_rejects_truncated_response(self) -> None: + text, error = self.module._extract_ai_text_from_payload( + { + "choices": [ + { + "finish_reason": "length", + "message": {"content": "This output was cut off"}, + } + ] + } + ) + self.assertEqual(text, "") + self.assertIn("finish_reason=length", error) + self.assertIn("partial content suppressed", error) + + def test_classify_change_source_marks_enterprise_app_add_as_infrastructure(self) -> None: + change = self.module.ChangeItem( + operation="Added", + path="tenant-state/entra/Enterprise Applications/Microsoft Foo__id.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="identity_security", + severity="HIGH", + ) + source = self.module._classify_change_source(change, "New configuration object added") + self.assertEqual(source["label"], "likely_infrastructure_driven") + + def test_classify_change_source_marks_assignment_change_as_admin(self) -> None: + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/entra/Conditional Access/CA Policy__id.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="conditional_access", + severity="HIGH", + ) + source = self.module._classify_change_source( + change, + "assignment scope: likely broader (fewer exclusion targets)", + ) + self.assertEqual(source["label"], "likely_admin_driven") + + def test_extract_semantic_change_new_app_includes_security_fields(self) -> None: + new_excerpt = json.dumps( + { + "displayName": "Headless", + "requiredResourceAccess": [ + {"resourceAppId": "0003", "resourceAccess": [{"id": "abc", "type": "Scope"}]} + ], + "appRoles": [{"value": "Admin", "displayName": "Admin"}], + "passwordCredentials": [{"hint": "abc"}], + "signInAudience": "AzureADMultipleOrgs", + } + ) + semantic = self.module._extract_semantic_change( + "", new_excerpt, "tenant-state/entra/App Registrations/Headless.json" + ) + self.assertIn("New configuration object added", semantic) + self.assertIn("requiredResourceAccess present", semantic) + self.assertIn("appRoles present", semantic) + self.assertIn("passwordCredentials present", semantic) + self.assertIn("signInAudience=AzureADMultipleOrgs", semantic) + + def test_extract_semantic_change_new_app_without_security_fields_is_generic(self) -> None: + new_excerpt = json.dumps({"displayName": "Headless"}) + semantic = self.module._extract_semantic_change( + "", new_excerpt, "tenant-state/entra/App Registrations/Headless.json" + ) + self.assertEqual(semantic, "New configuration object added") + + def test_classify_change_source_app_reg_permission_changes_are_admin_driven(self) -> None: + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/entra/App Registrations/App.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="identity_security", + severity="HIGH", + ) + source = self.module._classify_change_source(change, "requiredResourceAccess present") + self.assertEqual(source["label"], "likely_admin_driven") + + def 
test_classify_change_source_enterprise_app_permission_changes_are_admin_driven(self) -> None: + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/entra/Enterprise Applications/App.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="identity_security", + severity="HIGH", + ) + source = self.module._classify_change_source(change, "oauth2PermissionScopes changed") + self.assertEqual(source["label"], "likely_admin_driven") + + def test_reviewer_instruction_warns_against_downgrading_app_identity_risk(self) -> None: + instruction = self.module._reviewer_instruction() + self.assertIn("App Registrations and Enterprise Applications", instruction) + self.assertIn("do not downgrade risk to LOW", instruction) + self.assertIn("passwordCredentials", instruction) + + def test_build_change_source_assessment_marks_split_signals_as_mixed(self) -> None: + assessment = self.module._build_change_source_assessment( + [ + { + "change_source": "likely_admin_driven", + "change_source_reasons": ["Assignment/targeting semantics changed"], + "change_source_scores": {"admin": 5, "infrastructure": 0}, + }, + { + "change_source": "likely_infrastructure_driven", + "change_source_reasons": ["Enterprise application inventory often contains platform-managed object churn"], + "change_source_scores": {"admin": 0, "infrastructure": 5}, + }, + ] + ) + self.assertEqual(assessment["dominant_source"], "mixed_or_uncertain") + + def test_call_azure_openai_payload_includes_change_source_assessment(self) -> None: + class _FakeResponse: + def __init__(self, payload: dict) -> None: + self._payload = payload + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self) -> bytes: + return json.dumps(self._payload).encode("utf-8") + + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/intune/Device Configurations/P1__id.json", + risk_score=2, + risk_label="MEDIUM", + reason="Workload configuration area", + policy_type="device_configuration", + severity="MEDIUM", + ) + + env = { + "ENABLE_PR_AI_SUMMARY": "true", + "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com", + "AZURE_OPENAI_DEPLOYMENT": "gpt", + "AZURE_OPENAI_API_KEY": "key", + "PR_AI_REQUEST_MAX_ATTEMPTS": "1", + "PR_AI_REQUEST_TIMEOUT_SECONDS": "10", + } + + seen_payloads: list[dict] = [] + + def _fake_urlopen(request, timeout=0): + payload = json.loads(request.data.decode("utf-8")) + seen_payloads.append(payload) + return _FakeResponse( + { + "choices": [ + { + "finish_reason": "stop", + "message": {"content": "AI summary ready"}, + } + ] + } + ) + + with patch.dict(os.environ, env, clear=False), patch.object( + self.module, "_load_policy_excerpt", return_value="{}" + ), patch.object(self.module, "urlopen", side_effect=_fake_urlopen): + content, error = self.module._call_azure_openai( + changes=[change], + deterministic_summary="deterministic", + workload="intune", + repo_root="/tmp/repo", + baseline_branch="main", + drift_branch="drift/intune", + ) + + self.assertEqual(error, None) + self.assertEqual(content, "AI summary ready") + self.assertTrue(seen_payloads) + user_payload = json.loads(seen_payloads[0]["messages"][1]["content"]) + self.assertIn("change_source_assessment", user_payload) + self.assertEqual( + user_payload["change_source_assessment"]["dominant_source"], + "primarily_admin_driven", + ) + + def test_call_azure_openai_retries_with_minimal_prompt_after_truncated_output(self) -> None: + class 
_FakeResponse: + def __init__(self, payload: dict) -> None: + self._payload = payload + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self) -> bytes: + return json.dumps(self._payload).encode("utf-8") + + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/intune/Device Configurations/P1__id.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="device_configuration", + severity="HIGH", + ) + + env = { + "ENABLE_PR_AI_SUMMARY": "true", + "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com", + "AZURE_OPENAI_DEPLOYMENT": "gpt", + "AZURE_OPENAI_API_KEY": "key", + "PR_AI_REQUEST_MAX_ATTEMPTS": "1", + "PR_AI_REQUEST_TIMEOUT_SECONDS": "10", + } + + seen_payloads: list[dict] = [] + responses = iter( + [ + { + "choices": [ + { + "finish_reason": "length", + "message": {"content": "Truncated reviewer text"}, + } + ] + }, + { + "choices": [ + { + "finish_reason": "stop", + "message": {"content": "Complete fallback reviewer summary"}, + } + ] + }, + ] + ) + + def _fake_urlopen(request, timeout=0): + payload = json.loads(request.data.decode("utf-8")) + seen_payloads.append(payload) + return _FakeResponse(next(responses)) + + with patch.dict(os.environ, env, clear=False), patch.object( + self.module, "_load_policy_excerpt", return_value="{}" + ), patch.object(self.module, "urlopen", side_effect=_fake_urlopen): + content, error = self.module._call_azure_openai( + changes=[change], + deterministic_summary="deterministic", + workload="intune", + repo_root="/tmp/repo", + baseline_branch="main", + drift_branch="drift/intune", + ) + + self.assertEqual(error, None) + self.assertEqual(content, "Complete fallback reviewer summary") + self.assertEqual(len(seen_payloads), 2) + first_payload = json.loads(seen_payloads[0]["messages"][1]["content"]) + second_payload = json.loads(seen_payloads[1]["messages"][1]["content"]) + self.assertIn("sampled_changes", first_payload) + self.assertTrue("sampled_changes" in second_payload or "changes" in second_payload) + + def test_build_full_ai_review_thread_content_includes_marker(self) -> None: + content = self.module._build_full_ai_review_thread_content( + "intune", + "Plain-language summary\nEverything looks consistent.", + ) + self.assertIn("AI reviewer narrative (full)", content) + self.assertIn("Automation marker: AUTO-AI-REVIEW:intune", content) + + def test_sync_full_ai_review_thread_skips_duplicate_comment(self) -> None: + existing_content = self.module._build_full_ai_review_thread_content( + "intune", + "Plain-language summary\nEverything looks consistent.", + ) + calls: list[tuple[str, str]] = [] + + def request_json(url: str, token: str, method: str = "GET", body: dict[str, object] | None = None): + calls.append((method, url)) + if method == "GET" and url.endswith("/pullrequests/77/threads?api-version=7.1"): + return { + "value": [ + { + "id": 10, + "status": "active", + "comments": [{"content": existing_content}], + } + ] + } + raise AssertionError(f"Unexpected request: {method} {url}") + + with patch.object(self.module, "_request_json", side_effect=request_json): + updated = self.module._sync_full_ai_review_thread( + repo_api="https://dev.azure.com/org/project/_apis/git/repositories/repo", + pr_id=77, + token="token", + workload="intune", + ai_summary="Plain-language summary\nEverything looks consistent.", + ) + + self.assertFalse(updated) + self.assertEqual(calls, [("GET", 
"https://dev.azure.com/org/project/_apis/git/repositories/repo/pullrequests/77/threads?api-version=7.1")]) + + def test_sync_full_ai_review_thread_creates_thread_when_missing(self) -> None: + calls: list[tuple[str, str, dict[str, object] | None]] = [] + + def request_json(url: str, token: str, method: str = "GET", body: dict[str, object] | None = None): + calls.append((method, url, body)) + if method == "GET" and url.endswith("/pullrequests/77/threads?api-version=7.1"): + return {"value": []} + if method == "POST" and url.endswith("/pullrequests/77/threads?api-version=7.1"): + return {"id": 10} + raise AssertionError(f"Unexpected request: {method} {url}") + + with patch.object(self.module, "_request_json", side_effect=request_json): + updated = self.module._sync_full_ai_review_thread( + repo_api="https://dev.azure.com/org/project/_apis/git/repositories/repo", + pr_id=77, + token="token", + workload="intune", + ai_summary="Plain-language summary\nEverything looks consistent.", + ) + + self.assertTrue(updated) + self.assertEqual(len(calls), 2) + self.assertEqual(calls[0][0], "GET") + self.assertEqual(calls[1][0], "POST") + post_body = calls[1][2] or {} + self.assertIn("comments", post_body) + + def test_assignment_signature_uses_group_display_name_when_available(self) -> None: + payload = { + "assignments": [ + { + "source": "direct", + "intent": "apply", + "target": { + "@odata.type": "#microsoft.graph.groupAssignmentTarget", + "groupId": "9d7195ed-f42e-4cbe-9659-2c3c9f55cdd9", + "groupDisplayName": "Intune_U_TK_Test", + }, + } + ] + } + entries = self.module._assignment_entries(payload) + self.assertEqual(len(entries), 1) + signature = self.module._assignment_signature(entries[0]) + self.assertIn("group=Intune_U_TK_Test (9d7195ed-f42e-4cbe-9659-2c3c9f55cdd9)", signature) + + def test_has_matching_detected_change_comment_true_when_change_and_risk_match(self) -> None: + comments = [ + { + "content": ( + "Detected change (auto): Modified: assignment targets added: group=Intune_U_TK_Test\n\n" + "Risk context: MEDIUM (device_configuration): Workload configuration area" + ) + } + ] + matched = self.module._has_matching_detected_change_comment( + comments=comments, + change_summary="Modified: assignment targets added: group=Intune_U_TK_Test", + risk_summary="MEDIUM (device_configuration): Workload configuration area", + ) + self.assertTrue(matched) + + def test_has_matching_detected_change_comment_false_for_stale_comment(self) -> None: + comments = [ + { + "content": ( + "Detected change (auto): Modified: assignment targets added: group=9d7195ed-f42e\n\n" + "Risk context: MEDIUM (device_configuration): Workload configuration area" + ) + } + ] + matched = self.module._has_matching_detected_change_comment( + comments=comments, + change_summary="Modified: assignment targets added: group=Intune_U_TK_Test (9d7195ed-f42e)\n", + risk_summary="MEDIUM (device_configuration): Workload configuration area", + ) + self.assertFalse(matched) + + def test_entra_enrichment_only_json_change_true(self) -> None: + old_excerpt = """ + { + "id": "obj-1", + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + "requiredResourceAccessResolved": [{"resourceDisplayName": "Microsoft Graph"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 0}} + } + """ + new_excerpt = """ + { + "id": "obj-1", + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}], + 
"requiredResourceAccessResolved": [{"resourceDisplayName": "Unresolved"}], + "resolutionStatus": {"requiredResourceAccess": {"unresolvedPermissionCount": 6}} + } + """ + self.assertTrue(self.module._is_entra_enrichment_only_json_change(old_excerpt, new_excerpt)) + + def test_entra_enrichment_only_json_change_false_when_config_changes(self) -> None: + old_excerpt = """ + { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "00000003-0000-0000-c000-000000000000"}] + } + """ + new_excerpt = """ + { + "displayName": "App", + "requiredResourceAccess": [{"resourceAppId": "11111111-0000-0000-c000-000000000000"}] + } + """ + self.assertFalse(self.module._is_entra_enrichment_only_json_change(old_excerpt, new_excerpt)) + + def test_filter_operational_noise_changes_ignores_entra_enrichment_only_paths(self) -> None: + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/entra/App Registrations/Test App__id.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="identity_security", + severity="HIGH", + ) + excerpts = { + ("main", change.path): ( + '{"displayName":"App","requiredResourceAccess":[{"resourceAppId":"00000003-0000-0000-c000-000000000000"}],' + '"requiredResourceAccessResolved":[{"resourceDisplayName":"Microsoft Graph"}],"resolutionStatus":{"x":0}}' + ), + ("drift/entra", change.path): ( + '{"displayName":"App","requiredResourceAccess":[{"resourceAppId":"00000003-0000-0000-c000-000000000000"}],' + '"requiredResourceAccessResolved":[{"resourceDisplayName":"Unresolved"}],"resolutionStatus":{"x":1}}' + ), + } + + def _fake_load(repo_root: str, branch: str, path: str, max_chars: int = 0) -> str: + return excerpts.get((branch, path), "") + + with patch.object(self.module, "_load_policy_excerpt", side_effect=_fake_load): + filtered, ignored = self.module._filter_operational_noise_changes( + repo_root="/tmp/repo", + baseline_branch="main", + drift_branch="drift/entra", + workload="entra", + changes=[change], + ) + + self.assertEqual(ignored, 1) + self.assertEqual(filtered, []) + + def test_deterministic_summary_includes_operational_ignore_count(self) -> None: + change = self.module.ChangeItem( + operation="Modified", + path="tenant-state/intune/Device Configurations/P1__id.json", + risk_score=3, + risk_label="HIGH", + reason="Security or broad policy area", + policy_type="device_configuration", + severity="HIGH", + ) + summary = self.module._build_deterministic_summary( + [change], + drift_branch="drift/intune", + baseline_branch="main", + ignored_operational_count=2, + ) + self.assertIn("Operational-Only Changes Ignored", summary) + self.assertIn("**2**", summary) + + def test_assignment_scope_when_exclusion_target_removed_is_broader(self) -> None: + old_payload = { + "assignments": [ + { + "source": "direct", + "intent": "apply", + "target": { + "@odata.type": "#microsoft.graph.groupAssignmentTarget", + "groupId": "11111111-1111-1111-1111-111111111111", + "groupDisplayName": "CA001_INC", + }, + }, + { + "source": "direct", + "intent": "apply", + "target": { + "@odata.type": "#microsoft.graph.exclusionGroupAssignmentTarget", + "groupId": "22222222-2222-2222-2222-222222222222", + "groupDisplayName": "CA002_EXC", + }, + }, + ] + } + new_payload = { + "assignments": [ + { + "source": "direct", + "intent": "apply", + "target": { + "@odata.type": "#microsoft.graph.groupAssignmentTarget", + "groupId": "11111111-1111-1111-1111-111111111111", + "groupDisplayName": "CA001_INC", + }, + } + ] + } + changes = 
self.module._describe_assignment_changes(old_payload, new_payload)
+        self.assertIn("assignment scope: likely broader (fewer exclusion targets)", changes)
+
+    def test_assignment_scope_when_exclusion_target_added_is_narrower(self) -> None:
+        old_payload = {
+            "assignments": [
+                {
+                    "source": "direct",
+                    "intent": "apply",
+                    "target": {
+                        "@odata.type": "#microsoft.graph.groupAssignmentTarget",
+                        "groupId": "11111111-1111-1111-1111-111111111111",
+                        "groupDisplayName": "CA001_INC",
+                    },
+                }
+            ]
+        }
+        new_payload = {
+            "assignments": [
+                {
+                    "source": "direct",
+                    "intent": "apply",
+                    "target": {
+                        "@odata.type": "#microsoft.graph.groupAssignmentTarget",
+                        "groupId": "11111111-1111-1111-1111-111111111111",
+                        "groupDisplayName": "CA001_INC",
+                    },
+                },
+                {
+                    "source": "direct",
+                    "intent": "apply",
+                    "target": {
+                        "@odata.type": "#microsoft.graph.exclusionGroupAssignmentTarget",
+                        "groupId": "22222222-2222-2222-2222-222222222222",
+                        "groupDisplayName": "CA002_EXC",
+                    },
+                },
+            ]
+        }
+        changes = self.module._describe_assignment_changes(old_payload, new_payload)
+        self.assertIn("assignment scope: likely narrower (more exclusion targets)", changes)
+
+    def test_call_azure_openai_retries_timeout_then_succeeds(self) -> None:
+        class _FakeResponse:
+            def __init__(self, payload: dict) -> None:
+                self._payload = payload
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self) -> bytes:
+                return json.dumps(self._payload).encode("utf-8")
+
+        change = self.module.ChangeItem(
+            operation="Modified",
+            path="tenant-state/intune/Device Configurations/P1__id.json",
+            risk_score=2,
+            risk_label="MEDIUM",
+            reason="Workload configuration area",
+            policy_type="device_configuration",
+            severity="MEDIUM",
+        )
+
+        env = {
+            "ENABLE_PR_AI_SUMMARY": "true",
+            "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com",
+            "AZURE_OPENAI_DEPLOYMENT": "gpt",
+            "AZURE_OPENAI_API_KEY": "key",
+            "PR_AI_REQUEST_MAX_ATTEMPTS": "2",
+            "PR_AI_REQUEST_TIMEOUT_SECONDS": "10",
+        }
+
+        with patch.dict(os.environ, env, clear=False), patch.object(
+            self.module, "_load_policy_excerpt", return_value="{}"
+        ), patch.object(
+            self.module,
+            "urlopen",
+            side_effect=[
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                _FakeResponse(
+                    {
+                        "choices": [
+                            {
+                                "finish_reason": "stop",
+                                "message": {"content": "AI summary ready"},
+                            }
+                        ]
+                    }
+                ),
+            ],
+        ), patch.object(self.module.time, "sleep", return_value=None):
+            content, error = self.module._call_azure_openai(
+                changes=[change],
+                deterministic_summary="deterministic",
+                workload="intune",
+                repo_root="/tmp/repo",
+                baseline_branch="main",
+                drift_branch="drift/intune",
+            )
+
+        self.assertEqual(error, None)
+        self.assertEqual(content, "AI summary ready")
+
+    def test_call_azure_openai_falls_back_after_timeout_retries(self) -> None:
+        change = self.module.ChangeItem(
+            operation="Modified",
+            path="tenant-state/intune/Device Configurations/P1__id.json",
+            risk_score=2,
+            risk_label="MEDIUM",
+            reason="Workload configuration area",
+            policy_type="device_configuration",
+            severity="MEDIUM",
+        )
+
+        env = {
+            "ENABLE_PR_AI_SUMMARY": "true",
+            "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com",
+            "AZURE_OPENAI_DEPLOYMENT": "gpt",
+            "AZURE_OPENAI_API_KEY": "key",
+            "PR_AI_REQUEST_MAX_ATTEMPTS": "2",
+            "PR_AI_REQUEST_TIMEOUT_SECONDS": "10",
+        }
+
+        with patch.dict(os.environ, env, clear=False), patch.object(
+            self.module, "_load_policy_excerpt", return_value="{}"
+        ), patch.object(
+            self.module,
+            "urlopen",
+            side_effect=[
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+                TimeoutError("The read operation timed out"),
+            ],
+        ), patch.object(self.module.time, "sleep", return_value=None):
+            content, error = self.module._call_azure_openai(
+                changes=[change],
+                deterministic_summary="deterministic",
+                workload="intune",
+                repo_root="/tmp/repo",
+                baseline_branch="main",
+                drift_branch="drift/intune",
+            )
+
+        self.assertEqual(error, None)
+        self.assertIsNotNone(content)
+        self.assertIn("AI fallback used", content)
+        self.assertIn("timed out after 2 attempts", content)
+
+    def test_call_azure_openai_uses_minimal_retry_after_timeout(self) -> None:
+        class _FakeResponse:
+            def __init__(self, payload: dict) -> None:
+                self._payload = payload
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self) -> bytes:
+                return json.dumps(self._payload).encode("utf-8")
+
+        change = self.module.ChangeItem(
+            operation="Modified",
+            path="tenant-state/intune/Device Configurations/P1__id.json",
+            risk_score=2,
+            risk_label="MEDIUM",
+            reason="Workload configuration area",
+            policy_type="device_configuration",
+            severity="MEDIUM",
+        )
+
+        env = {
+            "ENABLE_PR_AI_SUMMARY": "true",
+            "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com",
+            "AZURE_OPENAI_DEPLOYMENT": "gpt",
+            "AZURE_OPENAI_API_KEY": "key",
+            "PR_AI_REQUEST_MAX_ATTEMPTS": "2",
+            "PR_AI_REQUEST_TIMEOUT_SECONDS": "10",
+            "PR_AI_MINIMAL_CHANGE_LIMIT": "4",
+            "PR_AI_MINIMAL_MAX_TOKENS": "350",
+        }
+
+        side_effects = [
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            _FakeResponse(
+                {
+                    "choices": [
+                        {
+                            "finish_reason": "stop",
+                            "message": {"content": "Recovered via minimal retry"},
+                        }
+                    ]
+                }
+            ),
+        ]
+
+        with patch.dict(os.environ, env, clear=False), patch.object(
+            self.module, "_load_policy_excerpt", return_value="{}"
+        ), patch.object(
+            self.module, "urlopen", side_effect=side_effects
+        ), patch.object(self.module.time, "sleep", return_value=None):
+            content, error = self.module._call_azure_openai(
+                changes=[change],
+                deterministic_summary="deterministic",
+                workload="intune",
+                repo_root="/tmp/repo",
+                baseline_branch="main",
+                drift_branch="drift/intune",
+            )
+
+        self.assertEqual(error, None)
+        self.assertEqual(content, "Recovered via minimal retry")
+
+    def test_call_azure_openai_switches_token_param_when_unsupported(self) -> None:
+        class _FakeResponse:
+            def __init__(self, payload: dict) -> None:
+                self._payload = payload
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self) -> bytes:
+                return json.dumps(self._payload).encode("utf-8")
+
+        change = self.module.ChangeItem(
+            operation="Modified",
+            path="tenant-state/intune/Device Configurations/P1__id.json",
+            risk_score=2,
+            risk_label="MEDIUM",
+            reason="Workload configuration area",
+            policy_type="device_configuration",
+            severity="MEDIUM",
+        )
+
+        env = {
+            "ENABLE_PR_AI_SUMMARY": "true",
+            "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com",
+            "AZURE_OPENAI_DEPLOYMENT": "gpt-5.3-chat",
+            "AZURE_OPENAI_API_KEY": "key",
+            "PR_AI_REQUEST_MAX_ATTEMPTS": "1",
+            "PR_AI_REQUEST_TIMEOUT_SECONDS": "10",
+        }
+
+        def _fake_urlopen(request, timeout=0):
+            payload = json.loads(request.data.decode("utf-8"))
+            if "max_completion_tokens" in payload:
+                return _FakeResponse(
+                    {
+                        "choices": [
+                            {
+                                "finish_reason": "stop",
+                                "message": {"content": "Token param compatibility recovered"},
+                            }
+                        ]
+                    }
+                )
+            raise HTTPError(
+                request.full_url,
+                400,
+                "Bad Request",
+                hdrs=None,
+                fp=io.BytesIO(
+                    b'{"error":{"message":"Unsupported parameter: \'max_tokens\' is not supported with this model. Use \'max_completion_tokens\' instead."}}'
+                ),
+            )
+
+        with patch.dict(os.environ, env, clear=False), patch.object(
+            self.module, "_load_policy_excerpt", return_value="{}"
+        ), patch.object(
+            self.module, "urlopen", side_effect=_fake_urlopen
+        ), patch.object(self.module.time, "sleep", return_value=None):
+            content, error = self.module._call_azure_openai(
+                changes=[change],
+                deterministic_summary="deterministic",
+                workload="intune",
+                repo_root="/tmp/repo",
+                baseline_branch="main",
+                drift_branch="drift/intune",
+            )
+
+        self.assertEqual(error, None)
+        self.assertEqual(content, "Token param compatibility recovered")
+
+    def test_call_azure_openai_uses_compact_retry_after_timeout(self) -> None:
+        class _FakeResponse:
+            def __init__(self, payload: dict) -> None:
+                self._payload = payload
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self) -> bytes:
+                return json.dumps(self._payload).encode("utf-8")
+
+        changes: list = []
+        for idx in range(13):
+            changes.append(
+                self.module.ChangeItem(
+                    operation="Modified",
+                    path=f"tenant-state/intune/Device Configurations/P{idx}__id.json",
+                    risk_score=2,
+                    risk_label="MEDIUM",
+                    reason="Workload configuration area",
+                    policy_type="device_configuration",
+                    severity="MEDIUM",
+                )
+            )
+
+        env = {
+            "ENABLE_PR_AI_SUMMARY": "true",
+            "AZURE_OPENAI_ENDPOINT": "https://example.openai.azure.com",
+            "AZURE_OPENAI_DEPLOYMENT": "gpt",
+            "AZURE_OPENAI_API_KEY": "key",
+            "PR_AI_REQUEST_MAX_ATTEMPTS": "2",
+            "PR_AI_REQUEST_TIMEOUT_SECONDS": "10",
+            "PR_AI_COMPACT_CHANGE_LIMIT": "10",
+            "PR_AI_COMPACT_MAX_TOKENS": "400",
+        }
+
+        side_effects = [
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            TimeoutError("The read operation timed out"),
+            _FakeResponse(
+                {
+                    "choices": [
+                        {
+                            "finish_reason": "stop",
+                            "message": {"content": "Recovered via compact retry"},
+                        }
+                    ]
+                }
+            ),
+        ]
+
+        with patch.dict(os.environ, env, clear=False), patch.object(
+            self.module, "_load_policy_excerpt", return_value="{}"
+        ), patch.object(
+            self.module, "urlopen", side_effect=side_effects
+        ), patch.object(self.module.time, "sleep", return_value=None):
+            content, error = self.module._call_azure_openai(
+                changes=changes,
+                deterministic_summary="deterministic",
+                workload="intune",
+                repo_root="/tmp/repo",
+                baseline_branch="main",
+                drift_branch="drift/intune",
+            )
+
+        self.assertEqual(error, None)
+        self.assertEqual(content, "Recovered via compact retry")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_validate_backup_outputs.py b/tests/test_validate_backup_outputs.py
new file mode 100644
index 0000000..2db176e
--- /dev/null
+++ b/tests/test_validate_backup_outputs.py
@@ -0,0 +1,157 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+
+SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "validate_backup_outputs.py"
+
+
+def run_validator(*args: str) -> subprocess.CompletedProcess[str]:
+    cmd = [sys.executable, str(SCRIPT_PATH), *args]
+    return subprocess.run(cmd, check=False, text=True, capture_output=True)
+
+
+class ValidateBackupOutputsTests(unittest.TestCase):
+    def test_intune_validation_passes_with_required_outputs(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            base = Path(td)
+            root = base / "tenant-state" / "intune"
+            reports = base / "tenant-state" / "reports" / "intune"
+            (root / "Device Configurations").mkdir(parents=True, exist_ok=True)
+            reports.mkdir(parents=True, exist_ok=True)
+
+            (root / "Device Configurations" / "policy__id.json").write_text(
+                json.dumps({"id": "id-1", "displayName": "Policy"}) + "\n",
+                encoding="utf-8",
+            )
+            (reports / "policy-assignments.md").write_text("# report\n", encoding="utf-8")
+            (reports / "policy-assignments.csv").write_text("a,b\n", encoding="utf-8")
+            (reports / "object-inventory-all.csv").write_text("a,b\n", encoding="utf-8")
+
+            result = run_validator(
+                "--workload",
+                "intune",
+                "--mode",
+                "light",
+                "--root",
+                str(root),
+                "--reports-root",
+                str(reports),
+            )
+            self.assertEqual(result.returncode, 0, msg=result.stdout + result.stderr)
+
+    def test_intune_validation_fails_when_assignment_csv_missing(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            base = Path(td)
+            root = base / "tenant-state" / "intune"
+            reports = base / "tenant-state" / "reports" / "intune"
+            (root / "Device Configurations").mkdir(parents=True, exist_ok=True)
+            reports.mkdir(parents=True, exist_ok=True)
+
+            (root / "Device Configurations" / "policy__id.json").write_text("{}", encoding="utf-8")
+            (reports / "policy-assignments.md").write_text("# report\n", encoding="utf-8")
+            (reports / "object-inventory-all.csv").write_text("a,b\n", encoding="utf-8")
+
+            result = run_validator(
+                "--workload",
+                "intune",
+                "--mode",
+                "full",
+                "--root",
+                str(root),
+                "--reports-root",
+                str(reports),
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("Missing Intune assignment CSV report", result.stdout)
+
+    def test_entra_light_validation_allows_non_effective_enterprise_apps(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            base = Path(td)
+            root = base / "tenant-state" / "entra"
+            reports = base / "tenant-state" / "reports" / "entra"
+            (root / "Named Locations").mkdir(parents=True, exist_ok=True)
+            reports.mkdir(parents=True, exist_ok=True)
+
+            (root / "Named Locations" / "Named Locations.md").write_text("# named\n", encoding="utf-8")
+            (reports / "object-inventory-all.csv").write_text("a,b\n", encoding="utf-8")
+
+            result = run_validator(
+                "--workload",
+                "entra",
+                "--mode",
+                "light",
+                "--root",
+                str(root),
+                "--reports-root",
+                str(reports),
+                "--include-named-locations",
+                "true",
+                "--include-enterprise-applications",
+                "true",
+                "--include-enterprise-applications-effective",
+                "false",
+            )
+            self.assertEqual(result.returncode, 0, msg=result.stdout + result.stderr)
+
+    def test_entra_light_validation_allows_non_effective_app_registrations(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            base = Path(td)
+            root = base / "tenant-state" / "entra"
+            reports = base / "tenant-state" / "reports" / "entra"
+            (root / "Named Locations").mkdir(parents=True, exist_ok=True)
+            reports.mkdir(parents=True, exist_ok=True)
+
+            (root / "Named Locations" / "Named Locations.md").write_text("# named\n", encoding="utf-8")
+            (reports / "object-inventory-all.csv").write_text("a,b\n", encoding="utf-8")
+
+            result = run_validator(
+                "--workload",
+                "entra",
+                "--mode",
+                "light",
+                "--root",
+                str(root),
+                "--reports-root",
+                str(reports),
+                "--include-named-locations",
+                "true",
+                "--include-app-registrations",
+                "true",
+                "--include-app-registrations-effective",
+                "false",
+            )
+            self.assertEqual(result.returncode, 0, msg=result.stdout + result.stderr)
+
+    def test_entra_validation_fails_when_required_index_missing(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            base = Path(td)
+            root = base / "tenant-state" / "entra"
+            reports = base / "tenant-state" / "reports" / "entra"
+            root.mkdir(parents=True, exist_ok=True)
+            reports.mkdir(parents=True, exist_ok=True)
+            (reports / "object-inventory-all.csv").write_text("a,b\n", encoding="utf-8")
+
+            result = run_validator(
+                "--workload",
+                "entra",
+                "--mode",
+                "full",
+                "--root",
+                str(root),
+                "--reports-root",
+                str(reports),
+                "--include-named-locations",
+                "true",
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("Missing Entra export index for 'Named Locations'", result.stdout)
+
+
+if __name__ == "__main__":
+    unittest.main()