Sync from dev @ 252c1cf

Source: main (252c1cf)
Excluded: live tenant exports, generated artifacts, and dev-only tooling.
Commit 17d745bdac (2026-04-17 15:57:35 +02:00)
52 changed files with 15601 additions and 0 deletions


@@ -0,0 +1,316 @@
#!/usr/bin/env python3
"""Apply per-policy reviewer reject decisions on rolling drift PRs.
Reviewer decision commands inside auto "Change Needed" threads:
- /reject -> remove this file-level drift from the rolling PR (reset to baseline)
- /accept -> keep this file-level drift
The latest decision command in a thread wins.
"""
from __future__ import annotations
import argparse
import base64
import json
import os
import re
import subprocess
import sys
import urllib.parse
from pathlib import Path
from typing import Any
# common.py lives in the same directory; ensure it can be imported when the
# script is executed directly.
_sys_path_inserted = False
if __file__:
_script_dir = str(Path(__file__).resolve().parent)
if _script_dir not in sys.path:
sys.path.insert(0, _script_dir)
_sys_path_inserted = True
import common
if _sys_path_inserted:
sys.path.pop(0)
_request_json = common.request_json
_run_git = common.run_git
_configure_git_identity = common.configure_git_identity
AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
THREAD_STATUS_FIXED = 2
THREAD_STATUS_WONT_FIX = 3
THREAD_STATUS_CLOSED = 4
THREAD_STATUS_BY_DESIGN = 5
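# Numeric codes mirror the Azure DevOps CommentThreadStatus enum
# (active=1, fixed=2, wontFix=3, closed=4, byDesign=5).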
DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")
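# Lines the decision regex accepts (case-insensitive, optional "/" or "#"
# prefix, anchored at line start) -- illustrative samples:
#   "/reject"          -> reject
#   "  accept"         -> accept
#   "#REJECT this one" -> reject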
def _run_diff_name_only(repo_root: str, baseline_branch: str, drift_branch: str) -> str:
three_dot = f"origin/{baseline_branch}...origin/{drift_branch}"
two_dot = f"origin/{baseline_branch}..origin/{drift_branch}"
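    # The three-dot range diffs against the merge base (drift-only changes);
    # the two-dot range is the direct fallback when histories share no base.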
try:
return _run_git(repo_root, ["diff", "--name-only", three_dot])
except RuntimeError as exc:
stderr = str(exc).lower()
if "no merge base" not in stderr:
raise
print(
"WARNING: No merge base for rolling branches "
f"(origin/{baseline_branch}, origin/{drift_branch}); using direct diff."
)
return _run_git(repo_root, ["diff", "--name-only", two_dot])
def _git_path_exists(repo_root: str, treeish: str, path: str) -> bool:
proc = subprocess.run(
["git", "cat-file", "-e", f"{treeish}:{path}"],
cwd=repo_root,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
return proc.returncode == 0
def _normalize_branch_name(branch: str) -> str:
b = branch.strip()
if b.startswith("refs/heads/"):
return b[len("refs/heads/") :]
return b
def _thread_status_code(thread: dict[str, Any]) -> int:
status = thread.get("status")
if isinstance(status, int):
return status
if isinstance(status, str):
mapping = {
"fixed": THREAD_STATUS_FIXED,
"wontfix": THREAD_STATUS_WONT_FIX,
"closed": THREAD_STATUS_CLOSED,
"bydesign": THREAD_STATUS_BY_DESIGN,
}
return mapping.get(status.strip().lower(), 1)
return 1
def _is_thread_resolved(thread: dict[str, Any]) -> bool:
return _thread_status_code(thread) in (
THREAD_STATUS_FIXED,
THREAD_STATUS_WONT_FIX,
THREAD_STATUS_CLOSED,
THREAD_STATUS_BY_DESIGN,
)
def _ticket_path_from_content(content: str) -> str | None:
marker_re = re.compile(r"<!--\s*" + re.escape(AUTO_TICKET_THREAD_PREFIX) + r"(?P<id>[A-Za-z0-9_-]+)\s*-->")
match = marker_re.search(content or "")
if not match:
return None
encoded = match.group("id")
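    # Markers look like "<!--AUTO-CHANGE-TICKET:<base64url>-->"; restore any
    # stripped "=" padding to a multiple of 4 before decoding.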
padding = "=" * ((4 - len(encoded) % 4) % 4)
try:
return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
except Exception:
return None
def _is_doc_like(path: str) -> bool:
lp = path.lower()
return lp.endswith(".md") or lp.endswith(".markdown") or "/docs/" in lp
def _is_report_like(path: str) -> bool:
lp = path.lower()
return "/reports/" in lp or "assignment report" in lp
def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None:
decision: str | None = None
def _comment_sort_key(c: dict[str, Any]) -> tuple[int, int]:
try:
cid = int(c.get("id", 0))
except Exception:
cid = 0
try:
parent = int(c.get("parentCommentId", 0))
except Exception:
parent = 0
return (cid, parent)
for comment in sorted(comments, key=_comment_sort_key):
content = str(comment.get("content", "") or "")
match = DECISION_RE.search(content)
if match:
decision = match.group("decision").lower()
return decision
def _post_thread_comment(repo_api: str, pr_id: int, thread_id: int, token: str, content: str) -> None:
_request_json(
f"{repo_api}/pullrequests/{pr_id}/threads/{thread_id}/comments?api-version=7.1",
token=token,
method="POST",
body={
"parentCommentId": 0,
"content": content,
"commentType": 1,
},
)
def main() -> int:
parser = argparse.ArgumentParser(description="Apply reviewer /reject decisions for rolling PR threads")
parser.add_argument("--repo-root", required=True)
parser.add_argument("--workload", required=True)
parser.add_argument("--drift-branch", required=True)
parser.add_argument("--baseline-branch", required=True)
args = parser.parse_args()
token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
if not token:
raise SystemExit("SYSTEM_ACCESSTOKEN is empty.")
collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
project = os.environ["SYSTEM_TEAMPROJECT"]
repository_id = os.environ["BUILD_REPOSITORY_ID"]
drift_branch = _normalize_branch_name(args.drift_branch)
baseline_branch = _normalize_branch_name(args.baseline_branch)
repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
source_ref = f"refs/heads/{drift_branch}"
target_ref = f"refs/heads/{baseline_branch}"
query = urllib.parse.urlencode(
{
"searchCriteria.status": "active",
"searchCriteria.sourceRefName": source_ref,
"searchCriteria.targetRefName": target_ref,
"api-version": "7.1",
},
quote_via=urllib.parse.quote,
safe="/",
)
payload = _request_json(f"{repo_api}/pullrequests?{query}", token=token)
prs = payload.get("value", []) if isinstance(payload, dict) else []
if not prs:
print("No active rolling PR found; skipping reviewer reject sync.")
return 0
pr = prs[0]
pr_id = int(pr.get("pullRequestId"))
_run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch])
diff_paths = _run_diff_name_only(args.repo_root, baseline_branch, drift_branch)
changed_paths = {
p.strip()
for p in diff_paths.splitlines()
if p.strip() and not _is_doc_like(p.strip()) and not _is_report_like(p.strip())
}
if not changed_paths:
print("No changed policy paths in rolling PR; nothing to auto-reject.")
return 0
threads_payload = _request_json(f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", token=token)
threads = threads_payload.get("value", []) if isinstance(threads_payload, dict) else []
rejections: list[tuple[str, int]] = []
examined_ticket_threads = 0
for thread in threads:
comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
marker_path: str | None = None
for c in comments:
marker_path = _ticket_path_from_content(str(c.get("content", "") or ""))
if marker_path:
break
if not marker_path:
continue
examined_ticket_threads += 1
if marker_path not in changed_paths:
continue
decision = _latest_thread_decision(comments)
if decision == "reject":
try:
thread_id = int(thread.get("id"))
except Exception:
thread_id = -1
rejections.append((marker_path, thread_id))
if not rejections:
print(
"No /reject decisions found in auto policy threads "
f"(examined={examined_ticket_threads}, changed_paths={len(changed_paths)})."
)
return 0
print(
"Detected /reject decisions in auto policy threads: "
f"{len(rejections)} (examined={examined_ticket_threads})."
)
_run_git(args.repo_root, ["checkout", "--quiet", "--force", "-B", drift_branch, f"origin/{drift_branch}"])
changed = 0
baseline_tree = f"origin/{baseline_branch}"
for path, _thread_id in sorted(set(rejections)):
if _git_path_exists(args.repo_root, baseline_tree, path):
_run_git(args.repo_root, ["checkout", baseline_tree, "--", path])
_run_git(args.repo_root, ["add", "--", path])
changed += 1
else:
file_abs = os.path.join(args.repo_root, path)
if os.path.exists(file_abs):
_run_git(args.repo_root, ["rm", "-f", "--", path])
changed += 1
proc = subprocess.run(
["git", "diff", "--cached", "--quiet"],
cwd=args.repo_root,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if proc.returncode == 0:
print("Reviewer /reject decisions found, but no effective diff remained after baseline reset.")
return 0
_configure_git_identity(args.repo_root)
commit_msg = f"Apply reviewer /reject decisions ({args.workload})"
_run_git(args.repo_root, ["commit", "-m", commit_msg])
_run_git(args.repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{drift_branch}"])
for path, thread_id in rejections:
if thread_id <= 0:
continue
_post_thread_comment(
repo_api=repo_api,
pr_id=pr_id,
thread_id=thread_id,
token=token,
content=(
"Auto-action: /reject detected. This policy drift was reset to baseline on the rolling drift branch, "
"so it is removed from the PR diff.\n\n"
"If tenant rollback is required immediately, run restore pipeline as remediation."
),
)
print(
f"Applied reviewer /reject decisions for {changed} path(s) in PR #{pr_id}; "
f"drift branch '{drift_branch}' updated."
)
return 0
if __name__ == "__main__":
try:
raise SystemExit(main())
except Exception as exc:
print(f"WARNING: Failed to apply reviewer /reject decisions: {exc}", file=sys.stderr)
raise


@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""Commit Entra drift changes with best-effort change-author attribution."""
from __future__ import annotations
import argparse
import datetime as dt
import json
import pathlib
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from collections import defaultdict
from dataclasses import dataclass
def _git_run(repo_root: pathlib.Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
proc = subprocess.run(
["git", *args],
cwd=str(repo_root),
check=False,
capture_output=True,
text=True,
)
if check and proc.returncode != 0:
stderr = (proc.stderr or "").strip()
raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
return proc
def _set_output_var(name: str, value: str, is_output: bool = True) -> None:
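    # Emits an Azure DevOps logging command, e.g.:
    #   ##vso[task.setvariable variable=CHANGE_DETECTED;isOutput=true]1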
suffix = ";isOutput=true" if is_output else ""
print(f"##vso[task.setvariable variable={name}{suffix}]{value}")
def _warning(message: str) -> None:
print(f"##vso[task.logissue type=warning]{message}")
def _parse_backup_start(value: str) -> dt.datetime:
candidate = value.strip()
if not candidate:
raise ValueError("Missing required --backup-start value. Ensure the pipeline sets BACKUP_START in the backup_entra job before invoking commit_entra_drift.py.")
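    # Expected format, e.g. "2026.04.17:05.30.00" (interpreted as UTC).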
parsed = dt.datetime.strptime(candidate, "%Y.%m.%d:%H.%M.%S")
return parsed.replace(tzinfo=dt.timezone.utc)
def _format_filter_datetime(value: dt.datetime) -> str:
return value.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _last_entra_commit_date(repo_root: pathlib.Path, depth: int = 30) -> dt.datetime | None:
_git_run(repo_root, ["fetch", f"--depth={depth}"], check=False)
proc = _git_run(
repo_root,
[
"--no-pager",
"log",
"--no-show-signature",
f"-{depth}",
"--format=%s%%%cI",
],
)
for raw in proc.stdout.splitlines():
line = raw.strip()
if not line or "%%%" not in line:
continue
subject, iso_date = line.split("%%%", 1)
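        # Drift commit subjects look like "2026.04.17_05.30 -- Alice (Entra)";
        # subject[4] == "." is a cheap check for the leading date stamp.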
if subject.endswith(" (Entra)") and len(subject) >= 18 and subject[4] == ".":
try:
return dt.datetime.fromisoformat(iso_date.replace("Z", "+00:00")).astimezone(dt.timezone.utc)
except ValueError:
continue
return None
def _request_json(url: str, token: str) -> dict:
req = urllib.request.Request(
url,
headers={
"Authorization": f"Bearer {token}",
"Accept": "application/json",
},
method="GET",
)
with urllib.request.urlopen(req, timeout=60) as resp:
return json.loads(resp.read().decode("utf-8"))
@dataclass(frozen=True)
class Identity:
key: str
value: str
name: str
def _display_or_localpart(display_name: str, principal_name: str) -> str:
display_name = (display_name or "").strip()
if display_name:
return display_name
principal_name = (principal_name or "").strip()
if "@" in principal_name:
return principal_name.split("@", 1)[0]
return principal_name
def _extract_identity_from_audit(entry: dict) -> Identity | None:
initiated_by = entry.get("initiatedBy")
if not isinstance(initiated_by, dict):
return None
user = initiated_by.get("user")
if isinstance(user, dict):
principal_name = str(user.get("userPrincipalName") or user.get("email") or "").strip()
display_name = str(user.get("displayName") or "").strip()
if principal_name:
return Identity(
key=f"user:{principal_name}",
value=principal_name,
name=_display_or_localpart(display_name, principal_name),
)
if display_name:
return Identity(
key=f"display:{display_name}",
value=display_name,
name=display_name,
)
app = initiated_by.get("app")
if isinstance(app, dict):
display_name = str(app.get("displayName") or "").strip()
if display_name:
return Identity(
key=f"sp:{display_name}",
value=f"{display_name} (SP)",
name=display_name,
)
return None
def _fetch_directory_audits(
token: str,
last_commit_date: dt.datetime | None,
backup_start: dt.datetime,
) -> list[dict]:
params = {
"$top": "999",
"$select": "activityDateTime,activityDisplayName,category,result,initiatedBy,targetResources",
}
filter_parts = [f"activityDateTime le {_format_filter_datetime(backup_start)}"]
if last_commit_date is not None:
filter_parts.append(f"activityDateTime ge {_format_filter_datetime(last_commit_date)}")
params["$filter"] = " and ".join(filter_parts)
url = f"https://graph.microsoft.com/v1.0/auditLogs/directoryAudits?{urllib.parse.urlencode(params)}"
results: list[dict] = []
while url:
payload = _request_json(url, token)
value = payload.get("value")
if isinstance(value, list):
results.extend(item for item in value if isinstance(item, dict))
next_link = payload.get("@odata.nextLink")
url = str(next_link).strip() if next_link else ""
return results
def _resource_id_from_path(path: str) -> str:
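    # Export filenames are expected to end in "__<objectId>.json", e.g.
    # "My Policy__00000000-0000-0000-0000-000000000000.json" yields the object id.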
pure = pathlib.PurePosixPath(path)
if pure.suffix.lower() != ".json":
return ""
stem = pure.stem
if "__" not in stem:
return ""
return stem.rsplit("__", 1)[-1].lstrip("_").strip()
def _category_key(path: str) -> str:
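    # First three path segments, e.g. "tenant-state/entra/applications/app.json"
    # -> "tenant-state/entra/applications" (an assumed repository layout).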
pure = pathlib.PurePosixPath(path)
parts = pure.parts
if len(parts) < 3:
return ""
return "/".join(parts[:3])
def _fallback_identity(name: str, email: str) -> Identity:
return Identity(key=f"fallback:{email}", value=email, name=name)
def _effective_fallback_identity(
build_reason: str,
requested_for: str,
requested_for_email: str,
service_name: str,
service_email: str,
) -> Identity:
requested_for_email = requested_for_email.strip()
if build_reason.strip() != "Schedule" and "@" in requested_for_email:
requested_for = requested_for.strip() or requested_for_email.split("@", 1)[0]
return _fallback_identity(requested_for, requested_for_email)
return _fallback_identity(service_name.strip(), service_email.strip())
def _changed_files(repo_root: pathlib.Path, workload_root: str) -> list[str]:
proc = _git_run(repo_root, ["diff", "--cached", "--name-only", "--", workload_root])
return [line.strip() for line in proc.stdout.splitlines() if line.strip()]
def _remote_diff_is_empty(repo_root: pathlib.Path, drift_branch: str, workload_root: str) -> bool:
remote_ref = f"refs/remotes/origin/{drift_branch}"
if _git_run(repo_root, ["show-ref", "--verify", "--quiet", remote_ref], check=False).returncode != 0:
return False
return _git_run(repo_root, ["diff", "--quiet", f"origin/{drift_branch}", "--", workload_root], check=False).returncode == 0
def _build_author_groups(
changed_files: list[str],
audits: list[dict],
fallback: Identity,
) -> tuple[dict[str, dict[str, list[str] | list[Identity]]], int]:
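    # Attribution order per changed file:
    #   1) direct: an audit targetResources id equals the id parsed from the filename
    #   2) category: identities already resolved for files under the same category key
    #   3) fallback: the pipeline identity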
identities_by_resource: dict[str, dict[str, Identity]] = defaultdict(dict)
for audit in audits:
result = str(audit.get("result") or "").strip().lower()
if result and result != "success":
continue
identity = _extract_identity_from_audit(audit)
if identity is None:
continue
target_resources = audit.get("targetResources")
if not isinstance(target_resources, list):
continue
for target in target_resources:
if not isinstance(target, dict):
continue
resource_id = str(target.get("id") or "").strip()
if resource_id:
identities_by_resource[resource_id][identity.key] = identity
resolved_by_category: dict[str, dict[str, Identity]] = defaultdict(dict)
file_identities: dict[str, list[Identity]] = {}
unresolved_count = 0
for path in changed_files:
resource_id = _resource_id_from_path(path)
identities = list(identities_by_resource.get(resource_id, {}).values())
if identities:
file_identities[path] = sorted(identities, key=lambda item: item.key)
for identity in file_identities[path]:
resolved_by_category[_category_key(path)][identity.key] = identity
else:
file_identities[path] = []
if resource_id:
unresolved_count += 1
for path in changed_files:
if file_identities[path]:
continue
category_identities = list(resolved_by_category.get(_category_key(path), {}).values())
if category_identities:
file_identities[path] = sorted(category_identities, key=lambda item: item.key)
else:
file_identities[path] = [fallback]
grouped: dict[str, dict[str, list[str] | list[Identity]]] = {}
for path in changed_files:
identities = file_identities[path] or [fallback]
group_key = "&".join(identity.key for identity in identities)
entry = grouped.setdefault(group_key, {"files": [], "identities": identities})
files = entry["files"]
assert isinstance(files, list)
files.append(path)
return grouped, unresolved_count
def _commit_group(
repo_root: pathlib.Path,
files: list[str],
identities: list[Identity],
backup_start: dt.datetime,
) -> None:
for path in files:
print(f"\t- Adding {repo_root / path}")
_git_run(repo_root, ["add", "--all", "--", path])
author_name = ", ".join(identity.name for identity in identities)
author_email = ", ".join(identity.value for identity in identities)
print(f"\t- Setting commit author(s): {author_name}")
_git_run(repo_root, ["config", "user.name", author_name])
_git_run(repo_root, ["config", "user.email", author_email])
commit_date = backup_start.astimezone(dt.timezone.utc).strftime("%Y.%m.%d_%H.%M")
commit_name = f"{commit_date} -- {author_name} (Entra)"
print(f"\t- Creating commit '{commit_name}'")
_git_run(repo_root, ["commit", "-m", commit_name])
def main() -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repo-root", required=True)
parser.add_argument("--workload-root", required=True)
parser.add_argument("--baseline-branch", required=True)
parser.add_argument("--drift-branch", required=True)
parser.add_argument("--access-token", required=True)
parser.add_argument("--service-name", required=True)
parser.add_argument("--service-email", required=True)
parser.add_argument("--build-reason", default="")
parser.add_argument("--requested-for", default="")
parser.add_argument("--requested-for-email", default="")
parser.add_argument("--backup-start", required=True)
args = parser.parse_args()
repo_root = pathlib.Path(args.repo_root).resolve()
workload_root = args.workload_root.strip().strip("/")
fallback = _effective_fallback_identity(
build_reason=args.build_reason,
requested_for=args.requested_for,
requested_for_email=args.requested_for_email,
service_name=args.service_name,
service_email=args.service_email,
)
_git_run(repo_root, ["config", "user.name", fallback.name])
_git_run(repo_root, ["config", "user.email", fallback.value])
_git_run(repo_root, ["add", "--all", "--", workload_root])
changed_files = _changed_files(repo_root, workload_root)
if not changed_files:
print("No Entra change detected")
_set_output_var("CHANGE_DETECTED", "0")
_set_output_var("ROLLING_PR_SYNC_REQUIRED", "0")
return 0
if _remote_diff_is_empty(repo_root, args.drift_branch, workload_root):
print("No Entra change detected (snapshot identical to existing drift branch)")
_set_output_var("CHANGE_DETECTED", "0")
_set_output_var("ROLLING_PR_SYNC_REQUIRED", "1")
return 0
backup_start = _parse_backup_start(args.backup_start)
last_commit_date = _last_entra_commit_date(repo_root)
if last_commit_date is None:
_warning("Unable to obtain date of the last Entra backup config commit. All Entra audit events in the current query window will be considered.")
audits: list[dict] = []
try:
print("Getting Entra directory audit logs")
print(f"\t- from: '{last_commit_date}' (UTC) to: '{backup_start}' (UTC)")
audits = _fetch_directory_audits(args.access_token, last_commit_date, backup_start)
except urllib.error.HTTPError as exc:
if exc.code in (401, 403):
_warning("Graph token cannot read Entra directory audit logs. Falling back to pipeline identity for unresolved Entra changes.")
else:
raise
except Exception as exc: # pragma: no cover - defensive path for pipeline runtime issues
_warning(f"Unable to query Entra directory audit logs ({exc}). Falling back to pipeline identity for unresolved Entra changes.")
groups, unresolved_count = _build_author_groups(changed_files, audits, fallback)
if unresolved_count > 0:
_warning(
f"Unable to resolve author from Entra audit logs for {unresolved_count} of {len(changed_files)} changed files. Fallback identity used where needed."
)
_git_run(repo_root, ["reset", "--quiet", "--", workload_root])
print("\nCommit changes")
for group in groups.values():
files = group["files"]
identities = group["identities"]
assert isinstance(files, list)
assert isinstance(identities, list)
_commit_group(repo_root, files, identities, backup_start)
unpushed = _git_run(repo_root, ["cherry", "-v", f"origin/{args.baseline_branch}"]).stdout.strip()
if not unpushed:
_warning("Nothing to commit?! This shouldn't happen.")
_set_output_var("CHANGE_DETECTED", "0")
_set_output_var("ROLLING_PR_SYNC_REQUIRED", "0")
return 0
_git_run(repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{args.drift_branch}"])
commit_sha = _git_run(repo_root, ["rev-parse", "HEAD"]).stdout.strip()
modification_authors = sorted({identity.value for group in groups.values() for identity in group["identities"]}) # type: ignore[index]
_set_output_var("CHANGE_DETECTED", "1")
_set_output_var("ROLLING_PR_SYNC_REQUIRED", "1")
_set_output_var("COMMIT_SHA", commit_sha)
_set_output_var("COMMIT_DATE", backup_start.strftime("%Y.%m.%d_%H.%M"))
_set_output_var("MODIFICATION_AUTHOR", ", ".join(modification_authors))
return 0
if __name__ == "__main__":
try:
raise SystemExit(main())
except Exception as exc:
print(str(exc), file=sys.stderr)
raise

scripts/common.py

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Shared utilities for Intune / Entra drift backup scripts."""
from __future__ import annotations
import json
import os
import re
import subprocess
import time
import urllib.error
import urllib.request
from typing import Any
def env_text(name: str, default: str = "") -> str:
"""Read and sanitize an environment variable, treating unresolved Azure DevOps
macros $(...) as empty.
"""
raw = os.environ.get(name)
if raw is None:
return default
value = raw.strip()
if re.fullmatch(r"\$\([^)]+\)", value):
return default
if not value:
return default
return value
def env_bool(name: str, default: bool = False) -> bool:
"""Interpret an environment variable as a boolean."""
raw = env_text(name, "")
if not raw:
return default
return raw.lower() in {"1", "true", "yes", "y", "on"}
def normalize_exclude_csv(value: str) -> str:
"""Normalize an exclude CSV value, treating sentinel values as empty."""
normalized = str(value or "").strip()
if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}:
return ""
return normalized
def normalize_merge_strategy(value: str) -> str:
"""Normalize a merge strategy string to an Azure DevOps API value."""
raw = (value or "").strip().lower().replace("-", "").replace("_", "")
aliases = {
"nofastforward": "noFastForward",
"mergecommit": "noFastForward",
"merge": "noFastForward",
"squash": "squash",
"rebase": "rebase",
"rebasefastforward": "rebase",
"rebaseff": "rebase",
"rebasemerge": "rebaseMerge",
}
return aliases.get(raw, "rebase")
def _get_retry_after_seconds(error: urllib.error.HTTPError) -> float | None:
try:
retry_after = error.headers.get("Retry-After")
if retry_after:
return float(retry_after)
except Exception:
pass
return None
def request_json(
url: str,
method: str = "GET",
body: dict[str, Any] | None = None,
headers: dict[str, str] | None = None,
token: str | None = None,
timeout: float = 60,
max_retries: int = 0,
) -> Any:
"""Make a JSON HTTP request and return the parsed response.
If *token* is provided, an Authorization header is added automatically.
If *max_retries* is greater than zero, transient HTTP errors (429, 500,
502, 503, 504) are retried with exponential back-off.
"""
req_headers: dict[str, str] = {
"Accept": "application/json",
}
if token is not None:
req_headers["Authorization"] = f"Bearer {token}"
if headers is not None:
req_headers.update(headers)
payload: bytes | None = None
if body is not None:
payload = json.dumps(body).encode("utf-8")
req_headers.setdefault("Content-Type", "application/json")
retry_codes = {429, 500, 502, 503, 504}
last_error: Exception | None = None
for attempt in range(max_retries + 1):
req = urllib.request.Request(
url,
data=payload,
method=method,
headers=req_headers,
)
try:
with urllib.request.urlopen(req, timeout=timeout) as resp:
return json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as exc:
last_error = exc
if exc.code not in retry_codes or attempt == max_retries:
raise
retry_after = _get_retry_after_seconds(exc)
sleep = retry_after if retry_after is not None else (2 ** attempt)
time.sleep(sleep)
except urllib.error.URLError as exc:
last_error = exc
if attempt == max_retries:
raise
time.sleep(2 ** attempt)
# Should never be reached; satisfy type checker.
if last_error is not None:
raise last_error
raise RuntimeError("request_json exhausted all retries")
def run_git(repo_root: str | os.PathLike[str], args: list[str], check: bool = True) -> str:
"""Run a git command and return stdout as a stripped string."""
proc = subprocess.run(
["git", *args],
cwd=str(repo_root),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
if check and proc.returncode != 0:
stderr = (proc.stderr or "").strip()
raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
return proc.stdout.strip()
def configure_git_identity(
repo_root: str | os.PathLike[str],
fallback_name: str | None = None,
fallback_email: str | None = None,
) -> None:
"""Configure git user.name and user.email from pipeline env vars."""
requested_for = (os.environ.get("BUILD_REQUESTEDFOR") or "").strip()
requested_for_email = (os.environ.get("BUILD_REQUESTEDFOREMAIL") or "").strip()
fallback_name = (fallback_name or os.environ.get("USER_NAME") or "ASTRAL Backup Service").strip()
fallback_email = (fallback_email or os.environ.get("USER_EMAIL") or "intune-backup@local.invalid").strip()
author_name = requested_for or fallback_name
author_email = requested_for_email if "@" in requested_for_email else fallback_email
run_git(repo_root, ["config", "user.name", author_name])
run_git(repo_root, ["config", "user.email", author_email])


@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Lightweight Azure OpenAI availability precheck for pipeline diagnostics.
This script is intentionally non-blocking: it always exits 0.
"""
from __future__ import annotations
import json
import os
import sys
from urllib.error import HTTPError, URLError
from urllib.parse import quote, urlsplit
from urllib.request import Request, urlopen
def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default).strip()
def _set_pipeline_var(name: str, value: str) -> None:
print(f"##vso[task.setvariable variable={name}]{value}")
def _normalize_aoai_endpoint(endpoint: str) -> str:
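    # e.g. "https://myres.openai.azure.com/openai/deployments/x" (a sample URL)
    # normalizes to the resource root "https://myres.openai.azure.com".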
cleaned = endpoint.strip().rstrip("/")
if not cleaned:
return cleaned
parsed = urlsplit(cleaned)
if parsed.scheme and parsed.netloc:
cleaned = f"{parsed.scheme}://{parsed.netloc}"
marker = "/openai"
idx = cleaned.lower().find(marker)
if idx != -1:
return cleaned[:idx]
return cleaned
def _preferred_aoai_token_param(deployment_name: str) -> str:
override = _env("AZURE_OPENAI_TOKEN_PARAM", "").lower()
if override in {"max_tokens", "max_completion_tokens"}:
return override
if deployment_name.strip().lower().startswith("gpt-5"):
return "max_completion_tokens"
return "max_tokens"
def _aoai_token_param_candidates(deployment_name: str) -> list[str]:
preferred = _preferred_aoai_token_param(deployment_name)
alternate = "max_completion_tokens" if preferred == "max_tokens" else "max_tokens"
return [preferred, alternate]
def _preferred_aoai_temperature(deployment_name: str) -> float | None:
override = _env("AZURE_OPENAI_TEMPERATURE", "").lower()
if override in {"default", "none", "omit"}:
return None
if override:
try:
return float(override)
except ValueError:
return None
if deployment_name.strip().lower().startswith("gpt-5"):
return None
return 0.0
def _aoai_temperature_candidates(deployment_name: str) -> list[float | None]:
preferred = _preferred_aoai_temperature(deployment_name)
if preferred is None:
return [None]
return [preferred, None]
def main() -> int:
enabled = _env("ENABLE_PR_AI_SUMMARY", "true").lower() == "true"
if not enabled:
print("Azure OpenAI precheck skipped: ENABLE_PR_AI_SUMMARY=false")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
endpoint = _env("AZURE_OPENAI_ENDPOINT")
deployment = _env("AZURE_OPENAI_DEPLOYMENT")
api_key = _env("AZURE_OPENAI_API_KEY")
api_version = _env("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
if not endpoint or not deployment or not api_key:
print("Azure OpenAI precheck skipped: missing endpoint/deployment/api-key variable")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
endpoint_raw = endpoint
endpoint = _normalize_aoai_endpoint(endpoint_raw)
deployment_url = f"{endpoint}/openai/deployments/{quote(deployment)}/chat/completions?api-version={quote(api_version)}"
v1_url = f"{endpoint}/openai/v1/chat/completions"
print("Azure OpenAI precheck: starting")
print(f"- endpoint(raw): {endpoint_raw}")
print(f"- endpoint(normalized): {endpoint}")
print(f"- deployment: {deployment}")
print(f"- api_version: {api_version}")
prefer_v1 = endpoint.lower().endswith(".cognitiveservices.azure.com")
health_messages = [
{"role": "system", "content": "You are a health-check assistant."},
{"role": "user", "content": "Reply with: OK"},
]
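    # Probe matrix: preferred temperature, then omitting it; per temperature the
    # preferred token parameter, then the alternate; per combination both routes.
    # The first successful response wins; specific 400 errors steer the retries.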
for temperature in _aoai_temperature_candidates(deployment):
temperature_unsupported = False
for token_param in _aoai_token_param_candidates(deployment):
deployment_payload = {
"messages": health_messages,
token_param: 16,
}
v1_payload = {
"model": deployment,
"messages": health_messages,
token_param: 16,
}
if temperature is not None:
deployment_payload["temperature"] = temperature
v1_payload["temperature"] = temperature
routes = (
[("v1", v1_url, v1_payload), ("deployments", deployment_url, deployment_payload)]
if prefer_v1
else [("deployments", deployment_url, deployment_payload), ("v1", v1_url, v1_payload)]
)
token_param_unsupported = False
for route_name, route_url, payload in routes:
req = Request(
url=route_url,
method="POST",
data=json.dumps(payload).encode("utf-8"),
headers={
"Content-Type": "application/json",
"api-key": api_key,
},
)
try:
with urlopen(req, timeout=45) as resp:
_ = json.loads(resp.read().decode("utf-8"))
print(f"Azure OpenAI precheck: SUCCESS via {route_name} route")
_set_pipeline_var("AOAI_AVAILABLE", "1")
return 0
except HTTPError as exc:
raw = ""
try:
raw = exc.read().decode("utf-8", errors="replace")
except Exception:
raw = ""
print(f"Azure OpenAI precheck: HTTP {exc.code} via {route_name} route")
if raw:
print(raw)
if exc.code == 400:
raw_lower = raw.lower()
if "unsupported parameter" in raw_lower and f"'{token_param}'" in raw_lower:
token_param_unsupported = True
break
if "unsupported value" in raw_lower and "'temperature'" in raw_lower and temperature is not None:
temperature_unsupported = True
break
if exc.code == 404:
                        # 404 on this route; try the remaining route before giving up.
continue
if exc.code in (401, 403):
print("Hint: Check AZURE_OPENAI_API_KEY and endpoint/resource pairing.")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
if exc.code == 400:
print("Hint: Check model/deployment name and API version compatibility.")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
except URLError as exc:
print(f"Azure OpenAI precheck: network error via {route_name} route: {exc}")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
except Exception as exc: # pragma: no cover
print(f"Azure OpenAI precheck: unexpected error via {route_name} route: {exc}")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
if temperature_unsupported:
break
if not token_param_unsupported:
break
if not temperature_unsupported:
break
print("Azure OpenAI precheck: no successful response from tested routes/token-params")
print("Hint: Verify AZURE_OPENAI_ENDPOINT points to the resource root, without /openai path suffix.")
print("Hint: Verify AZURE_OPENAI_DEPLOYMENT is the deployment name (for v1 this is passed as model).")
_set_pipeline_var("AOAI_AVAILABLE", "0")
return 0
if __name__ == "__main__":
sys.exit(main())


@@ -0,0 +1,651 @@
#!/usr/bin/env python3
"""Create/update rolling drift PR and optionally queue remediation after rejection."""
from __future__ import annotations
import argparse
import hashlib
import json
import os
import subprocess
import sys
import urllib.parse
from pathlib import Path
from typing import Any
# common.py lives in the same directory; ensure it can be imported when the
# script is executed directly.
_sys_path_inserted = False
if __file__:
_script_dir = str(Path(__file__).resolve().parent)
if _script_dir not in sys.path:
sys.path.insert(0, _script_dir)
_sys_path_inserted = True
import common
if _sys_path_inserted:
sys.path.pop(0)
_env_text = common.env_text
_env_bool = common.env_bool
_normalize_exclude_csv = common.normalize_exclude_csv
_normalize_merge_strategy = common.normalize_merge_strategy
_request_json = common.request_json
_run_git = common.run_git
def _query_prs(
repo_api: str,
headers: dict[str, str],
source_ref: str,
target_ref: str,
status: str,
) -> list[dict[str, Any]]:
query = urllib.parse.urlencode(
{
"searchCriteria.status": status,
"searchCriteria.sourceRefName": source_ref,
"searchCriteria.targetRefName": target_ref,
"api-version": "7.1",
},
quote_via=urllib.parse.quote,
safe="/",
)
url = f"{repo_api}/pullrequests?{query}"
payload = _request_json(url, headers=headers)
return payload.get("value", []) if isinstance(payload, dict) else []
def _normalize_branch(branch: str) -> str:
b = branch.strip()
if b.startswith("refs/heads/"):
return b[len("refs/heads/") :]
return b
def _ref_from_branch(branch: str) -> str:
return f"refs/heads/{_normalize_branch(branch)}"
def _pr_web_url(pr_payload: dict[str, Any]) -> str:
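    # Rewrites the REST resource URL into the browser URL, e.g.
    # ".../_apis/git/repositories/<repo>/pullRequests/42" -> ".../_git/<repo>/pullrequest/42".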
pr_id = pr_payload.get("pullRequestId")
return (
pr_payload.get("url", "")
.replace("_apis/git/repositories", "_git")
.replace(f"/pullRequests/{pr_id}", f"/pullrequest/{pr_id}")
)
def _current_tree_id(repo_root: str) -> str:
return _run_git(repo_root, ["rev-parse", "HEAD^{tree}"])
def _tree_id_for_commitish(repo_root: str, commitish: str) -> str:
return _run_git(repo_root, ["rev-parse", f"{commitish}^{{tree}}"])
def _ref_has_commit(repo_root: str, ref: str) -> bool:
proc = subprocess.run(
["git", "rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"],
cwd=repo_root,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
return proc.returncode == 0
def _commit_tree_id(repo_api: str, headers: dict[str, str], commit_id: str) -> str:
url = f"{repo_api}/commits/{commit_id}?api-version=7.1"
payload = _request_json(url, headers=headers)
tree_id = payload.get("treeId", "") if isinstance(payload, dict) else ""
return tree_id.strip()
def _latest_pr_by_creation(prs: list[dict[str, Any]]) -> list[dict[str, Any]]:
return sorted(prs, key=lambda x: x.get("creationDate", ""), reverse=True)
def _normalize_repo_path(path: str) -> str:
    # str.lstrip("./") strips a character set, not a prefix, so handle "./" explicitly.
    p = str(path or "").replace("\\", "/")
    while p.startswith("./"):
        p = p[2:]
    return p.lstrip("/")
def _is_doc_like(path: str) -> bool:
lp = _normalize_repo_path(path).lower()
if lp.endswith((".md", ".html", ".htm", ".pdf", ".csv", ".txt")):
return True
return "/docs/" in f"/{lp}" or "/object inventory/" in f"/{lp}"
def _is_report_like(path: str) -> bool:
lp = _normalize_repo_path(path).lower()
return "/reports/" in f"/{lp}" or "/assignment report/" in f"/{lp}"
def _is_workload_config_path(path: str, workload_dir: str, backup_folder: str, reports_subdir: str) -> bool:
lp = _normalize_repo_path(path).lower()
backup_norm = _normalize_repo_path(backup_folder).lower().strip("/")
workload_norm = _normalize_repo_path(workload_dir).lower().strip("/")
reports_norm = _normalize_repo_path(reports_subdir).lower().strip("/")
if not backup_norm or not workload_norm:
return False
workload_prefix = f"{backup_norm}/{workload_norm}/"
if not lp.startswith(workload_prefix):
return False
if reports_norm and lp.startswith(f"{backup_norm}/{reports_norm}/"):
return False
if _is_doc_like(lp) or _is_report_like(lp):
return False
return True
def _config_fingerprint_from_local_tree(
repo_root: str, commitish: str, workload_dir: str, backup_folder: str, reports_subdir: str
) -> str:
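    # Fingerprint = sha256 over the sorted "<path>\t<blob-id>" pairs of workload
    # config blobs only, so docs/report churn never perturbs the digest.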
backup_norm = _normalize_repo_path(backup_folder).strip("/")
workload_norm = _normalize_repo_path(workload_dir).strip("/")
path_prefix = f"{backup_norm}/{workload_norm}" if backup_norm and workload_norm else ""
if not path_prefix:
return ""
try:
out = _run_git(repo_root, ["ls-tree", "-r", "--full-tree", commitish, "--", path_prefix])
except Exception:
return ""
pairs: list[str] = []
for line in out.splitlines():
if "\t" not in line:
continue
left, rel_path = line.split("\t", 1)
parts = left.split()
if len(parts) < 3 or parts[1] != "blob":
continue
blob_id = parts[2].strip()
if not blob_id:
continue
if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir):
continue
pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}")
if not pairs:
return ""
pairs.sort(key=lambda item: item.lower())
joined = "\n".join(pairs).encode("utf-8")
return hashlib.sha256(joined).hexdigest()
def _config_fingerprint_from_tree_api(
repo_api: str, headers: dict[str, str], tree_id: str, workload_dir: str, backup_folder: str, reports_subdir: str
) -> str:
if not tree_id:
return ""
url = f"{repo_api}/trees/{tree_id}?recursive=true&api-version=7.1"
payload = _request_json(url, headers=headers)
entries = payload.get("treeEntries", []) if isinstance(payload, dict) else []
pairs: list[str] = []
for entry in entries:
if not isinstance(entry, dict):
continue
if str(entry.get("gitObjectType", "")).lower() != "blob":
continue
rel_path = str(entry.get("relativePath", ""))
if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir):
continue
blob_id = str(entry.get("objectId", "")).strip()
if not blob_id:
continue
pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}")
if not pairs:
return ""
pairs.sort(key=lambda item: item.lower())
joined = "\n".join(pairs).encode("utf-8")
return hashlib.sha256(joined).hexdigest()
def _workload_config_diff_exists(
repo_root: str,
baseline_commitish: str,
drift_commitish: str,
workload_dir: str,
backup_folder: str,
reports_subdir: str,
) -> bool:
baseline_fingerprint = _config_fingerprint_from_local_tree(
repo_root=repo_root,
commitish=baseline_commitish,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
)
drift_fingerprint = _config_fingerprint_from_local_tree(
repo_root=repo_root,
commitish=drift_commitish,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
)
if baseline_fingerprint and drift_fingerprint:
return baseline_fingerprint != drift_fingerprint
try:
return _tree_id_for_commitish(repo_root, baseline_commitish) != _tree_id_for_commitish(repo_root, drift_commitish)
except Exception:
return True
def _find_matching_abandoned_pr(
repo_api: str,
headers: dict[str, str],
abandoned_prs: list[dict[str, Any]],
drift_tree: str,
repo_root: str,
workload_dir: str,
backup_folder: str,
reports_subdir: str,
drift_commitish: str,
) -> tuple[dict[str, Any] | None, str]:
current_config_fingerprint = _config_fingerprint_from_local_tree(
repo_root=repo_root,
commitish=drift_commitish,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
)
tree_fingerprint_cache: dict[str, str] = {}
for pr in _latest_pr_by_creation(abandoned_prs):
commit_id = (
((pr.get("lastMergeSourceCommit") or {}).get("commitId"))
or ((pr.get("lastMergeCommit") or {}).get("commitId"))
or ""
).strip()
if not commit_id:
continue
try:
pr_tree = _commit_tree_id(repo_api, headers, commit_id)
except Exception:
continue
if pr_tree and pr_tree == drift_tree:
return pr, "exact-tree"
if current_config_fingerprint and pr_tree:
if pr_tree not in tree_fingerprint_cache:
try:
tree_fingerprint_cache[pr_tree] = _config_fingerprint_from_tree_api(
repo_api=repo_api,
headers=headers,
tree_id=pr_tree,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
)
except Exception:
tree_fingerprint_cache[pr_tree] = ""
if tree_fingerprint_cache[pr_tree] and tree_fingerprint_cache[pr_tree] == current_config_fingerprint:
return pr, "config-fingerprint"
return None, ""
def _pr_has_reject_vote(pr: dict[str, Any]) -> bool:
reviewers = pr.get("reviewers", [])
if not isinstance(reviewers, list):
return False
for reviewer in reviewers:
if not isinstance(reviewer, dict):
continue
try:
vote = int(reviewer.get("vote", 0))
except Exception:
vote = 0
if vote == -10:
return True
return False
def _current_pr_merge_strategy(pr: dict[str, Any]) -> str:
completion_options = pr.get("completionOptions")
if not isinstance(completion_options, dict):
return ""
raw = str(completion_options.get("mergeStrategy") or "").strip()
if not raw:
return ""
return _normalize_merge_strategy(raw)
def _build_description(workload: str, drift_branch: str, baseline_branch: str, build_number: str, build_id: str) -> str:
is_entra = workload.lower() == "entra"
lead = "Rolling Entra drift PR created by backup pipeline." if is_entra else "Rolling drift PR created by backup pipeline."
return (
f"{lead}\n\n"
f"- Source branch: `{drift_branch}`\n"
f"- Target branch: `{baseline_branch}`\n"
f"- Last pipeline run: `{build_number}` (BuildId: {build_id})\n\n"
"The automated review summary is generated immediately after PR creation and inserted "
"above the reviewer actions section.\n\n"
"## Reviewer Quick Actions\n\n"
"### 1) Accept all changes\n"
"- Merge PR to accept drift into baseline.\n\n"
"### 2) Reject whole PR and revert\n"
"- Set reviewer vote to **Reject**.\n"
"- Abandon PR.\n"
"- Auto-remediation queues restore (if `AUTO_REMEDIATE_ON_PR_REJECTION=true`).\n\n"
"### 3) Reject only selected policy changes\n"
"- In each `Change Needed` policy thread, comment `/reject` for changes you do not want.\n"
"- Optional: use `/accept` for changes you want to keep.\n"
"- Wait for review-sync pipeline (about 5 minutes) to update PR diff.\n"
"- Merge remaining accepted changes.\n"
"- Post-merge auto-remediation queues restore to reconcile tenant to merged baseline "
"(if `AUTO_REMEDIATE_AFTER_MERGE=true`)."
)
def _threads_with_marker(repo_api: str, headers: dict[str, str], pr_id: int, marker: str) -> bool:
url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1"
payload = _request_json(url, headers=headers)
threads = payload.get("value", []) if isinstance(payload, dict) else []
for thread in threads:
for comment in thread.get("comments", []):
content = str(comment.get("content", ""))
if marker in content:
return True
return False
def _queue_restore_pipeline(
collection_uri: str,
project: str,
headers: dict[str, str],
definition_id: int,
baseline_branch: str,
include_entra_update: bool,
dry_run: bool,
update_assignments: bool,
remove_unmanaged: bool,
max_workers: int,
exclude_csv: str,
) -> dict[str, Any]:
build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1"
template_parameters = {
"dryRun": dry_run,
"updateAssignments": update_assignments,
"removeObjectsNotInBaseline": remove_unmanaged,
"includeEntraUpdate": include_entra_update,
"baselineBranch": baseline_branch,
"maxWorkers": max_workers,
}
exclude_csv = _normalize_exclude_csv(exclude_csv)
if exclude_csv:
template_parameters["excludeCsv"] = exclude_csv
body = {
"definition": {"id": definition_id},
"sourceBranch": _ref_from_branch(baseline_branch),
"templateParameters": template_parameters,
}
return _request_json(build_api, headers=headers, method="POST", body=body)
def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None:
url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1"
body = {
"comments": [{"parentCommentId": 0, "content": content, "commentType": 1}],
"status": "active",
}
_request_json(url, headers=headers, method="POST", body=body)
def main() -> int:
parser = argparse.ArgumentParser(description="Ensure rolling PR exists with optional remediation-on-rejection")
parser.add_argument("--repo-root", required=True)
parser.add_argument("--workload", required=True, choices=["intune", "entra"])
parser.add_argument("--drift-branch", required=True)
parser.add_argument("--baseline-branch", required=True)
parser.add_argument("--pr-title", required=True)
args = parser.parse_args()
token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
if not token:
raise SystemExit("SYSTEM_ACCESSTOKEN is empty. Enable OAuth token access for this pipeline.")
collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
project = os.environ["SYSTEM_TEAMPROJECT"]
repository_id = os.environ["BUILD_REPOSITORY_ID"]
build_number = os.environ.get("BUILD_BUILDNUMBER", "")
build_id = os.environ.get("BUILD_BUILDID", "")
auto_remediate = _env_bool("AUTO_REMEDIATE_ON_PR_REJECTION", False)
include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False)
remediation_def_id_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "")
remediation_dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False)
remediation_update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True)
remediation_remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False)
remediation_max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10")
remediation_exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", ""))
pr_merge_strategy = _normalize_merge_strategy(_env_text("ROLLING_PR_MERGE_STRATEGY", "rebase"))
create_as_draft = _env_bool("ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS", False)
try:
remediation_max_workers = int(remediation_max_workers_raw)
except ValueError as exc:
raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS value: {remediation_max_workers_raw}") from exc
if auto_remediate and not remediation_def_id_raw:
print(
"WARNING: AUTO_REMEDIATE_ON_PR_REJECTION=true but AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty; "
"remediation queueing disabled for this run.",
file=sys.stderr,
)
auto_remediate = False
try:
remediation_def_id = int(remediation_def_id_raw) if remediation_def_id_raw else 0
except ValueError as exc:
raise SystemExit(
f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID value: {remediation_def_id_raw}"
) from exc
drift_branch = _normalize_branch(args.drift_branch)
baseline_branch = _normalize_branch(args.baseline_branch)
backup_folder = _env_text("BACKUP_FOLDER", "tenant-state")
reports_subdir = _env_text("REPORTS_SUBDIR", "reports")
workload_dir = _env_text(
"INTUNE_BACKUP_SUBDIR" if args.workload == "intune" else "ENTRA_BACKUP_SUBDIR",
args.workload,
)
source_ref = _ref_from_branch(drift_branch)
target_ref = _ref_from_branch(baseline_branch)
repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
description = _build_description(args.workload, drift_branch, baseline_branch, build_number, build_id)
completion_options = {"mergeStrategy": pr_merge_strategy}
print(f"Rolling PR completion merge strategy: {pr_merge_strategy}")
active_prs = _query_prs(repo_api, headers, source_ref, target_ref, "active")
if active_prs:
pr = active_prs[0]
pr_id = pr.get("pullRequestId")
current_title = str(pr.get("title") or "")
current_description = str(pr.get("description") or "")
current_merge_strategy = _current_pr_merge_strategy(pr)
desired_description = current_description if current_description.strip() else description
needs_patch = (
current_title != args.pr_title
or not current_description.strip()
or current_merge_strategy != pr_merge_strategy
)
if needs_patch:
update_url = f"{repo_api}/pullrequests/{pr_id}?api-version=7.1"
_request_json(
update_url,
headers=headers,
method="PATCH",
body={
"title": args.pr_title,
"description": desired_description,
"completionOptions": completion_options,
},
)
web_url = _pr_web_url(pr)
if needs_patch:
print(f"Updated rolling {args.workload} PR #{pr_id}: {web_url}")
else:
print(f"Rolling {args.workload} PR #{pr_id} already up to date: {web_url}")
print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}")
if web_url:
print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}")
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0")
return 0
_run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch])
baseline_commitish = f"origin/{baseline_branch}" if _ref_has_commit(args.repo_root, f"origin/{baseline_branch}") else baseline_branch
drift_commitish = f"origin/{drift_branch}" if _ref_has_commit(args.repo_root, f"origin/{drift_branch}") else "HEAD"
if not _workload_config_diff_exists(
repo_root=args.repo_root,
baseline_commitish=baseline_commitish,
drift_commitish=drift_commitish,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
):
print(
"Suppressed PR recreation: drift branch has no effective workload configuration diff "
f"against {baseline_branch}."
)
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
return 0
drift_tree = _tree_id_for_commitish(args.repo_root, drift_commitish)
abandoned_prs = _query_prs(repo_api, headers, source_ref, target_ref, "abandoned")
matching_abandoned, match_reason = _find_matching_abandoned_pr(
repo_api=repo_api,
headers=headers,
abandoned_prs=abandoned_prs,
drift_tree=drift_tree,
repo_root=args.repo_root,
workload_dir=workload_dir,
backup_folder=backup_folder,
reports_subdir=reports_subdir,
drift_commitish=drift_commitish,
)
if matching_abandoned:
if match_reason == "config-fingerprint":
print(
"Matched abandoned PR using configuration fingerprint "
"(ignoring docs/reports churn)."
)
pr_id = int(matching_abandoned["pullRequestId"])
if not _pr_has_reject_vote(matching_abandoned):
print(
"Matched abandoned PR without reviewer Reject vote; "
"skipping remediation and suppressing PR recreation for this unchanged drift snapshot."
)
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
return 0
if not auto_remediate:
print(
"Suppressed PR recreation: latest drift matches a rejected PR, "
"but AUTO_REMEDIATE_ON_PR_REJECTION is disabled."
)
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
return 0
marker = f"Automation marker: AUTO-REMEDIATE-TREE:{drift_tree}"
already_queued = _threads_with_marker(repo_api, headers, pr_id, marker)
if already_queued:
print(
"Suppressed PR recreation: latest drift matches a previously rejected PR and remediation was already queued."
)
else:
queued = _queue_restore_pipeline(
collection_uri=collection_uri,
project=project,
headers=headers,
definition_id=remediation_def_id,
baseline_branch=baseline_branch,
include_entra_update=include_entra_update,
dry_run=remediation_dry_run,
update_assignments=remediation_update_assignments,
remove_unmanaged=remediation_remove_unmanaged,
max_workers=remediation_max_workers,
exclude_csv=remediation_exclude_csv,
)
build_queued_id = queued.get("id")
build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "")
if not build_url and build_queued_id:
build_url = f"{collection_uri}/{project}/_build/results?buildId={build_queued_id}"
comment = (
"Auto-remediation queued because the latest drift matches a rejected PR.\n\n"
f"Workload: {args.workload}\n"
f"Rejected PR: #{pr_id}\n"
f"Drift tree: {drift_tree}\n"
f"Restore pipeline definition: {remediation_def_id}\n"
f"Restore run: {build_url or '(queued)'}\n\n"
f"{marker}"
)
try:
_post_pr_thread(repo_api, headers, pr_id, comment)
except Exception as exc:
print(f"WARNING: Remediation queued, but failed to post PR thread on #{pr_id}: {exc}")
print(
f"Queued remediation pipeline run (definition={remediation_def_id}, buildId={build_queued_id}) and suppressed PR recreation."
)
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
return 0
if abandoned_prs:
print(
f"No abandoned PR snapshot match for current drift tree (checked {len(abandoned_prs)} abandoned PR(s)); creating/updating rolling PR."
)
create_url = f"{repo_api}/pullrequests?api-version=7.1"
created = _request_json(
create_url,
headers=headers,
method="POST",
body={
"sourceRefName": source_ref,
"targetRefName": target_ref,
"title": args.pr_title,
"description": description,
"isDraft": create_as_draft,
"completionOptions": completion_options,
},
)
pr_id = created.get("pullRequestId")
web_url = _pr_web_url(created)
print(f"Created rolling {args.workload} PR #{pr_id}: {web_url}")
print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}")
if web_url:
print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}")
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0")
return 0
if __name__ == "__main__":
try:
raise SystemExit(main())
except Exception as exc:
print(f"ERROR: Failed to ensure rolling PR: {exc}", file=sys.stderr)
raise

File diff suppressed because it is too large.


@@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""Revert Entra JSON file edits when only enrichment metadata changed."""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from pathlib import Path, PurePosixPath
from typing import Any
ENRICHMENT_KEY_NAMES = {
"ownersresolved",
"approleassignmentsresolved",
"requiredresourceaccessresolved",
"appownerorganizationresolved",
"resolutionstatus",
}
def _to_bool(value: str) -> bool:
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
def _run_git(repo_root: Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[bytes]:
proc = subprocess.run(
["git", *args],
cwd=str(repo_root),
check=False,
capture_output=True,
)
if check and proc.returncode != 0:
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
return proc
def _strip_enrichment(value: Any) -> Any:
if isinstance(value, dict):
cleaned: dict[str, Any] = {}
for key, child in value.items():
if str(key).strip().lower() in ENRICHMENT_KEY_NAMES:
continue
cleaned[key] = _strip_enrichment(child)
return cleaned
if isinstance(value, list):
return [_strip_enrichment(item) for item in value]
return value
def _is_enrichment_only_change(old_text: str, new_text: str) -> bool:
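    # True only when the payloads differ as-is yet become identical once
    # enrichment keys (e.g. "ownersResolved") are stripped from both sides.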
if not old_text or not new_text:
return False
try:
old_payload = json.loads(old_text)
new_payload = json.loads(new_text)
except Exception:
return False
if not isinstance(old_payload, dict) or not isinstance(new_payload, dict):
return False
old_stripped = _strip_enrichment(old_payload)
new_stripped = _strip_enrichment(new_payload)
if old_stripped != new_stripped:
return False
return old_payload != new_payload
def _modified_paths(repo_root: Path, workload_root: str) -> list[str]:
proc = _run_git(
repo_root,
["diff", "--name-only", "-z", "--diff-filter=M", "--", workload_root],
check=True,
)
raw = proc.stdout.split(b"\x00")
paths: list[str] = []
for chunk in raw:
text = chunk.decode("utf-8", errors="replace").strip()
if text:
paths.append(text)
return paths
def _is_json_path(path: str) -> bool:
return PurePosixPath(path.replace("\\", "/")).suffix.lower() == ".json"
def _enrichment_only_modified_files(repo_root: Path, workload_root: str) -> list[str]:
    """Return modified JSON paths whose only difference from HEAD is enrichment metadata."""
    matches: list[str] = []
    for rel_path in _modified_paths(repo_root, workload_root):
        if not _is_json_path(rel_path):
            continue
        head_proc = _run_git(repo_root, ["show", f"HEAD:{rel_path}"], check=False)
        if head_proc.returncode != 0:
            continue
        old_text = head_proc.stdout.decode("utf-8", errors="replace")
        abs_path = repo_root / rel_path
        if not abs_path.is_file():
            continue
        new_text = abs_path.read_text(encoding="utf-8")
        if _is_enrichment_only_change(old_text, new_text):
            matches.append(rel_path)
    return matches
def filter_enrichment_only_files(repo_root: Path, workload_root: str) -> list[str]:
    """Revert enrichment-only JSON changes back to HEAD and return the reverted paths."""
    reverted = _enrichment_only_modified_files(repo_root, workload_root)
    for rel_path in reverted:
        _run_git(repo_root, ["checkout", "--quiet", "--", rel_path], check=True)
    return reverted
def find_enrichment_only_modified_files(repo_root: Path, workload_root: str) -> list[str]:
    """Detect enrichment-only JSON changes without reverting them."""
    return _enrichment_only_modified_files(repo_root, workload_root)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repo-root", required=True, help="Repository root path.")
parser.add_argument(
"--workload-root",
default="tenant-state/entra",
help="Path scope inside repo to inspect (default: tenant-state/entra).",
)
parser.add_argument(
"--fail-on-residual-enrichment-drift",
default="true",
help="Exit non-zero when enrichment-only modified files remain after filtering (true/false).",
)
return parser.parse_args()
def main() -> int:
args = parse_args()
repo_root = Path(args.repo_root).resolve()
reverted = filter_enrichment_only_files(repo_root=repo_root, workload_root=args.workload_root)
if reverted:
print(f"Reverted enrichment-only Entra file changes: {len(reverted)}")
for path in reverted:
print(f" - {path}")
else:
print("No enrichment-only Entra file changes detected.")
residual = find_enrichment_only_modified_files(repo_root=repo_root, workload_root=args.workload_root)
if residual:
print(f"Residual enrichment-only Entra file changes still present: {len(residual)}")
for path in residual:
print(f" - {path}")
if _to_bool(args.fail_on_residual_enrichment_drift):
return 2
return 0
if __name__ == "__main__":
sys.exit(main())
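
A minimal sketch of the enrichment-only comparison above, using made-up payloads and a reduced key set; it shows why a file whose only delta is `ownersResolved` becomes a revert candidate:

#!/usr/bin/env python3
"""Sketch: enrichment-only change detection with made-up payloads (reduced key set)."""
DEMO_ENRICHMENT_KEYS = {"ownersresolved", "resolutionstatus"}
def strip_enrichment(value):
    if isinstance(value, dict):
        return {
            key: strip_enrichment(child)
            for key, child in value.items()
            if str(key).strip().lower() not in DEMO_ENRICHMENT_KEYS
        }
    if isinstance(value, list):
        return [strip_enrichment(item) for item in value]
    return value
old = {"displayName": "App A", "ownersResolved": []}
new = {"displayName": "App A", "ownersResolved": [{"displayName": "Jane Doe"}]}
print(strip_enrichment(old) == strip_enrichment(new))  # True: payloads match once enrichment is dropped
print(old == new)  # False: so the diff is enrichment-only and the file would be reverted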

View File

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""Revert Intune Settings Catalog partial exports where settings payload is missing."""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Any
def _to_bool(value: str) -> bool:
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
def _run_git_show(repo_root: Path, ref: str, rel_path: str) -> str | None:
proc = subprocess.run(
["git", "show", f"{ref}:{rel_path}"],
cwd=str(repo_root),
check=False,
capture_output=True,
)
if proc.returncode != 0:
return None
return proc.stdout.decode("utf-8", errors="replace")
def _is_settings_catalog_json(file_path: Path, backup_root: Path) -> bool:
if file_path.suffix.lower() != ".json":
return False
rel = file_path.relative_to(backup_root).as_posix().lower()
return rel.startswith("settings catalog/")
def _is_partial_settings_payload(payload: Any) -> bool:
if not isinstance(payload, dict):
return False
setting_count = payload.get("settingCount")
if not isinstance(setting_count, int) or setting_count <= 0:
return False
settings = payload.get("settings")
if not isinstance(settings, list):
return True
return len(settings) == 0
def restore_partial_settings_from_baseline(
repo_root: Path,
backup_root: Path,
baseline_ref: str,
) -> tuple[list[str], list[str]]:
restored: list[str] = []
unresolved: list[str] = []
for file_path in sorted(backup_root.rglob("*.json")):
if not _is_settings_catalog_json(file_path, backup_root):
continue
try:
current_payload = json.loads(file_path.read_text(encoding="utf-8"))
except Exception:
continue
if not _is_partial_settings_payload(current_payload):
continue
        try:
            rel_path = file_path.relative_to(repo_root).as_posix()
        except ValueError:
            # --backup-root may be an absolute path outside the repository; such
            # files cannot be restored from a git baseline ref.
            unresolved.append(file_path.as_posix())
            continue
baseline_text = _run_git_show(repo_root, baseline_ref, rel_path)
if not baseline_text:
unresolved.append(rel_path)
continue
try:
baseline_payload = json.loads(baseline_text)
except Exception:
unresolved.append(rel_path)
continue
baseline_settings = baseline_payload.get("settings")
if not isinstance(baseline_settings, list) or len(baseline_settings) == 0:
unresolved.append(rel_path)
continue
current_payload["settings"] = baseline_settings
file_path.write_text(json.dumps(current_payload, indent=5, ensure_ascii=False), encoding="utf-8")
restored.append(rel_path)
return restored, unresolved
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repo-root", required=True, help="Repository root path.")
parser.add_argument(
"--backup-root",
default="tenant-state/intune",
help="Path to Intune backup root (default: tenant-state/intune).",
)
parser.add_argument(
"--baseline-ref",
default="HEAD",
help="Git ref used as baseline for restoration (default: HEAD).",
)
parser.add_argument(
"--fail-on-unresolved-partial-exports",
default="true",
help="Exit non-zero when partial exports cannot be restored from baseline (true/false).",
)
return parser.parse_args()
def main() -> int:
args = parse_args()
repo_root = Path(args.repo_root).resolve()
backup_root_arg = Path(args.backup_root)
backup_root = backup_root_arg if backup_root_arg.is_absolute() else repo_root / backup_root_arg
backup_root = backup_root.resolve()
restored, unresolved = restore_partial_settings_from_baseline(
repo_root=repo_root,
backup_root=backup_root,
baseline_ref=args.baseline_ref,
)
if restored:
print(f"Restored partial Intune Settings Catalog exports from baseline: {len(restored)}")
for path in restored:
print(f" - {path}")
else:
print("No partial Intune Settings Catalog exports detected.")
if unresolved:
print(f"Unresolved partial Intune Settings Catalog exports: {len(unresolved)}")
for path in unresolved:
print(f" - {path}")
if _to_bool(args.fail_on_unresolved_partial_exports):
return 2
return 0
if __name__ == "__main__":
sys.exit(main())
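
A minimal sketch of the partial-export check above, with made-up payloads; the key signal is a positive `settingCount` with no settings body:

#!/usr/bin/env python3
"""Sketch: the partial Settings Catalog check with made-up payloads."""
from typing import Any
def is_partial(payload: Any) -> bool:
    # A positive settingCount paired with a missing or empty settings list marks
    # an export whose settings body was dropped mid-run.
    if not isinstance(payload, dict):
        return False
    count = payload.get("settingCount")
    if not isinstance(count, int) or count <= 0:
        return False
    settings = payload.get("settings")
    return not isinstance(settings, list) or len(settings) == 0
print(is_partial({"settingCount": 12, "settings": []}))  # True -> restore from baseline
print(is_partial({"settingCount": 12, "settings": [{"id": "0"}]}))  # False -> export is complete
print(is_partial({"settingCount": 0, "settings": []}))  # False -> legitimately empty policy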

View File

@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""Generate a dedicated apps inventory CSV from Entra app exports."""
from __future__ import annotations
import argparse
import csv
import json
from pathlib import Path
from typing import Any
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--root", required=True, help="Path to the Entra workload backup root (tenant-state/entra).")
parser.add_argument(
"--output-dir",
required=True,
help="Directory where apps inventory report files will be written.",
)
parser.add_argument(
"--output-name",
default="apps-inventory.csv",
help="Output CSV filename (default: apps-inventory.csv).",
)
return parser.parse_args()
def safe_text(value: object) -> str:
if value is None:
return ""
return str(value).strip()
def summarize_owners(owners: object) -> tuple[int, str]:
if not isinstance(owners, list):
return 0, ""
labels: list[str] = []
for owner in owners:
if not isinstance(owner, dict):
continue
label = (
safe_text(owner.get("displayName"))
or safe_text(owner.get("userPrincipalName"))
or safe_text(owner.get("appId"))
or safe_text(owner.get("id"))
or "Unknown owner"
)
labels.append(label)
return len(labels), "; ".join(labels)
def summarize_required_resource_access(entries: object) -> tuple[int, str]:
if not isinstance(entries, list):
return 0, ""
summary: list[str] = []
total_permissions = 0
for entry in entries:
if not isinstance(entry, dict):
continue
resource_name = safe_text(entry.get("resourceDisplayName")) or "Unresolved resource"
resource_app_id = safe_text(entry.get("resourceAppId"))
permissions = entry.get("permissions")
permission_labels: list[str] = []
if isinstance(permissions, list):
for permission in permissions:
if not isinstance(permission, dict):
continue
total_permissions += 1
perm_type = safe_text(permission.get("type")) or "UnknownType"
perm_label = (
safe_text(permission.get("value"))
or safe_text(permission.get("displayName"))
or safe_text(permission.get("id"))
or "UnknownPermission"
)
permission_labels.append(f"{perm_label} [{perm_type}]")
resource_label = resource_name
if resource_app_id:
resource_label += f" ({resource_app_id})"
if permission_labels:
summary.append(f"{resource_label}: {', '.join(permission_labels)}")
else:
summary.append(resource_label)
return total_permissions, "; ".join(summary)
def summarize_enterprise_app_role_assignments(entries: object) -> tuple[int, str]:
if not isinstance(entries, list):
return 0, ""
summary: list[str] = []
count = 0
for entry in entries:
if not isinstance(entry, dict):
continue
count += 1
resource_name = safe_text(entry.get("resourceDisplayName")) or "Unresolved resource"
resource_id = safe_text(entry.get("resourceId"))
role_name = (
safe_text(entry.get("appRoleValue"))
or safe_text(entry.get("appRoleDisplayName"))
or safe_text(entry.get("appRoleId"))
or "Default access"
)
label = resource_name
if resource_id:
label += f" ({resource_id})"
summary.append(f"{label}: {role_name}")
return count, "; ".join(summary)
def verified_publisher_label(value: object) -> str:
if not isinstance(value, dict):
return ""
return (
safe_text(value.get("displayName"))
or safe_text(value.get("verifiedPublisherId"))
or safe_text(value.get("addedDateTime"))
)
def iter_exported_json(export_dir: Path) -> list[tuple[Path, dict[str, Any]]]:
if not export_dir.exists():
return []
items: list[tuple[Path, dict[str, Any]]] = []
for path in sorted(export_dir.rglob("*.json")):
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception:
continue
if isinstance(payload, dict):
items.append((path, payload))
return items
def main() -> int:
args = parse_args()
root = Path(args.root).resolve()
output_dir = Path(args.output_dir).resolve()
output_path = output_dir / args.output_name
if not root.exists():
raise SystemExit(f"Backup path does not exist: {root}")
app_reg_dir = root / "App Registrations"
ent_apps_dir = root / "Enterprise Applications"
app_reg_items = iter_exported_json(app_reg_dir)
ent_app_items = iter_exported_json(ent_apps_dir)
rows: list[dict[str, str]] = []
for source_path, payload in app_reg_items:
owner_count, owners = summarize_owners(payload.get("ownersResolved"))
perm_count, permissions = summarize_required_resource_access(
payload.get("requiredResourceAccessResolved")
)
rows.append(
{
"AppType": "AppRegistration",
"DisplayName": safe_text(payload.get("displayName")) or source_path.stem,
"ObjectId": safe_text(payload.get("id")),
"AppId": safe_text(payload.get("appId")),
"SignInAudience": safe_text(payload.get("signInAudience")),
"ServicePrincipalType": "",
"AccountEnabled": "",
"PublisherDomain": safe_text(payload.get("publisherDomain")),
"PublisherName": "",
"VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")),
"CreatedDateTime": safe_text(payload.get("createdDateTime")),
"OwnersCount": str(owner_count),
"OwnersResolved": owners,
"ResolvedPermissionCount": str(perm_count),
"ResolvedPermissions": permissions,
"ResolvedAppRoleAssignmentCount": "0",
"ResolvedAppRoleAssignments": "",
"SourceFile": source_path.relative_to(root).as_posix(),
}
)
for source_path, payload in ent_app_items:
owner_count, owners = summarize_owners(payload.get("ownersResolved"))
assignment_count, assignments = summarize_enterprise_app_role_assignments(
payload.get("appRoleAssignmentsResolved")
)
rows.append(
{
"AppType": "EnterpriseApplication",
"DisplayName": safe_text(payload.get("displayName")) or source_path.stem,
"ObjectId": safe_text(payload.get("id")),
"AppId": safe_text(payload.get("appId")),
"SignInAudience": "",
"ServicePrincipalType": safe_text(payload.get("servicePrincipalType")),
"AccountEnabled": safe_text(payload.get("accountEnabled")),
"PublisherDomain": "",
"PublisherName": safe_text(payload.get("publisherName")),
"VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")),
"CreatedDateTime": "",
"OwnersCount": str(owner_count),
"OwnersResolved": owners,
"ResolvedPermissionCount": "0",
"ResolvedPermissions": "",
"ResolvedAppRoleAssignmentCount": str(assignment_count),
"ResolvedAppRoleAssignments": assignments,
"SourceFile": source_path.relative_to(root).as_posix(),
}
)
rows.sort(
key=lambda row: (
row["AppType"].lower(),
row["DisplayName"].lower(),
row["ObjectId"].lower(),
)
)
output_dir.mkdir(parents=True, exist_ok=True)
fieldnames = [
"AppType",
"DisplayName",
"ObjectId",
"AppId",
"SignInAudience",
"ServicePrincipalType",
"AccountEnabled",
"PublisherDomain",
"PublisherName",
"VerifiedPublisher",
"CreatedDateTime",
"OwnersCount",
"OwnersResolved",
"ResolvedPermissionCount",
"ResolvedPermissions",
"ResolvedAppRoleAssignmentCount",
"ResolvedAppRoleAssignments",
"SourceFile",
]
with output_path.open("w", encoding="utf-8", newline="") as handle:
writer = csv.DictWriter(handle, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)
print(
"Generated apps inventory report: "
+ f"{output_path} "
+ f"(rows={len(rows)}, appRegistrations={len(app_reg_items)}, enterpriseApps={len(ent_app_items)})"
)
return 0
if __name__ == "__main__":
raise SystemExit(main())
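
The inventory rows above rely on a fallback chain to label owners: the friendliest identifier the export managed to resolve wins. A small sketch with made-up owner records:

#!/usr/bin/env python3
"""Sketch: the owner label fallback chain with made-up owner records."""
def owner_label(owner: dict) -> str:
    # Prefer the friendliest identifier the export managed to resolve.
    for key in ("displayName", "userPrincipalName", "appId", "id"):
        value = str(owner.get(key) or "").strip()
        if value:
            return value
    return "Unknown owner"
owners = [
    {"displayName": "Jane Doe"},
    {"userPrincipalName": "ops@contoso.example"},
    {"id": "00000000-0000-0000-0000-000000000000"},
    {},
]
print("; ".join(owner_label(owner) for owner in owners))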

View File

@@ -0,0 +1,419 @@
#!/usr/bin/env python3
"""Generate a policy assignment inventory report from Intune backup JSON files."""
from __future__ import annotations
import argparse
import csv
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable
GROUP_TARGET_TYPES = {
"#microsoft.graph.groupAssignmentTarget",
"#microsoft.graph.exclusionGroupAssignmentTarget",
}
DEFAULT_POLICY_TYPES = {
"app configuration",
"app protection",
"applications",
"compliance policies",
"conditional access",
"device configurations",
"enrollment configurations",
"enrollment profiles",
"filters",
"scripts",
"settings catalog",
}
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).")
parser.add_argument(
"--output-dir",
required=True,
help="Directory where report files will be written.",
)
parser.add_argument(
"--policy-type",
action="append",
default=[],
help=(
"Optional filter for policy type (top-level backup folder name). "
"Repeat the flag or pass a comma-separated list."
),
)
parser.add_argument(
"--graph-type",
action="append",
default=[],
help=(
"Optional filter for Graph @odata.type values. "
"Repeat the flag or pass a comma-separated list."
),
)
return parser.parse_args()
@dataclass
class AssignmentRow:
category: str
policy_type: str
object_name: str
object_type: str
assignment_state: str
assignment_count: int
intent: str
assignment_target: str
target_type: str
assignment_filter: str
filter_type: str
source_file: str
def safe_text(value: object) -> str:
if value is None:
return ""
return str(value).strip()
def normalize_intent(intent: str) -> str:
normalized = safe_text(intent).lower()
if normalized in {"apply", "include"}:
return "Include"
if normalized in {"exclude"}:
return "Exclude"
if not normalized:
return "Include"
return normalized.capitalize()
def infer_intent(assignment: dict, target_type: str) -> str:
target_type_lower = safe_text(target_type).lower()
if "exclusion" in target_type_lower:
return "Exclude"
explicit = safe_text(assignment.get("intent"))
if explicit:
return normalize_intent(explicit)
return "Include"
def resolve_assignment_target(target: dict) -> str:
target_type = safe_text(target.get("@odata.type"))
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
return "All devices"
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
return "All users"
if target_type in GROUP_TARGET_TYPES:
return (
safe_text(target.get("groupDisplayName"))
or safe_text(target.get("groupName"))
or safe_text(target.get("groupId"))
or "Unresolved group"
)
return (
safe_text(target.get("groupDisplayName"))
or safe_text(target.get("groupName"))
or safe_text(target.get("displayName"))
or safe_text(target.get("id"))
or "Unknown target"
)
def escape_md_cell(value: str) -> str:
return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ").strip()
def parse_filter_values(raw_values: list[str]) -> set[str]:
values = set()
for raw in raw_values:
for item in safe_text(raw).split(","):
normalized = safe_text(item)
if normalized:
values.add(normalized.lower())
return values
def iter_assignment_rows(
root: Path,
policy_type_filter: set[str],
graph_type_filter: set[str],
) -> Iterable[AssignmentRow]:
excluded_categories = {
"App Registrations",
"Enterprise Applications",
}
for path in sorted(root.rglob("*.json")):
try:
rel_path = path.relative_to(root)
except ValueError:
continue
if rel_path.parts and rel_path.parts[0] in {"reports"}:
continue
if "__archive__" in rel_path.parts:
continue
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception:
continue
if not isinstance(payload, dict):
continue
object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name"))
if not object_name:
object_name = path.stem.split("__")[0]
object_type = safe_text(payload.get("@odata.type"))
category = "/".join(rel_path.parent.parts)
policy_type = rel_path.parts[0] if rel_path.parts else ""
if any(
category == excluded or category.startswith(f"{excluded}/")
for excluded in excluded_categories
):
continue
if policy_type_filter and policy_type.lower() not in policy_type_filter:
continue
if graph_type_filter and object_type.lower() not in graph_type_filter:
continue
assignments = payload.get("assignments")
if not isinstance(assignments, list):
yield AssignmentRow(
category=category,
policy_type=policy_type,
object_name=object_name,
object_type=object_type,
assignment_state="NotExported",
assignment_count=0,
intent="None",
assignment_target="Not exported in backup",
target_type="",
assignment_filter="",
filter_type="",
source_file=rel_path.as_posix(),
)
continue
if not assignments:
yield AssignmentRow(
category=category,
policy_type=policy_type,
object_name=object_name,
object_type=object_type,
assignment_state="Unassigned",
assignment_count=0,
intent="None",
assignment_target="No assignments",
target_type="",
assignment_filter="",
filter_type="",
source_file=rel_path.as_posix(),
)
continue
assignment_count = len([item for item in assignments if isinstance(item, dict)])
if assignment_count == 0:
yield AssignmentRow(
category=category,
policy_type=policy_type,
object_name=object_name,
object_type=object_type,
assignment_state="Unassigned",
assignment_count=0,
intent="None",
assignment_target="No assignments",
target_type="",
assignment_filter="",
filter_type="",
source_file=rel_path.as_posix(),
)
continue
for assignment in assignments:
if not isinstance(assignment, dict):
continue
target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {}
target_type = safe_text(target.get("@odata.type"))
intent = infer_intent(assignment, target_type)
assignment_target = resolve_assignment_target(target)
assignment_filter = safe_text(target.get("deviceAndAppManagementAssignmentFilterId"))
filter_type = safe_text(target.get("deviceAndAppManagementAssignmentFilterType"))
yield AssignmentRow(
category=category,
policy_type=policy_type,
object_name=object_name,
object_type=object_type,
assignment_state="Assigned",
assignment_count=assignment_count,
intent=intent,
assignment_target=assignment_target,
target_type=target_type,
assignment_filter=assignment_filter,
filter_type=filter_type,
source_file=rel_path.as_posix(),
)
def write_csv(rows: list[AssignmentRow], output_path: Path) -> None:
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w", encoding="utf-8", newline="") as handle:
writer = csv.writer(handle)
writer.writerow(
[
"Category",
"PolicyType",
"ObjectName",
"ObjectType",
"AssignmentState",
"AssignmentCount",
"Intent",
"AssignmentTarget",
"TargetType",
"AssignmentFilter",
"FilterType",
"SourceFile",
]
)
for row in rows:
writer.writerow(
[
row.category,
row.policy_type,
row.object_name,
row.object_type,
row.assignment_state,
row.assignment_count,
row.intent,
row.assignment_target,
row.target_type,
row.assignment_filter,
row.filter_type,
row.source_file,
]
)
def write_markdown(rows: list[AssignmentRow], output_path: Path) -> None:
output_path.parent.mkdir(parents=True, exist_ok=True)
generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
objects = {(row.category, row.object_name, row.source_file) for row in rows}
assigned_objects = {
(row.category, row.object_name, row.source_file)
for row in rows
if row.assignment_state == "Assigned"
}
unassigned_objects = {
(row.category, row.object_name, row.source_file)
for row in rows
if row.assignment_state == "Unassigned"
}
not_exported_objects = {
(row.category, row.object_name, row.source_file)
for row in rows
if row.assignment_state == "NotExported"
}
    policy_type_counts: dict[str, int] = {}
for row in rows:
key = row.policy_type or "Unknown"
policy_type_counts[key] = policy_type_counts.get(key, 0) + 1
with output_path.open("w", encoding="utf-8") as handle:
handle.write("# Policy Assignment Inventory Report\n\n")
handle.write(f"Generated: `{generated}`\n\n")
handle.write(f"- Total objects in report: **{len(objects)}**\n")
handle.write(f"- Objects with assignments: **{len(assigned_objects)}**\n")
handle.write(f"- Objects without assignments: **{len(unassigned_objects)}**\n")
handle.write(f"- Objects with assignment field not exported: **{len(not_exported_objects)}**\n")
handle.write(f"- Total rows: **{len(rows)}**\n\n")
handle.write("## Rows by policy type\n\n")
handle.write("| Policy Type | Rows |\n")
handle.write("|---|---|\n")
for policy_type, count in sorted(policy_type_counts.items(), key=lambda item: item[0].lower()):
handle.write(f"| {escape_md_cell(policy_type)} | {count} |\n")
handle.write("\n")
handle.write(
"| Policy Type | Category | Object | Object Type | Assignment State | Assignment Count | Intent | Assignment Target | Target Type | Filter | Filter Type | Source |\n"
)
handle.write("|---|---|---|---|---|---|---|---|---|---|---|---|\n")
for row in rows:
handle.write(
"| "
+ " | ".join(
[
escape_md_cell(row.policy_type),
escape_md_cell(row.category),
escape_md_cell(row.object_name),
escape_md_cell(row.object_type),
escape_md_cell(row.assignment_state),
escape_md_cell(str(row.assignment_count)),
escape_md_cell(row.intent),
escape_md_cell(row.assignment_target),
escape_md_cell(row.target_type),
escape_md_cell(row.assignment_filter),
escape_md_cell(row.filter_type),
escape_md_cell(row.source_file),
]
)
+ " |\n"
)
def main() -> int:
args = parse_args()
root = Path(args.root).resolve()
output_dir = Path(args.output_dir).resolve()
policy_type_filter = parse_filter_values(args.policy_type)
graph_type_filter = parse_filter_values(args.graph_type)
using_default_policy_scope = False
if not policy_type_filter:
policy_type_filter = set(DEFAULT_POLICY_TYPES)
using_default_policy_scope = True
if not root.exists():
raise SystemExit(f"Backup path does not exist: {root}")
rows = sorted(
iter_assignment_rows(root, policy_type_filter, graph_type_filter),
key=lambda x: (
x.policy_type.lower(),
x.category.lower(),
x.object_name.lower(),
x.assignment_state,
x.intent.lower(),
x.assignment_target.lower(),
),
)
markdown_path = output_dir / "policy-assignments.md"
csv_path = output_dir / "policy-assignments.csv"
write_markdown(rows, markdown_path)
write_csv(rows, csv_path)
print(
f"Generated assignment report with {len(rows)} rows: "
f"{markdown_path} and {csv_path}"
)
if using_default_policy_scope:
print(
"Applied default policy scope: "
+ ", ".join(sorted(DEFAULT_POLICY_TYPES))
)
elif policy_type_filter:
print(f"Applied policy type filter: {', '.join(sorted(policy_type_filter))}")
if graph_type_filter:
print(f"Applied graph type filter: {', '.join(sorted(graph_type_filter))}")
return 0
if __name__ == "__main__":
raise SystemExit(main())
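
The assignment rows above hinge on two steps: mapping a target's `@odata.type` to a readable name, and letting exclusion target types override any explicit intent field. A minimal sketch with one made-up assignment:

#!/usr/bin/env python3
"""Sketch: target resolution and intent inference for one made-up assignment."""
def resolve(assignment: dict) -> tuple[str, str]:
    target = assignment.get("target") or {}
    target_type = str(target.get("@odata.type") or "")
    if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
        name = "All devices"
    elif target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
        name = "All users"
    else:
        name = target.get("groupDisplayName") or target.get("groupId") or "Unknown target"
    # Exclusion target types always win over any explicit intent field.
    intent = "Exclude" if "exclusion" in target_type.lower() else "Include"
    return name, intent
assignment = {
    "target": {
        "@odata.type": "#microsoft.graph.exclusionGroupAssignmentTarget",
        "groupDisplayName": "Pilot devices",
    }
}
print(resolve(assignment))  # ('Pilot devices', 'Exclude')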

View File

@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""Generate broad object inventory CSV reports from backup JSON files."""
from __future__ import annotations
import argparse
import csv
import json
import re
from pathlib import Path
GROUP_TARGET_TYPES = {
"#microsoft.graph.groupAssignmentTarget",
"#microsoft.graph.exclusionGroupAssignmentTarget",
}
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).")
parser.add_argument(
"--output-dir",
required=True,
help="Directory where report files will be written.",
)
parser.add_argument(
"--per-type-dir",
default="Object Inventory",
help="Directory name under output-dir for per-policy-type CSVs.",
)
return parser.parse_args()
def safe_text(value: object) -> str:
if value is None:
return ""
return str(value).strip()
def slugify(value: str) -> str:
text = safe_text(value).lower()
text = re.sub(r"[^a-z0-9]+", "-", text).strip("-")
return text or "unknown"
def infer_intent(assignment: dict, target_type: str) -> str:
if "exclusion" in target_type.lower():
return "Exclude"
explicit = safe_text(assignment.get("intent")).lower()
if explicit in {"exclude"}:
return "Exclude"
return "Include"
def resolve_assignment_target(target: dict) -> str:
target_type = safe_text(target.get("@odata.type"))
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
return "All devices"
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
return "All users"
if target_type in GROUP_TARGET_TYPES:
return (
safe_text(target.get("groupDisplayName"))
or safe_text(target.get("groupName"))
or safe_text(target.get("groupId"))
or "Unresolved group"
)
return (
safe_text(target.get("groupDisplayName"))
or safe_text(target.get("groupName"))
or safe_text(target.get("displayName"))
or safe_text(target.get("id"))
or "Unknown target"
)
def summarize_assignments(payload: dict) -> dict[str, object]:
assignments = payload.get("assignments")
if not isinstance(assignments, list):
return {
"state": "NotExported",
"total": 0,
"include_targets": "",
"exclude_targets": "",
"all_users_assigned": "false",
"all_devices_assigned": "false",
}
include_targets: list[str] = []
exclude_targets: list[str] = []
all_users = False
all_devices = False
valid = [item for item in assignments if isinstance(item, dict)]
for assignment in valid:
target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {}
target_type = safe_text(target.get("@odata.type"))
target_name = resolve_assignment_target(target)
intent = infer_intent(assignment, target_type)
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
all_users = True
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
all_devices = True
if intent == "Exclude":
exclude_targets.append(target_name)
else:
include_targets.append(target_name)
state = "Assigned" if valid else "Unassigned"
if assignments == []:
state = "Unassigned"
return {
"state": state,
"total": len(valid),
"include_targets": "; ".join(sorted(set(include_targets))),
"exclude_targets": "; ".join(sorted(set(exclude_targets))),
"all_users_assigned": str(all_users).lower(),
"all_devices_assigned": str(all_devices).lower(),
}
def iter_rows(root: Path) -> list[dict[str, str]]:
rows: list[dict[str, str]] = []
for path in sorted(root.rglob("*.json")):
rel = path.relative_to(root)
if rel.parts and rel.parts[0] in {"reports"}:
continue
if "__archive__" in rel.parts:
continue
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception:
continue
if not isinstance(payload, dict):
continue
summary = summarize_assignments(payload)
policy_type = rel.parts[0] if rel.parts else ""
category = "/".join(rel.parent.parts)
object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name"))
if not object_name:
object_name = path.stem.split("__")[0]
rows.append(
{
"PolicyType": policy_type,
"Category": category,
"ObjectName": object_name,
"ObjectType": safe_text(payload.get("@odata.type")),
"ObjectId": safe_text(payload.get("id")),
"AppId": safe_text(payload.get("appId")),
"Description": safe_text(payload.get("description")),
"AssignmentState": safe_text(summary["state"]),
"AssignmentCount": str(summary["total"]),
"IncludeTargets": safe_text(summary["include_targets"]),
"ExcludeTargets": safe_text(summary["exclude_targets"]),
"AllUsersAssigned": safe_text(summary["all_users_assigned"]),
"AllDevicesAssigned": safe_text(summary["all_devices_assigned"]),
"SourceFile": rel.as_posix(),
}
)
rows.sort(
key=lambda row: (
row["PolicyType"].lower(),
row["Category"].lower(),
row["ObjectName"].lower(),
row["SourceFile"].lower(),
)
)
return rows
def write_csv(path: Path, rows: list[dict[str, str]]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
headers = [
"PolicyType",
"Category",
"ObjectName",
"ObjectType",
"ObjectId",
"AppId",
"Description",
"AssignmentState",
"AssignmentCount",
"IncludeTargets",
"ExcludeTargets",
"AllUsersAssigned",
"AllDevicesAssigned",
"SourceFile",
]
with path.open("w", encoding="utf-8", newline="") as handle:
writer = csv.DictWriter(handle, fieldnames=headers)
writer.writeheader()
writer.writerows(rows)
def main() -> int:
args = parse_args()
root = Path(args.root).resolve()
output_dir = Path(args.output_dir).resolve()
per_type_root = output_dir / args.per_type_dir
if not root.exists():
raise SystemExit(f"Backup path does not exist: {root}")
rows = iter_rows(root)
all_report = output_dir / "object-inventory-all.csv"
write_csv(all_report, rows)
per_type_counts: dict[str, int] = {}
for policy_type in sorted({row["PolicyType"] for row in rows}):
type_rows = [row for row in rows if row["PolicyType"] == policy_type]
per_type_report = per_type_root / f"{slugify(policy_type)}-inventory.csv"
write_csv(per_type_report, type_rows)
per_type_counts[policy_type] = len(type_rows)
print(
f"Generated object inventory reports: all={all_report}, "
f"perTypeCount={len(per_type_counts)}, rows={len(rows)}"
)
for policy_type, count in sorted(per_type_counts.items(), key=lambda item: item[0].lower()):
print(f" - {policy_type}: {count} rows")
return 0
if __name__ == "__main__":
raise SystemExit(main())
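
The per-type CSV filenames above come from slugifying the top-level backup folder name. A small sketch with made-up folder names:

#!/usr/bin/env python3
"""Sketch: per-type CSV naming via slugify, with made-up folder names."""
import re
def slugify(value: str) -> str:
    text = re.sub(r"[^a-z0-9]+", "-", value.strip().lower()).strip("-")
    return text or "unknown"
for policy_type in ("Settings Catalog", "App Protection", "???"):
    print(f"{slugify(policy_type)}-inventory.csv")
# -> settings-catalog-inventory.csv, app-protection-inventory.csv, unknown-inventory.csv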

View File

@@ -0,0 +1,447 @@
#!/usr/bin/env python3
"""Queue restore automatically after merged rolling PR that contains /reject decisions."""
from __future__ import annotations
import argparse
import base64
import datetime as dt
import os
import re
import sys
import urllib.parse
from pathlib import Path
from typing import Any
# common.py lives in the same directory; ensure it can be imported when the
# script is executed directly.
_sys_path_inserted = False
if __file__:
_script_dir = str(Path(__file__).resolve().parent)
if _script_dir not in sys.path:
sys.path.insert(0, _script_dir)
_sys_path_inserted = True
import common
if _sys_path_inserted:
sys.path.pop(0)
_env_text = common.env_text
_env_bool = common.env_bool
_request_json = common.request_json
REJECT_CMD_RE = re.compile(r"(?im)^\s*(?:/|#)?reject\b")
DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")
AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
MERGE_MARKER_PREFIX = "AUTO-RESTORE-AFTER-MERGE:"
def _normalize_branch(branch: str) -> str:
b = branch.strip()
if b.startswith("refs/heads/"):
return b[len("refs/heads/") :]
return b
def _ref_from_branch(branch: str) -> str:
return f"refs/heads/{_normalize_branch(branch)}"
def _parse_iso_utc(value: str) -> dt.datetime | None:
text = (value or "").strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
parsed = dt.datetime.fromisoformat(text)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=dt.timezone.utc)
return parsed.astimezone(dt.timezone.utc)
def _query_completed_prs(
repo_api: str,
headers: dict[str, str],
source_ref: str,
target_ref: str,
) -> list[dict[str, Any]]:
query = urllib.parse.urlencode(
{
"searchCriteria.status": "completed",
"searchCriteria.sourceRefName": source_ref,
"searchCriteria.targetRefName": target_ref,
"api-version": "7.1",
},
quote_via=urllib.parse.quote,
safe="/",
)
payload = _request_json(f"{repo_api}/pullrequests?{query}", headers=headers)
items = payload.get("value", []) if isinstance(payload, dict) else []
return sorted(items, key=lambda x: x.get("closedDate", ""), reverse=True)
def _threads(repo_api: str, headers: dict[str, str], pr_id: int) -> list[dict[str, Any]]:
payload = _request_json(
f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1",
headers=headers,
)
return payload.get("value", []) if isinstance(payload, dict) else []
def _thread_comment_contents(threads: list[dict[str, Any]]) -> list[str]:
out: list[str] = []
for thread in threads:
comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
for comment in comments:
out.append(str(comment.get("content", "") or ""))
return out
def _ticket_path_from_content(content: str) -> str | None:
marker_re = re.compile(
r"(?:^|\n)\s*(?:Automation marker:\s*)?"
+ re.escape(AUTO_TICKET_THREAD_PREFIX)
+ r"(?P<id>[A-Za-z0-9_-]+)\s*(?:$|\n)"
)
match = marker_re.search(content or "")
if not match:
return None
encoded = match.group("id")
padding = "=" * ((4 - len(encoded) % 4) % 4)
try:
return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
except Exception:
return None
def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None:
decision: str | None = None
def _comment_sort_key(comment: dict[str, Any]) -> tuple[int, int]:
try:
comment_id = int(comment.get("id", 0))
except Exception:
comment_id = 0
try:
parent_id = int(comment.get("parentCommentId", 0))
except Exception:
parent_id = 0
return (comment_id, parent_id)
for comment in sorted(comments, key=_comment_sort_key):
content = str(comment.get("content", "") or "")
match = DECISION_RE.search(content)
if match:
decision = match.group("decision").lower()
return decision
def _rejected_ticket_paths(threads: list[dict[str, Any]]) -> list[str]:
rejected: set[str] = set()
for thread in threads:
comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
marker_path: str | None = None
for comment in comments:
marker_path = _ticket_path_from_content(str(comment.get("content", "") or ""))
if marker_path:
break
if not marker_path:
continue
decision = _latest_thread_decision(comments)
if decision == "reject":
rejected.add(marker_path)
return sorted(rejected)
def _has_reject_signal(comments: list[str]) -> bool:
for content in comments:
if REJECT_CMD_RE.search(content):
return True
if "Auto-action: /reject detected." in content:
return True
return False
def _has_merge_marker(comments: list[str], merge_commit: str) -> bool:
marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
return any(marker in content for content in comments)
def _is_permission_error(exc: Exception) -> bool:
msg = str(exc).lower()
return "http 403" in msg or "forbidden" in msg
def _normalize_exclude_csv(value: str) -> str:
normalized = str(value or "").strip()
if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}:
return ""
return normalized
def _diagnose_queue_permission(
collection_uri: str,
project: str,
headers: dict[str, str],
definition_id: int,
) -> None:
definition_url = (
f"{collection_uri}/{project}/_apis/build/definitions/{definition_id}"
"?api-version=7.1"
)
try:
payload = _request_json(definition_url, headers=headers)
definition_name = str(payload.get("name", "") or "").strip()
print(
"Diagnostic: restore pipeline definition is readable "
f"(id={definition_id}, name='{definition_name or 'n/a'}')."
)
print(
"Diagnostic: queue call was forbidden, so missing permission is likely "
"'Queue builds' on that restore pipeline (or pipeline is not authorized to use it)."
)
except Exception as diag_exc:
print(
"Diagnostic: unable to read restore pipeline definition "
f"id={definition_id}. Details: {diag_exc}"
)
print(
"Diagnostic: likely wrong definition ID, wrong project, or missing 'View builds' permission "
"for the calling pipeline identity."
)
def _queue_restore_pipeline(
collection_uri: str,
project: str,
headers: dict[str, str],
definition_id: int,
baseline_branch: str,
include_entra_update: bool,
dry_run: bool,
update_assignments: bool,
remove_unmanaged: bool,
max_workers: int,
exclude_csv: str,
restore_mode: str = "full",
restore_paths_csv: str = "",
) -> dict[str, Any]:
build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1"
template_parameters = {
"dryRun": dry_run,
"updateAssignments": update_assignments,
"removeObjectsNotInBaseline": remove_unmanaged,
"includeEntraUpdate": include_entra_update,
"baselineBranch": baseline_branch,
"maxWorkers": max_workers,
"restoreMode": restore_mode,
}
if restore_mode == "selective" and restore_paths_csv.strip():
template_parameters["restorePathsCsv"] = restore_paths_csv.strip()
exclude_csv = _normalize_exclude_csv(exclude_csv)
if exclude_csv:
template_parameters["excludeCsv"] = exclude_csv
body = {
"definition": {"id": definition_id},
"sourceBranch": _ref_from_branch(baseline_branch),
"templateParameters": template_parameters,
}
return _request_json(build_api, headers=headers, method="POST", body=body)
def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None:
_request_json(
f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1",
headers=headers,
method="POST",
body={
"comments": [
{
"parentCommentId": 0,
"content": content,
"commentType": 1,
}
],
"status": 1,
},
)
def main() -> int:
parser = argparse.ArgumentParser(description="Queue restore after merged rolling PR with /reject decisions")
parser.add_argument("--workload", required=True, choices=["intune", "entra"])
parser.add_argument("--drift-branch", required=True)
parser.add_argument("--baseline-branch", required=True)
args = parser.parse_args()
if not _env_bool("AUTO_REMEDIATE_AFTER_MERGE", False):
print("Post-merge auto-remediation disabled (set AUTO_REMEDIATE_AFTER_MERGE=true).")
return 0
token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
if not token:
raise SystemExit("SYSTEM_ACCESSTOKEN is empty.")
definition_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "")
if not definition_raw:
print(
"Post-merge auto-remediation queue skipped: "
"AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty."
)
return 0
try:
definition_id = int(definition_raw)
except ValueError as exc:
raise SystemExit(f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID: {definition_raw}") from exc
max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10")
try:
max_workers = int(max_workers_raw)
except ValueError as exc:
raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS: {max_workers_raw}") from exc
lookback_hours_raw = _env_text("AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS", "168")
try:
lookback_hours = int(lookback_hours_raw)
except ValueError as exc:
raise SystemExit(f"Invalid AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: {lookback_hours_raw}") from exc
collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
project = os.environ["SYSTEM_TEAMPROJECT"]
repository_id = os.environ["BUILD_REPOSITORY_ID"]
include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False)
dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False)
update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True)
remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False)
exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", ""))
source_ref = _ref_from_branch(args.drift_branch)
target_ref = _ref_from_branch(args.baseline_branch)
repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(hours=lookback_hours)
completed = _query_completed_prs(repo_api, headers, source_ref, target_ref)
candidate: dict[str, Any] | None = None
candidate_threads: list[dict[str, Any]] = []
candidate_comments: list[str] = []
for pr in completed:
closed_at = _parse_iso_utc(str(pr.get("closedDate", "") or ""))
if closed_at and closed_at < cutoff:
continue
merge_commit = (((pr.get("lastMergeCommit") or {}).get("commitId")) or "").strip()
if not merge_commit:
continue
pr_id = int(pr.get("pullRequestId"))
threads = _threads(repo_api, headers, pr_id)
comments = _thread_comment_contents(threads)
if not _has_reject_signal(comments):
continue
if _has_merge_marker(comments, merge_commit):
continue
candidate = pr
candidate_threads = threads
candidate_comments = comments
break
if not candidate:
print("No merged rolling PR requiring post-merge remediation was found.")
return 0
pr_id = int(candidate.get("pullRequestId"))
merge_commit = (((candidate.get("lastMergeCommit") or {}).get("commitId")) or "").strip()
rejected_paths = _rejected_ticket_paths(candidate_threads)
restore_mode = "full"
restore_paths_csv = ""
if args.workload == "intune" and rejected_paths:
restore_mode = "selective"
restore_paths_csv = ",".join(rejected_paths)
print(f"Post-merge remediation scope: selective ({len(rejected_paths)} rejected path(s)).")
for path in rejected_paths:
print(f" - {path}")
else:
print("Post-merge remediation scope: full.")
try:
queued = _queue_restore_pipeline(
collection_uri=collection_uri,
project=project,
headers=headers,
definition_id=definition_id,
baseline_branch=args.baseline_branch,
include_entra_update=include_entra_update,
dry_run=dry_run,
update_assignments=update_assignments,
remove_unmanaged=remove_unmanaged,
max_workers=max_workers,
exclude_csv=exclude_csv,
restore_mode=restore_mode,
restore_paths_csv=restore_paths_csv,
)
except Exception as exc:
if _is_permission_error(exc):
print(
"WARNING: Post-merge remediation queue skipped due permissions. "
f"Definition={definition_id}. Details: {exc}"
)
_diagnose_queue_permission(collection_uri, project, headers, definition_id)
print(
"Grant 'Queue builds' permission for this pipeline identity on the restore pipeline "
"and ensure the pipeline has access to run it."
)
return 0
raise
build_id = queued.get("id")
build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "")
if not build_url and build_id:
build_url = f"{collection_uri}/{project}/_build/results?buildId={build_id}"
marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
comment = (
"Auto-remediation queued after merged rolling PR with reviewer /reject decision(s).\n\n"
f"Workload: {args.workload}\n"
f"Merged PR: #{pr_id}\n"
f"Merge commit: {merge_commit}\n"
f"Restore pipeline definition: {definition_id}\n"
f"Restore run: {build_url or '(queued)'}\n\n"
f"{marker}"
)
try:
_post_pr_thread(repo_api, headers, pr_id, comment)
except Exception as exc:
print(f"WARNING: Restore queued, but failed posting merge marker comment on PR #{pr_id}: {exc}")
print(
f"Queued post-merge remediation for PR #{pr_id} (merge_commit={merge_commit}, buildId={build_id})."
)
return 0
if __name__ == "__main__":
try:
raise SystemExit(main())
except Exception as exc:
print(f"WARNING: Failed post-merge remediation check: {exc}", file=sys.stderr)
raise
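
The reject/accept semantics above are latest-decision-wins per thread: comments are scanned in id order and the last matching command determines the outcome. A minimal sketch over made-up thread comments:

#!/usr/bin/env python3
"""Sketch: latest-decision-wins parsing over made-up thread comments."""
from __future__ import annotations
import re
DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")
def latest_decision(comments: list[dict]) -> str | None:
    decision: str | None = None
    # Comments are visited in id order, so the newest matching command wins.
    for comment in sorted(comments, key=lambda c: int(c.get("id", 0))):
        match = DECISION_RE.search(str(comment.get("content", "") or ""))
        if match:
            decision = match.group("decision").lower()
    return decision
thread = [
    {"id": 1, "content": "/reject this drift"},
    {"id": 2, "content": "/accept (keep this drift after all)"},
]
print(latest_decision(thread))  # accept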

View File

@@ -0,0 +1,273 @@
#!/usr/bin/env python3
"""Resolve Conditional Access GUID references to display names in backup JSON."""
from __future__ import annotations
import argparse
import json
import pathlib
import urllib.error
import urllib.parse
import urllib.request
SPECIAL_APP_IDS = {
"All": "All applications",
"None": "None",
"Office365": "Office 365",
"MicrosoftAdminPortals": "Microsoft Admin Portals",
}
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--root", required=True, help="Path to workload backup root (for Entra: tenant-state/entra).")
parser.add_argument("--token", required=True, help="Microsoft Graph access token.")
return parser.parse_args()
class GraphResolver:
def __init__(self, token: str):
self.token = token.strip()
self.group_cache: dict[str, str | None] = {}
self.role_cache: dict[str, str | None] = {}
self.app_cache: dict[str, str | None] = {}
self.location_cache: dict[str, str | None] = {}
self.auth_strength_cache: dict[str, str | None] = {}
self._warned: set[str] = set()
def _warn_once(self, key: str, message: str) -> None:
if key in self._warned:
return
self._warned.add(key)
print(f"Warning: {message}")
def _get(self, url: str) -> dict | None:
req = urllib.request.Request(
url,
headers={
"Authorization": f"Bearer {self.token}",
"Accept": "application/json",
},
method="GET",
)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as exc:
if exc.code == 404:
return None
self._warn_once(url, f"Graph lookup failed for {url} (HTTP {exc.code})")
return None
except Exception as exc: # noqa: BLE001
self._warn_once(url, f"Graph lookup failed for {url} ({exc})")
return None
def group_name(self, group_id: str) -> str | None:
if group_id in self.group_cache:
return self.group_cache[group_id]
url = (
"https://graph.microsoft.com/v1.0/groups/"
+ urllib.parse.quote(group_id)
+ "?$select=id,displayName"
)
payload = self._get(url)
name = payload.get("displayName") if isinstance(payload, dict) else None
self.group_cache[group_id] = name
return name
def role_name(self, role_template_id: str) -> str | None:
if role_template_id in self.role_cache:
return self.role_cache[role_template_id]
url = (
"https://graph.microsoft.com/v1.0/directoryRoleTemplates/"
+ urllib.parse.quote(role_template_id)
+ "?$select=id,displayName"
)
payload = self._get(url)
name = payload.get("displayName") if isinstance(payload, dict) else None
self.role_cache[role_template_id] = name
return name
def app_name(self, app_or_object_id: str) -> str | None:
if app_or_object_id in SPECIAL_APP_IDS:
return SPECIAL_APP_IDS[app_or_object_id]
if app_or_object_id in self.app_cache:
return self.app_cache[app_or_object_id]
# CA app conditions usually use appId; try appId lookup first.
        # Percent-encode the whole $filter expression; the spaces in
        # "appId eq '...'" must not appear literally in the request URL.
        url = (
            "https://graph.microsoft.com/v1.0/servicePrincipals"
            + "?$select=id,appId,displayName"
            + "&$top=1"
            + "&$filter="
            + urllib.parse.quote(f"appId eq '{app_or_object_id}'")
        )
payload = self._get(url)
name = None
if isinstance(payload, dict):
value = payload.get("value")
if isinstance(value, list) and value:
first = value[0]
if isinstance(first, dict):
name = first.get("displayName")
if not name:
# Fallback: treat value as service principal object id.
by_id_url = (
"https://graph.microsoft.com/v1.0/servicePrincipals/"
+ urllib.parse.quote(app_or_object_id)
+ "?$select=id,appId,displayName"
)
by_id = self._get(by_id_url)
if isinstance(by_id, dict):
name = by_id.get("displayName")
self.app_cache[app_or_object_id] = name
return name
def location_name(self, location_id: str) -> str | None:
if location_id in self.location_cache:
return self.location_cache[location_id]
if location_id in {"All", "AllTrusted"}:
name = "All locations" if location_id == "All" else "All trusted locations"
self.location_cache[location_id] = name
return name
url = (
"https://graph.microsoft.com/v1.0/identity/conditionalAccess/namedLocations/"
+ urllib.parse.quote(location_id)
+ "?$select=id,displayName"
)
payload = self._get(url)
name = payload.get("displayName") if isinstance(payload, dict) else None
self.location_cache[location_id] = name
return name
def auth_strength_name(self, auth_strength_id: str) -> str | None:
if auth_strength_id in self.auth_strength_cache:
return self.auth_strength_cache[auth_strength_id]
url = (
"https://graph.microsoft.com/beta/identity/conditionalAccess/authenticationStrength/policies/"
+ urllib.parse.quote(auth_strength_id)
+ "?$select=id,displayName"
)
payload = self._get(url)
name = payload.get("displayName") if isinstance(payload, dict) else None
self.auth_strength_cache[auth_strength_id] = name
return name
def resolve_id_list(
values: list,
lookup_fn,
) -> list[dict[str, str]]:
resolved: list[dict[str, str]] = []
for raw in values:
if not isinstance(raw, str) or not raw:
continue
resolved.append(
{
"id": raw,
"displayName": lookup_fn(raw) or "Unresolved",
}
)
return resolved
def main() -> int:
args = parse_args()
root = pathlib.Path(args.root).resolve()
token = args.token.strip()
if not token:
print("No Graph token provided. Skipping Conditional Access reference enrichment.")
return 0
ca_dir = root / "Conditional Access"
if not ca_dir.exists():
print(f"Conditional Access folder not found at {ca_dir}. Skipping.")
return 0
resolver = GraphResolver(token)
updated_files = 0
processed_files = 0
for file_path in sorted(ca_dir.glob("*.json")):
try:
payload = json.loads(file_path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
continue
if not isinstance(payload, dict):
continue
processed_files += 1
changed = False
conditions = payload.get("conditions")
if not isinstance(conditions, dict):
conditions = {}
users = conditions.get("users")
if isinstance(users, dict):
for key, lookup in (
("includeGroups", resolver.group_name),
("excludeGroups", resolver.group_name),
("includeRoles", resolver.role_name),
("excludeRoles", resolver.role_name),
):
value = users.get(key)
if isinstance(value, list):
resolved_key = f"{key}Resolved"
resolved_value = resolve_id_list(value, lookup)
if users.get(resolved_key) != resolved_value:
users[resolved_key] = resolved_value
changed = True
apps = conditions.get("applications")
if isinstance(apps, dict):
for key in ("includeApplications", "excludeApplications"):
value = apps.get(key)
if isinstance(value, list):
resolved_key = f"{key}Resolved"
resolved_value = resolve_id_list(value, resolver.app_name)
if apps.get(resolved_key) != resolved_value:
apps[resolved_key] = resolved_value
changed = True
locations = conditions.get("locations")
if isinstance(locations, dict):
for key in ("includeLocations", "excludeLocations"):
value = locations.get(key)
if isinstance(value, list):
resolved_key = f"{key}Resolved"
resolved_value = resolve_id_list(value, resolver.location_name)
if locations.get(resolved_key) != resolved_value:
locations[resolved_key] = resolved_value
changed = True
grant_controls = payload.get("grantControls")
if isinstance(grant_controls, dict):
auth_strength = grant_controls.get("authenticationStrength")
if isinstance(auth_strength, dict):
auth_strength_id = auth_strength.get("id")
if isinstance(auth_strength_id, str) and auth_strength_id:
resolved = {
"id": auth_strength_id,
"displayName": resolver.auth_strength_name(auth_strength_id) or "Unresolved",
}
if grant_controls.get("authenticationStrengthResolved") != resolved:
grant_controls["authenticationStrengthResolved"] = resolved
changed = True
if changed:
file_path.write_text(json.dumps(payload, indent=5, ensure_ascii=False) + "\n", encoding="utf-8")
updated_files += 1
print(
"Conditional Access GUID enrichment complete. "
+ f"Processed files: {processed_files}. "
+ f"Updated files: {updated_files}."
)
return 0
if __name__ == "__main__":
raise SystemExit(main())
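
Every resolved list written above has the same `{id, displayName}` shape, with "Unresolved" as the fallback. A minimal sketch using a stub lookup and made-up GUIDs instead of live Graph calls:

#!/usr/bin/env python3
"""Sketch: the resolved-list shape, using a stub lookup instead of live Graph calls."""
FAKE_GROUPS = {"11111111-1111-1111-1111-111111111111": "Break Glass Accounts"}  # made-up data
def lookup(group_id: str) -> str:
    return FAKE_GROUPS.get(group_id) or ""
def resolve_id_list(values: list, lookup_fn) -> list[dict[str, str]]:
    return [
        {"id": raw, "displayName": lookup_fn(raw) or "Unresolved"}
        for raw in values
        if isinstance(raw, str) and raw
    ]
print(resolve_id_list(
    [
        "11111111-1111-1111-1111-111111111111",
        "22222222-2222-2222-2222-222222222222",
    ],
    lookup,
))
# The known GUID resolves to its display name; the unknown one falls back to 'Unresolved'.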

File diff suppressed because it is too large

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""Validate backup outputs for Intune and Entra workloads."""
from __future__ import annotations
import argparse
from pathlib import Path
def to_bool(value: str) -> bool:
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--workload", required=True, choices=["intune", "entra"])
parser.add_argument("--mode", default="light", choices=["light", "full"])
parser.add_argument("--root", required=True, help="Workload backup root path.")
parser.add_argument("--reports-root", required=True, help="Workload reports root path.")
parser.add_argument("--include-named-locations", default="false")
parser.add_argument("--include-authentication-strengths", default="false")
parser.add_argument("--include-conditional-access", default="false")
parser.add_argument("--include-enterprise-applications", default="false")
parser.add_argument("--include-enterprise-applications-effective", default="false")
parser.add_argument("--include-app-registrations", default="false")
parser.add_argument("--include-app-registrations-effective", default="false")
return parser.parse_args()
def _require_file(path: Path, label: str, errors: list[str]) -> None:
if not path.is_file():
errors.append(f"Missing {label}: {path}")
def _json_count(root: Path) -> int:
if not root.exists():
return 0
return sum(1 for _ in root.rglob("*.json"))
def _validate_intune(root: Path, reports_root: Path, errors: list[str]) -> None:
if not root.exists():
errors.append(f"Missing Intune backup root: {root}")
return
json_count = _json_count(root)
if json_count == 0:
errors.append(f"Intune backup root has no JSON exports: {root}")
_require_file(reports_root / "policy-assignments.md", "Intune assignment markdown report", errors)
_require_file(reports_root / "policy-assignments.csv", "Intune assignment CSV report", errors)
_require_file(reports_root / "object-inventory-all.csv", "Intune object inventory CSV", errors)
if errors:
return
print(f"Intune output validation passed: jsonFiles={json_count}")
def _validate_entra(root: Path, reports_root: Path, args: argparse.Namespace, errors: list[str]) -> None:
if not root.exists():
errors.append(f"Missing Entra backup root: {root}")
return
include_named_locations = to_bool(args.include_named_locations)
include_auth_strengths = to_bool(args.include_authentication_strengths)
include_conditional_access = to_bool(args.include_conditional_access)
include_enterprise_apps = to_bool(args.include_enterprise_applications)
include_enterprise_apps_effective = to_bool(args.include_enterprise_applications_effective)
include_app_registrations = to_bool(args.include_app_registrations)
include_app_registrations_effective = to_bool(args.include_app_registrations_effective)
expected_category_indexes: list[tuple[str, bool]] = [
("Named Locations", include_named_locations),
("Authentication Strengths", include_auth_strengths),
("Conditional Access", include_conditional_access),
("App Registrations", include_app_registrations_effective),
("Enterprise Applications", include_enterprise_apps_effective),
]
for category_name, is_required in expected_category_indexes:
if not is_required:
continue
index_path = root / category_name / f"{category_name}.md"
_require_file(index_path, f"Entra export index for '{category_name}'", errors)
_require_file(reports_root / "object-inventory-all.csv", "Entra object inventory CSV", errors)
if include_conditional_access:
_require_file(reports_root / "policy-assignments.md", "Entra assignment markdown report", errors)
_require_file(reports_root / "policy-assignments.csv", "Entra assignment CSV report", errors)
if include_app_registrations_effective or include_enterprise_apps_effective:
_require_file(reports_root / "apps-inventory.csv", "Entra apps inventory CSV", errors)
if errors:
return
json_count = _json_count(root)
print(
"Entra output validation passed: "
f"jsonFiles={json_count}, "
f"mode={args.mode}, "
f"enterpriseAppsConfigured={str(include_enterprise_apps).lower()}, "
f"enterpriseAppsEffective={str(include_enterprise_apps_effective).lower()}, "
f"appRegistrationsConfigured={str(include_app_registrations).lower()}, "
f"appRegistrationsEffective={str(include_app_registrations_effective).lower()}"
)
def main() -> int:
args = parse_args()
root = Path(args.root).resolve()
reports_root = Path(args.reports_root).resolve()
errors: list[str] = []
if args.workload == "intune":
_validate_intune(root=root, reports_root=reports_root, errors=errors)
else:
_validate_entra(root=root, reports_root=reports_root, args=args, errors=errors)
if errors:
print("Backup output validation failed:")
for item in errors:
print(f" - {item}")
return 1
return 0
if __name__ == "__main__":
raise SystemExit(main())
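
The validator above accumulates every failure before exiting, so a single run surfaces all missing artifacts rather than stopping at the first. A minimal sketch of that pattern, with hypothetical paths:

#!/usr/bin/env python3
"""Sketch: the accumulate-then-fail validation pattern with made-up paths."""
from pathlib import Path
def require_file(path: Path, label: str, errors: list[str]) -> None:
    if not path.is_file():
        errors.append(f"Missing {label}: {path}")
errors: list[str] = []
reports = Path("/tmp/example-reports")  # hypothetical reports root
require_file(reports / "policy-assignments.csv", "assignment CSV report", errors)
require_file(reports / "object-inventory-all.csv", "object inventory CSV", errors)
if errors:
    # All checks ran before anything is reported, so one run lists
    # every missing artifact instead of just the first.
    print("Backup output validation failed:")
    for item in errors:
        print(f" - {item}")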