Sync from dev @ 252c1cf
Source: main (252c1cf). Excluded: live tenant exports, generated artifacts, and dev-only tooling.
scripts/apply_reviewer_rejections.py (new file, 316 lines)
@@ -0,0 +1,316 @@
#!/usr/bin/env python3
"""Apply per-policy reviewer reject decisions on rolling drift PRs.

Reviewer decision format inside auto Change Needed threads:
- /reject -> remove this file-level drift from rolling PR (reset to baseline)
- /accept -> keep this file-level drift

Latest decision command in the thread wins.
"""

from __future__ import annotations

import argparse
import base64
import json
import os
import re
import subprocess
import sys
import urllib.parse
from pathlib import Path
from typing import Any

# common.py lives in the same directory; ensure it can be imported when the
# script is executed directly.
_sys_path_inserted = False
if __file__:
    _script_dir = str(Path(__file__).resolve().parent)
    if _script_dir not in sys.path:
        sys.path.insert(0, _script_dir)
        _sys_path_inserted = True

import common

if _sys_path_inserted:
    sys.path.pop(0)

_request_json = common.request_json
_run_git = common.run_git
_configure_git_identity = common.configure_git_identity

AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
THREAD_STATUS_FIXED = 2
THREAD_STATUS_WONT_FIX = 3
THREAD_STATUS_CLOSED = 4
THREAD_STATUS_BY_DESIGN = 5
DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")


def _run_diff_name_only(repo_root: str, baseline_branch: str, drift_branch: str) -> str:
    three_dot = f"origin/{baseline_branch}...origin/{drift_branch}"
    two_dot = f"origin/{baseline_branch}..origin/{drift_branch}"
    try:
        return _run_git(repo_root, ["diff", "--name-only", three_dot])
    except RuntimeError as exc:
        stderr = str(exc).lower()
        if "no merge base" not in stderr:
            raise
        print(
            "WARNING: No merge base for rolling branches "
            f"(origin/{baseline_branch}, origin/{drift_branch}); using direct diff."
        )
        return _run_git(repo_root, ["diff", "--name-only", two_dot])


def _git_path_exists(repo_root: str, treeish: str, path: str) -> bool:
    proc = subprocess.run(
        ["git", "cat-file", "-e", f"{treeish}:{path}"],
        cwd=repo_root,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return proc.returncode == 0


def _normalize_branch_name(branch: str) -> str:
    b = branch.strip()
    if b.startswith("refs/heads/"):
        return b[len("refs/heads/") :]
    return b


def _thread_status_code(thread: dict[str, Any]) -> int:
    status = thread.get("status")
    if isinstance(status, int):
        return status
    if isinstance(status, str):
        mapping = {
            "fixed": THREAD_STATUS_FIXED,
            "wontfix": THREAD_STATUS_WONT_FIX,
            "closed": THREAD_STATUS_CLOSED,
            "bydesign": THREAD_STATUS_BY_DESIGN,
        }
        return mapping.get(status.strip().lower(), 1)
    return 1


def _is_thread_resolved(thread: dict[str, Any]) -> bool:
    return _thread_status_code(thread) in (
        THREAD_STATUS_FIXED,
        THREAD_STATUS_WONT_FIX,
        THREAD_STATUS_CLOSED,
        THREAD_STATUS_BY_DESIGN,
    )


def _ticket_path_from_content(content: str) -> str | None:
    marker_re = re.compile(r"<!--\s*" + re.escape(AUTO_TICKET_THREAD_PREFIX) + r"(?P<id>[A-Za-z0-9_-]+)\s*-->")
    match = marker_re.search(content or "")
    if not match:
        return None
    encoded = match.group("id")
    padding = "=" * ((4 - len(encoded) % 4) % 4)
    try:
        return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
    except Exception:
        return None


def _is_doc_like(path: str) -> bool:
    lp = path.lower()
    return lp.endswith(".md") or lp.endswith(".markdown") or "/docs/" in lp


def _is_report_like(path: str) -> bool:
    lp = path.lower()
    return "/reports/" in lp or "assignment report" in lp


def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None:
    decision: str | None = None

    def _comment_sort_key(c: dict[str, Any]) -> tuple[int, int]:
        try:
            cid = int(c.get("id", 0))
        except Exception:
            cid = 0
        try:
            parent = int(c.get("parentCommentId", 0))
        except Exception:
            parent = 0
        return (cid, parent)

    for comment in sorted(comments, key=_comment_sort_key):
        content = str(comment.get("content", "") or "")
        match = DECISION_RE.search(content)
        if match:
            decision = match.group("decision").lower()
    return decision


def _post_thread_comment(repo_api: str, pr_id: int, thread_id: int, token: str, content: str) -> None:
    _request_json(
        f"{repo_api}/pullrequests/{pr_id}/threads/{thread_id}/comments?api-version=7.1",
        token=token,
        method="POST",
        body={
            "parentCommentId": 0,
            "content": content,
            "commentType": 1,
        },
    )


def main() -> int:
    parser = argparse.ArgumentParser(description="Apply reviewer /reject decisions for rolling PR threads")
    parser.add_argument("--repo-root", required=True)
    parser.add_argument("--workload", required=True)
    parser.add_argument("--drift-branch", required=True)
    parser.add_argument("--baseline-branch", required=True)
    args = parser.parse_args()

    token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
    if not token:
        raise SystemExit("SYSTEM_ACCESSTOKEN is empty.")

    collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
    project = os.environ["SYSTEM_TEAMPROJECT"]
    repository_id = os.environ["BUILD_REPOSITORY_ID"]

    drift_branch = _normalize_branch_name(args.drift_branch)
    baseline_branch = _normalize_branch_name(args.baseline_branch)

    repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
    source_ref = f"refs/heads/{drift_branch}"
    target_ref = f"refs/heads/{baseline_branch}"

    query = urllib.parse.urlencode(
        {
            "searchCriteria.status": "active",
            "searchCriteria.sourceRefName": source_ref,
            "searchCriteria.targetRefName": target_ref,
            "api-version": "7.1",
        },
        quote_via=urllib.parse.quote,
        safe="/",
    )
    payload = _request_json(f"{repo_api}/pullrequests?{query}", token=token)
    prs = payload.get("value", []) if isinstance(payload, dict) else []
    if not prs:
        print("No active rolling PR found; skipping reviewer reject sync.")
        return 0

    pr = prs[0]
    pr_id = int(pr.get("pullRequestId"))

    _run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch])
    diff_paths = _run_diff_name_only(args.repo_root, baseline_branch, drift_branch)
    changed_paths = {
        p.strip()
        for p in diff_paths.splitlines()
        if p.strip() and not _is_doc_like(p.strip()) and not _is_report_like(p.strip())
    }
    if not changed_paths:
        print("No changed policy paths in rolling PR; nothing to auto-reject.")
        return 0

    threads_payload = _request_json(f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1", token=token)
    threads = threads_payload.get("value", []) if isinstance(threads_payload, dict) else []

    rejections: list[tuple[str, int]] = []
    examined_ticket_threads = 0
    for thread in threads:
        comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
        marker_path: str | None = None
        for c in comments:
            marker_path = _ticket_path_from_content(str(c.get("content", "") or ""))
            if marker_path:
                break
        if not marker_path:
            continue
        examined_ticket_threads += 1
        if marker_path not in changed_paths:
            continue

        decision = _latest_thread_decision(comments)
        if decision == "reject":
            try:
                thread_id = int(thread.get("id"))
            except Exception:
                thread_id = -1
            rejections.append((marker_path, thread_id))

    if not rejections:
        print(
            "No /reject decisions found in auto policy threads "
            f"(examined={examined_ticket_threads}, changed_paths={len(changed_paths)})."
        )
        return 0

    print(
        "Detected /reject decisions in auto policy threads: "
        f"{len(rejections)} (examined={examined_ticket_threads})."
    )

    _run_git(args.repo_root, ["checkout", "--quiet", "--force", "-B", drift_branch, f"origin/{drift_branch}"])

    changed = 0
    baseline_tree = f"origin/{baseline_branch}"
    for path, _thread_id in sorted(set(rejections)):
        if _git_path_exists(args.repo_root, baseline_tree, path):
            _run_git(args.repo_root, ["checkout", baseline_tree, "--", path])
            _run_git(args.repo_root, ["add", "--", path])
            changed += 1
        else:
            file_abs = os.path.join(args.repo_root, path)
            if os.path.exists(file_abs):
                _run_git(args.repo_root, ["rm", "-f", "--", path])
                changed += 1

    proc = subprocess.run(
        ["git", "diff", "--cached", "--quiet"],
        cwd=args.repo_root,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if proc.returncode == 0:
        print("Reviewer /reject decisions found, but no effective diff remained after baseline reset.")
        return 0

    _configure_git_identity(args.repo_root)

    commit_msg = f"Apply reviewer /reject decisions ({args.workload})"
    _run_git(args.repo_root, ["commit", "-m", commit_msg])
    _run_git(args.repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{drift_branch}"])

    for path, thread_id in rejections:
        if thread_id <= 0:
            continue
        _post_thread_comment(
            repo_api=repo_api,
            pr_id=pr_id,
            thread_id=thread_id,
            token=token,
            content=(
                "Auto-action: /reject detected. This policy drift was reset to baseline on the rolling drift branch, "
                "so it is removed from the PR diff.\n\n"
                "If tenant rollback is required immediately, run restore pipeline as remediation."
            ),
        )

    print(
        f"Applied reviewer /reject decisions for {changed} path(s) in PR #{pr_id}; "
        f"drift branch '{drift_branch}' updated."
    )
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except Exception as exc:
        print(f"WARNING: Failed to apply reviewer /reject decisions: {exc}", file=sys.stderr)
        raise
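A quick way to sanity-check the decision parsing above: the snippet below replays the "latest decision command wins" rule against a made-up comment thread. It reuses the same DECISION_RE pattern; the comment ids and contents are invented for illustration.

import re

DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")

comments = [
    {"id": 1, "content": "Change Needed for this policy."},
    {"id": 2, "content": "/reject - we did not approve this change"},
    {"id": 3, "content": "/accept after discussion, keep it"},
]

latest = None
for comment in sorted(comments, key=lambda c: int(c.get("id", 0))):
    match = DECISION_RE.search(str(comment.get("content", "")))
    if match:
        latest = match.group("decision").lower()

print(latest)  # -> "accept": the later comment overrides the earlier /reject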
scripts/commit_entra_drift.py (new file, 395 lines)
@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""Commit Entra drift changes with best-effort change-author attribution."""

from __future__ import annotations

import argparse
import datetime as dt
import json
import pathlib
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from collections import defaultdict
from dataclasses import dataclass


def _git_run(repo_root: pathlib.Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
    proc = subprocess.run(
        ["git", *args],
        cwd=str(repo_root),
        check=False,
        capture_output=True,
        text=True,
    )
    if check and proc.returncode != 0:
        stderr = (proc.stderr or "").strip()
        raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
    return proc


def _set_output_var(name: str, value: str, is_output: bool = True) -> None:
    suffix = ";isOutput=true" if is_output else ""
    print(f"##vso[task.setvariable variable={name}{suffix}]{value}")


def _warning(message: str) -> None:
    print(f"##vso[task.logissue type=warning]{message}")


def _parse_backup_start(value: str) -> dt.datetime:
    candidate = value.strip()
    if not candidate:
        raise ValueError("Missing required --backup-start value. Ensure the pipeline sets BACKUP_START in the backup_entra job before invoking commit_entra_drift.py.")
    parsed = dt.datetime.strptime(candidate, "%Y.%m.%d:%H.%M.%S")
    return parsed.replace(tzinfo=dt.timezone.utc)


def _format_filter_datetime(value: dt.datetime) -> str:
    return value.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _last_entra_commit_date(repo_root: pathlib.Path, depth: int = 30) -> dt.datetime | None:
    _git_run(repo_root, ["fetch", f"--depth={depth}"], check=False)
    proc = _git_run(
        repo_root,
        [
            "--no-pager",
            "log",
            "--no-show-signature",
            f"-{depth}",
            # "%%" is a literal "%" in git pretty formats, so this emits the
            # "%%%" separator that the split below expects.
            "--format=%s%%%%%%cI",
        ],
    )
    for raw in proc.stdout.splitlines():
        line = raw.strip()
        if not line or "%%%" not in line:
            continue
        subject, iso_date = line.split("%%%", 1)
        if subject.endswith(" (Entra)") and len(subject) >= 18 and subject[4] == ".":
            try:
                return dt.datetime.fromisoformat(iso_date.replace("Z", "+00:00")).astimezone(dt.timezone.utc)
            except ValueError:
                continue
    return None


def _request_json(url: str, token: str) -> dict:
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/json",
        },
        method="GET",
    )
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read().decode("utf-8"))


@dataclass(frozen=True)
class Identity:
    key: str
    value: str
    name: str


def _display_or_localpart(display_name: str, principal_name: str) -> str:
    display_name = (display_name or "").strip()
    if display_name:
        return display_name
    principal_name = (principal_name or "").strip()
    if "@" in principal_name:
        return principal_name.split("@", 1)[0]
    return principal_name


def _extract_identity_from_audit(entry: dict) -> Identity | None:
    initiated_by = entry.get("initiatedBy")
    if not isinstance(initiated_by, dict):
        return None

    user = initiated_by.get("user")
    if isinstance(user, dict):
        principal_name = str(user.get("userPrincipalName") or user.get("email") or "").strip()
        display_name = str(user.get("displayName") or "").strip()
        if principal_name:
            return Identity(
                key=f"user:{principal_name}",
                value=principal_name,
                name=_display_or_localpart(display_name, principal_name),
            )
        if display_name:
            return Identity(
                key=f"display:{display_name}",
                value=display_name,
                name=display_name,
            )

    app = initiated_by.get("app")
    if isinstance(app, dict):
        display_name = str(app.get("displayName") or "").strip()
        if display_name:
            return Identity(
                key=f"sp:{display_name}",
                value=f"{display_name} (SP)",
                name=display_name,
            )

    return None


def _fetch_directory_audits(
    token: str,
    last_commit_date: dt.datetime | None,
    backup_start: dt.datetime,
) -> list[dict]:
    params = {
        "$top": "999",
        "$select": "activityDateTime,activityDisplayName,category,result,initiatedBy,targetResources",
    }
    filter_parts = [f"activityDateTime le {_format_filter_datetime(backup_start)}"]
    if last_commit_date is not None:
        filter_parts.append(f"activityDateTime ge {_format_filter_datetime(last_commit_date)}")
    params["$filter"] = " and ".join(filter_parts)
    url = f"https://graph.microsoft.com/v1.0/auditLogs/directoryAudits?{urllib.parse.urlencode(params)}"

    results: list[dict] = []
    while url:
        payload = _request_json(url, token)
        value = payload.get("value")
        if isinstance(value, list):
            results.extend(item for item in value if isinstance(item, dict))
        next_link = payload.get("@odata.nextLink")
        url = str(next_link).strip() if next_link else ""
    return results


def _resource_id_from_path(path: str) -> str:
    pure = pathlib.PurePosixPath(path)
    if pure.suffix.lower() != ".json":
        return ""
    stem = pure.stem
    if "__" not in stem:
        return ""
    return stem.rsplit("__", 1)[-1].lstrip("_").strip()


def _category_key(path: str) -> str:
    pure = pathlib.PurePosixPath(path)
    parts = pure.parts
    if len(parts) < 3:
        return ""
    return "/".join(parts[:3])


def _fallback_identity(name: str, email: str) -> Identity:
    return Identity(key=f"fallback:{email}", value=email, name=name)


def _effective_fallback_identity(
    build_reason: str,
    requested_for: str,
    requested_for_email: str,
    service_name: str,
    service_email: str,
) -> Identity:
    requested_for_email = requested_for_email.strip()
    if build_reason.strip() != "Schedule" and "@" in requested_for_email:
        requested_for = requested_for.strip() or requested_for_email.split("@", 1)[0]
        return _fallback_identity(requested_for, requested_for_email)
    return _fallback_identity(service_name.strip(), service_email.strip())


def _changed_files(repo_root: pathlib.Path, workload_root: str) -> list[str]:
    proc = _git_run(repo_root, ["diff", "--cached", "--name-only", "--", workload_root])
    return [line.strip() for line in proc.stdout.splitlines() if line.strip()]


def _remote_diff_is_empty(repo_root: pathlib.Path, drift_branch: str, workload_root: str) -> bool:
    remote_ref = f"refs/remotes/origin/{drift_branch}"
    if _git_run(repo_root, ["show-ref", "--verify", "--quiet", remote_ref], check=False).returncode != 0:
        return False
    return _git_run(repo_root, ["diff", "--quiet", f"origin/{drift_branch}", "--", workload_root], check=False).returncode == 0


def _build_author_groups(
    changed_files: list[str],
    audits: list[dict],
    fallback: Identity,
) -> tuple[dict[str, dict[str, list[str] | list[Identity]]], int]:
    identities_by_resource: dict[str, dict[str, Identity]] = defaultdict(dict)
    for audit in audits:
        result = str(audit.get("result") or "").strip().lower()
        if result and result != "success":
            continue
        identity = _extract_identity_from_audit(audit)
        if identity is None:
            continue
        target_resources = audit.get("targetResources")
        if not isinstance(target_resources, list):
            continue
        for target in target_resources:
            if not isinstance(target, dict):
                continue
            resource_id = str(target.get("id") or "").strip()
            if resource_id:
                identities_by_resource[resource_id][identity.key] = identity

    resolved_by_category: dict[str, dict[str, Identity]] = defaultdict(dict)
    file_identities: dict[str, list[Identity]] = {}
    unresolved_count = 0

    for path in changed_files:
        resource_id = _resource_id_from_path(path)
        identities = list(identities_by_resource.get(resource_id, {}).values())
        if identities:
            file_identities[path] = sorted(identities, key=lambda item: item.key)
            for identity in file_identities[path]:
                resolved_by_category[_category_key(path)][identity.key] = identity
        else:
            file_identities[path] = []
            if resource_id:
                unresolved_count += 1

    for path in changed_files:
        if file_identities[path]:
            continue
        category_identities = list(resolved_by_category.get(_category_key(path), {}).values())
        if category_identities:
            file_identities[path] = sorted(category_identities, key=lambda item: item.key)
        else:
            file_identities[path] = [fallback]

    grouped: dict[str, dict[str, list[str] | list[Identity]]] = {}
    for path in changed_files:
        identities = file_identities[path] or [fallback]
        group_key = "&".join(identity.key for identity in identities)
        entry = grouped.setdefault(group_key, {"files": [], "identities": identities})
        files = entry["files"]
        assert isinstance(files, list)
        files.append(path)

    return grouped, unresolved_count


def _commit_group(
    repo_root: pathlib.Path,
    files: list[str],
    identities: list[Identity],
    backup_start: dt.datetime,
) -> None:
    for path in files:
        print(f"\t- Adding {repo_root / path}")
        _git_run(repo_root, ["add", "--all", "--", path])
    author_name = ", ".join(identity.name for identity in identities)
    author_email = ", ".join(identity.value for identity in identities)
    print(f"\t- Setting commit author(s): {author_name}")
    _git_run(repo_root, ["config", "user.name", author_name])
    _git_run(repo_root, ["config", "user.email", author_email])
    commit_date = backup_start.astimezone(dt.timezone.utc).strftime("%Y.%m.%d_%H.%M")
    commit_name = f"{commit_date} -- {author_name} (Entra)"
    print(f"\t- Creating commit '{commit_name}'")
    _git_run(repo_root, ["commit", "-m", commit_name])


def main() -> int:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo-root", required=True)
    parser.add_argument("--workload-root", required=True)
    parser.add_argument("--baseline-branch", required=True)
    parser.add_argument("--drift-branch", required=True)
    parser.add_argument("--access-token", required=True)
    parser.add_argument("--service-name", required=True)
    parser.add_argument("--service-email", required=True)
    parser.add_argument("--build-reason", default="")
    parser.add_argument("--requested-for", default="")
    parser.add_argument("--requested-for-email", default="")
    parser.add_argument("--backup-start", required=True)
    args = parser.parse_args()

    repo_root = pathlib.Path(args.repo_root).resolve()
    workload_root = args.workload_root.strip().strip("/")
    fallback = _effective_fallback_identity(
        build_reason=args.build_reason,
        requested_for=args.requested_for,
        requested_for_email=args.requested_for_email,
        service_name=args.service_name,
        service_email=args.service_email,
    )

    _git_run(repo_root, ["config", "user.name", fallback.name])
    _git_run(repo_root, ["config", "user.email", fallback.value])
    _git_run(repo_root, ["add", "--all", "--", workload_root])

    changed_files = _changed_files(repo_root, workload_root)
    if not changed_files:
        print("No Entra change detected")
        _set_output_var("CHANGE_DETECTED", "0")
        _set_output_var("ROLLING_PR_SYNC_REQUIRED", "0")
        return 0

    if _remote_diff_is_empty(repo_root, args.drift_branch, workload_root):
        print("No Entra change detected (snapshot identical to existing drift branch)")
        _set_output_var("CHANGE_DETECTED", "0")
        _set_output_var("ROLLING_PR_SYNC_REQUIRED", "1")
        return 0

    backup_start = _parse_backup_start(args.backup_start)
    last_commit_date = _last_entra_commit_date(repo_root)
    if last_commit_date is None:
        _warning("Unable to obtain date of the last Entra backup config commit. All Entra audit events in the current query window will be considered.")

    audits: list[dict] = []
    try:
        print("Getting Entra directory audit logs")
        print(f"\t- from: '{last_commit_date}' (UTC) to: '{backup_start}' (UTC)")
        audits = _fetch_directory_audits(args.access_token, last_commit_date, backup_start)
    except urllib.error.HTTPError as exc:
        if exc.code in (401, 403):
            _warning("Graph token cannot read Entra directory audit logs. Falling back to pipeline identity for unresolved Entra changes.")
        else:
            raise
    except Exception as exc:  # pragma: no cover - defensive path for pipeline runtime issues
        _warning(f"Unable to query Entra directory audit logs ({exc}). Falling back to pipeline identity for unresolved Entra changes.")

    groups, unresolved_count = _build_author_groups(changed_files, audits, fallback)
    if unresolved_count > 0:
        _warning(
            f"Unable to resolve author from Entra audit logs for {unresolved_count} of {len(changed_files)} changed files. Fallback identity used where needed."
        )

    _git_run(repo_root, ["reset", "--quiet", "--", workload_root])
    print("\nCommit changes")
    for group in groups.values():
        files = group["files"]
        identities = group["identities"]
        assert isinstance(files, list)
        assert isinstance(identities, list)
        _commit_group(repo_root, files, identities, backup_start)
    unpushed = _git_run(repo_root, ["cherry", "-v", f"origin/{args.baseline_branch}"]).stdout.strip()
    if not unpushed:
        _warning("Nothing to commit?! This shouldn't happen.")
        _set_output_var("CHANGE_DETECTED", "0")
        _set_output_var("ROLLING_PR_SYNC_REQUIRED", "0")
        return 0

    _git_run(repo_root, ["push", "--force-with-lease", "origin", f"HEAD:{args.drift_branch}"])
    commit_sha = _git_run(repo_root, ["rev-parse", "HEAD"]).stdout.strip()
    modification_authors = sorted({identity.value for group in groups.values() for identity in group["identities"]})  # type: ignore[index]
    _set_output_var("CHANGE_DETECTED", "1")
    _set_output_var("ROLLING_PR_SYNC_REQUIRED", "1")
    _set_output_var("COMMIT_SHA", commit_sha)
    _set_output_var("COMMIT_DATE", backup_start.strftime("%Y.%m.%d_%H.%M"))
    _set_output_var("MODIFICATION_AUTHOR", ", ".join(modification_authors))
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except Exception as exc:
        print(str(exc), file=sys.stderr)
        raise
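The author attribution above hinges on matching the id of a Graph audit-log target resource back to a backup file name, using the stem-after-"__" convention of _resource_id_from_path. A small standalone sketch of that mapping; the file path, folder layout, and GUID below are hypothetical.

import pathlib

def resource_id_from_path(path: str) -> str:
    # Mirrors _resource_id_from_path above: the object id is the part of the
    # file stem after the last "__" separator.
    pure = pathlib.PurePosixPath(path)
    if pure.suffix.lower() != ".json":
        return ""
    stem = pure.stem
    if "__" not in stem:
        return ""
    return stem.rsplit("__", 1)[-1].lstrip("_").strip()

path = "tenant-state/entra/ConditionalAccess/Block legacy auth__a1b2c3d4-0000-0000-0000-000000000000.json"
print(resource_id_from_path(path))  # a1b2c3d4-0000-0000-0000-000000000000
# First three path segments form the category key used for fallback grouping:
print("/".join(pathlib.PurePosixPath(path).parts[:3]))  # tenant-state/entra/ConditionalAccess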
scripts/common.py (new file, 164 lines)
@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Shared utilities for Intune / Entra drift backup scripts."""

from __future__ import annotations

import json
import os
import re
import subprocess
import time
import urllib.error
import urllib.request
from typing import Any


def env_text(name: str, default: str = "") -> str:
    """Read and sanitize an environment variable, treating unresolved Azure DevOps
    macros $(...) as empty.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    value = raw.strip()
    if re.fullmatch(r"\$\([^)]+\)", value):
        return default
    if not value:
        return default
    return value


def env_bool(name: str, default: bool = False) -> bool:
    """Interpret an environment variable as a boolean."""
    raw = env_text(name, "")
    if not raw:
        return default
    return raw.lower() in {"1", "true", "yes", "y", "on"}


def normalize_exclude_csv(value: str) -> str:
    """Normalize an exclude CSV value, treating sentinel values as empty."""
    normalized = str(value or "").strip()
    if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}:
        return ""
    return normalized


def normalize_merge_strategy(value: str) -> str:
    """Normalize a merge strategy string to an Azure DevOps API value."""
    raw = (value or "").strip().lower().replace("-", "").replace("_", "")
    aliases = {
        "nofastforward": "noFastForward",
        "mergecommit": "noFastForward",
        "merge": "noFastForward",
        "squash": "squash",
        "rebase": "rebase",
        "rebasefastforward": "rebase",
        "rebaseff": "rebase",
        "rebasemerge": "rebaseMerge",
    }
    return aliases.get(raw, "rebase")


def _get_retry_after_seconds(error: urllib.error.HTTPError) -> float | None:
    try:
        retry_after = error.headers.get("Retry-After")
        if retry_after:
            return float(retry_after)
    except Exception:
        pass
    return None


def request_json(
    url: str,
    method: str = "GET",
    body: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
    token: str | None = None,
    timeout: float = 60,
    max_retries: int = 0,
) -> Any:
    """Make a JSON HTTP request and return the parsed response.

    If *token* is provided, an Authorization header is added automatically.
    If *max_retries* is greater than zero, transient HTTP errors (429, 500,
    502, 503, 504) are retried with exponential back-off.
    """
    req_headers: dict[str, str] = {
        "Accept": "application/json",
    }
    if token is not None:
        req_headers["Authorization"] = f"Bearer {token}"
    if headers is not None:
        req_headers.update(headers)

    payload: bytes | None = None
    if body is not None:
        payload = json.dumps(body).encode("utf-8")
        req_headers.setdefault("Content-Type", "application/json")

    retry_codes = {429, 500, 502, 503, 504}
    last_error: Exception | None = None

    for attempt in range(max_retries + 1):
        req = urllib.request.Request(
            url,
            data=payload,
            method=method,
            headers=req_headers,
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            last_error = exc
            if exc.code not in retry_codes or attempt == max_retries:
                raise
            retry_after = _get_retry_after_seconds(exc)
            sleep = retry_after if retry_after is not None else (2 ** attempt)
            time.sleep(sleep)
        except urllib.error.URLError as exc:
            last_error = exc
            if attempt == max_retries:
                raise
            time.sleep(2 ** attempt)

    # Should never be reached; satisfy type checker.
    if last_error is not None:
        raise last_error
    raise RuntimeError("request_json exhausted all retries")


def run_git(repo_root: str | os.PathLike[str], args: list[str], check: bool = True) -> str:
    """Run a git command and return stdout as a stripped string."""
    proc = subprocess.run(
        ["git", *args],
        cwd=str(repo_root),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )
    if check and proc.returncode != 0:
        stderr = (proc.stderr or "").strip()
        raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
    return proc.stdout.strip()


def configure_git_identity(
    repo_root: str | os.PathLike[str],
    fallback_name: str | None = None,
    fallback_email: str | None = None,
) -> None:
    """Configure git user.name and user.email from pipeline env vars."""
    requested_for = (os.environ.get("BUILD_REQUESTEDFOR") or "").strip()
    requested_for_email = (os.environ.get("BUILD_REQUESTEDFOREMAIL") or "").strip()
    fallback_name = (fallback_name or os.environ.get("USER_NAME") or "ASTRAL Backup Service").strip()
    fallback_email = (fallback_email or os.environ.get("USER_EMAIL") or "intune-backup@local.invalid").strip()

    author_name = requested_for or fallback_name
    author_email = requested_for_email if "@" in requested_for_email else fallback_email

    run_git(repo_root, ["config", "user.name", author_name])
    run_git(repo_root, ["config", "user.email", author_email])
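Callers opt into the retry behaviour of request_json per call. A minimal usage sketch, assuming the module is importable as common; the organisation URL, repository id, PR id, and token value are placeholders, not real endpoints.

import os

import common

# Unresolved Azure DevOps macros like "$(foo)" are treated as unset.
os.environ["ROLLING_PR_MERGE_STRATEGY"] = "$(rollingPrMergeStrategy)"
print(common.env_text("ROLLING_PR_MERGE_STRATEGY", "rebase"))  # -> "rebase"

# request_json with retries: 429/500/502/503/504 are retried, honouring
# Retry-After when present, otherwise sleeping 1s, 2s, 4s, ...
threads = common.request_json(
    "https://dev.azure.com/org/project/_apis/git/repositories/<repo-id>/pullrequests/123/threads?api-version=7.1",
    token="<SYSTEM_ACCESSTOKEN>",
    max_retries=3,
)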
scripts/diagnostics/precheck_azure_openai_availability.py (new file, 203 lines)
@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Lightweight Azure OpenAI availability precheck for pipeline diagnostics.

This script is intentionally non-blocking: it always exits 0.
"""

from __future__ import annotations

import json
import os
import sys
from urllib.error import HTTPError, URLError
from urllib.parse import quote, urlsplit
from urllib.request import Request, urlopen


def _env(name: str, default: str = "") -> str:
    return os.environ.get(name, default).strip()


def _set_pipeline_var(name: str, value: str) -> None:
    print(f"##vso[task.setvariable variable={name}]{value}")


def _normalize_aoai_endpoint(endpoint: str) -> str:
    cleaned = endpoint.strip().rstrip("/")
    if not cleaned:
        return cleaned

    parsed = urlsplit(cleaned)
    if parsed.scheme and parsed.netloc:
        cleaned = f"{parsed.scheme}://{parsed.netloc}"

    marker = "/openai"
    idx = cleaned.lower().find(marker)
    if idx != -1:
        return cleaned[:idx]
    return cleaned


def _preferred_aoai_token_param(deployment_name: str) -> str:
    override = _env("AZURE_OPENAI_TOKEN_PARAM", "").lower()
    if override in {"max_tokens", "max_completion_tokens"}:
        return override
    if deployment_name.strip().lower().startswith("gpt-5"):
        return "max_completion_tokens"
    return "max_tokens"


def _aoai_token_param_candidates(deployment_name: str) -> list[str]:
    preferred = _preferred_aoai_token_param(deployment_name)
    alternate = "max_completion_tokens" if preferred == "max_tokens" else "max_tokens"
    return [preferred, alternate]


def _preferred_aoai_temperature(deployment_name: str) -> float | None:
    override = _env("AZURE_OPENAI_TEMPERATURE", "").lower()
    if override in {"default", "none", "omit"}:
        return None
    if override:
        try:
            return float(override)
        except ValueError:
            return None
    if deployment_name.strip().lower().startswith("gpt-5"):
        return None
    return 0.0


def _aoai_temperature_candidates(deployment_name: str) -> list[float | None]:
    preferred = _preferred_aoai_temperature(deployment_name)
    if preferred is None:
        return [None]
    return [preferred, None]


def main() -> int:
    enabled = _env("ENABLE_PR_AI_SUMMARY", "true").lower() == "true"
    if not enabled:
        print("Azure OpenAI precheck skipped: ENABLE_PR_AI_SUMMARY=false")
        _set_pipeline_var("AOAI_AVAILABLE", "0")
        return 0

    endpoint = _env("AZURE_OPENAI_ENDPOINT")
    deployment = _env("AZURE_OPENAI_DEPLOYMENT")
    api_key = _env("AZURE_OPENAI_API_KEY")
    api_version = _env("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")

    if not endpoint or not deployment or not api_key:
        print("Azure OpenAI precheck skipped: missing endpoint/deployment/api-key variable")
        _set_pipeline_var("AOAI_AVAILABLE", "0")
        return 0

    endpoint_raw = endpoint
    endpoint = _normalize_aoai_endpoint(endpoint_raw)
    deployment_url = f"{endpoint}/openai/deployments/{quote(deployment)}/chat/completions?api-version={quote(api_version)}"
    v1_url = f"{endpoint}/openai/v1/chat/completions"

    print("Azure OpenAI precheck: starting")
    print(f"- endpoint(raw): {endpoint_raw}")
    print(f"- endpoint(normalized): {endpoint}")
    print(f"- deployment: {deployment}")
    print(f"- api_version: {api_version}")
    prefer_v1 = endpoint.lower().endswith(".cognitiveservices.azure.com")
    health_messages = [
        {"role": "system", "content": "You are a health-check assistant."},
        {"role": "user", "content": "Reply with: OK"},
    ]

    for temperature in _aoai_temperature_candidates(deployment):
        temperature_unsupported = False
        for token_param in _aoai_token_param_candidates(deployment):
            deployment_payload = {
                "messages": health_messages,
                token_param: 16,
            }
            v1_payload = {
                "model": deployment,
                "messages": health_messages,
                token_param: 16,
            }
            if temperature is not None:
                deployment_payload["temperature"] = temperature
                v1_payload["temperature"] = temperature

            routes = (
                [("v1", v1_url, v1_payload), ("deployments", deployment_url, deployment_payload)]
                if prefer_v1
                else [("deployments", deployment_url, deployment_payload), ("v1", v1_url, v1_payload)]
            )

            token_param_unsupported = False
            for route_name, route_url, payload in routes:
                req = Request(
                    url=route_url,
                    method="POST",
                    data=json.dumps(payload).encode("utf-8"),
                    headers={
                        "Content-Type": "application/json",
                        "api-key": api_key,
                    },
                )
                try:
                    with urlopen(req, timeout=45) as resp:
                        _ = json.loads(resp.read().decode("utf-8"))
                    print(f"Azure OpenAI precheck: SUCCESS via {route_name} route")
                    _set_pipeline_var("AOAI_AVAILABLE", "1")
                    return 0
                except HTTPError as exc:
                    raw = ""
                    try:
                        raw = exc.read().decode("utf-8", errors="replace")
                    except Exception:
                        raw = ""
                    print(f"Azure OpenAI precheck: HTTP {exc.code} via {route_name} route")
                    if raw:
                        print(raw)
                    if exc.code == 400:
                        raw_lower = raw.lower()
                        if "unsupported parameter" in raw_lower and f"'{token_param}'" in raw_lower:
                            token_param_unsupported = True
                            break
                        if "unsupported value" in raw_lower and "'temperature'" in raw_lower and temperature is not None:
                            temperature_unsupported = True
                            break
                    if exc.code == 404:
                        # Try fallback route first.
                        continue
                    if exc.code in (401, 403):
                        print("Hint: Check AZURE_OPENAI_API_KEY and endpoint/resource pairing.")
                        _set_pipeline_var("AOAI_AVAILABLE", "0")
                        return 0
                    if exc.code == 400:
                        print("Hint: Check model/deployment name and API version compatibility.")
                        _set_pipeline_var("AOAI_AVAILABLE", "0")
                        return 0
                    _set_pipeline_var("AOAI_AVAILABLE", "0")
                    return 0
                except URLError as exc:
                    print(f"Azure OpenAI precheck: network error via {route_name} route: {exc}")
                    _set_pipeline_var("AOAI_AVAILABLE", "0")
                    return 0
                except Exception as exc:  # pragma: no cover
                    print(f"Azure OpenAI precheck: unexpected error via {route_name} route: {exc}")
                    _set_pipeline_var("AOAI_AVAILABLE", "0")
                    return 0
            if temperature_unsupported:
                break
            if not token_param_unsupported:
                break
        if not temperature_unsupported:
            break

    print("Azure OpenAI precheck: no successful response from tested routes/token-params")
    print("Hint: Verify AZURE_OPENAI_ENDPOINT points to the resource root, without /openai path suffix.")
    print("Hint: Verify AZURE_OPENAI_DEPLOYMENT is the deployment name (for v1 this is passed as model).")
    _set_pipeline_var("AOAI_AVAILABLE", "0")
    return 0


if __name__ == "__main__":
    sys.exit(main())
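Two configuration details above are easy to miss: the endpoint is reduced to the resource root before either route is built, and gpt-5* deployment names flip the probed token parameter. The snippet below mirrors _normalize_aoai_endpoint as a standalone sketch; the resource name is invented.

from urllib.parse import urlsplit


def normalize_aoai_endpoint(endpoint: str) -> str:
    # Same logic as _normalize_aoai_endpoint above: keep scheme + host and
    # drop any trailing /openai/... path segment.
    cleaned = endpoint.strip().rstrip("/")
    if not cleaned:
        return cleaned
    parsed = urlsplit(cleaned)
    if parsed.scheme and parsed.netloc:
        cleaned = f"{parsed.scheme}://{parsed.netloc}"
    idx = cleaned.lower().find("/openai")
    return cleaned[:idx] if idx != -1 else cleaned


# A full deployments URL collapses to the resource root.
print(normalize_aoai_endpoint("https://contoso-aoai.openai.azure.com/openai/deployments/gpt-4o/"))
# -> https://contoso-aoai.openai.azure.com

Unless AZURE_OPENAI_TOKEN_PARAM or AZURE_OPENAI_TEMPERATURE override it, gpt-5* deployments are probed with max_completion_tokens and no temperature field, while other deployments use max_tokens with temperature 0.0.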
scripts/ensure_rolling_pr.py (new file, 651 lines)
@@ -0,0 +1,651 @@
#!/usr/bin/env python3
"""Create/update rolling drift PR and optionally queue remediation after rejection."""

from __future__ import annotations

import argparse
import hashlib
import json
import os
import subprocess
import sys
import urllib.parse
from pathlib import Path
from typing import Any

# common.py lives in the same directory; ensure it can be imported when the
# script is executed directly.
_sys_path_inserted = False
if __file__:
    _script_dir = str(Path(__file__).resolve().parent)
    if _script_dir not in sys.path:
        sys.path.insert(0, _script_dir)
        _sys_path_inserted = True

import common

if _sys_path_inserted:
    sys.path.pop(0)

_env_text = common.env_text
_env_bool = common.env_bool
_normalize_exclude_csv = common.normalize_exclude_csv
_normalize_merge_strategy = common.normalize_merge_strategy
_request_json = common.request_json
_run_git = common.run_git


def _query_prs(
    repo_api: str,
    headers: dict[str, str],
    source_ref: str,
    target_ref: str,
    status: str,
) -> list[dict[str, Any]]:
    query = urllib.parse.urlencode(
        {
            "searchCriteria.status": status,
            "searchCriteria.sourceRefName": source_ref,
            "searchCriteria.targetRefName": target_ref,
            "api-version": "7.1",
        },
        quote_via=urllib.parse.quote,
        safe="/",
    )
    url = f"{repo_api}/pullrequests?{query}"
    payload = _request_json(url, headers=headers)
    return payload.get("value", []) if isinstance(payload, dict) else []


def _normalize_branch(branch: str) -> str:
    b = branch.strip()
    if b.startswith("refs/heads/"):
        return b[len("refs/heads/") :]
    return b


def _ref_from_branch(branch: str) -> str:
    return f"refs/heads/{_normalize_branch(branch)}"


def _pr_web_url(pr_payload: dict[str, Any]) -> str:
    pr_id = pr_payload.get("pullRequestId")
    return (
        pr_payload.get("url", "")
        .replace("_apis/git/repositories", "_git")
        .replace(f"/pullRequests/{pr_id}", f"/pullrequest/{pr_id}")
    )


def _current_tree_id(repo_root: str) -> str:
    return _run_git(repo_root, ["rev-parse", "HEAD^{tree}"])


def _tree_id_for_commitish(repo_root: str, commitish: str) -> str:
    return _run_git(repo_root, ["rev-parse", f"{commitish}^{{tree}}"])


def _ref_has_commit(repo_root: str, ref: str) -> bool:
    proc = subprocess.run(
        ["git", "rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"],
        cwd=repo_root,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return proc.returncode == 0


def _commit_tree_id(repo_api: str, headers: dict[str, str], commit_id: str) -> str:
    url = f"{repo_api}/commits/{commit_id}?api-version=7.1"
    payload = _request_json(url, headers=headers)
    tree_id = payload.get("treeId", "") if isinstance(payload, dict) else ""
    return tree_id.strip()


def _latest_pr_by_creation(prs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    return sorted(prs, key=lambda x: x.get("creationDate", ""), reverse=True)


def _normalize_repo_path(path: str) -> str:
    return str(path or "").replace("\\", "/").lstrip("./")


def _is_doc_like(path: str) -> bool:
    lp = _normalize_repo_path(path).lower()
    if lp.endswith((".md", ".html", ".htm", ".pdf", ".csv", ".txt")):
        return True
    return "/docs/" in f"/{lp}" or "/object inventory/" in f"/{lp}"


def _is_report_like(path: str) -> bool:
    lp = _normalize_repo_path(path).lower()
    return "/reports/" in f"/{lp}" or "/assignment report/" in f"/{lp}"


def _is_workload_config_path(path: str, workload_dir: str, backup_folder: str, reports_subdir: str) -> bool:
    lp = _normalize_repo_path(path).lower()
    backup_norm = _normalize_repo_path(backup_folder).lower().strip("/")
    workload_norm = _normalize_repo_path(workload_dir).lower().strip("/")
    reports_norm = _normalize_repo_path(reports_subdir).lower().strip("/")

    if not backup_norm or not workload_norm:
        return False

    workload_prefix = f"{backup_norm}/{workload_norm}/"
    if not lp.startswith(workload_prefix):
        return False

    if reports_norm and lp.startswith(f"{backup_norm}/{reports_norm}/"):
        return False

    if _is_doc_like(lp) or _is_report_like(lp):
        return False
    return True


def _config_fingerprint_from_local_tree(
    repo_root: str, commitish: str, workload_dir: str, backup_folder: str, reports_subdir: str
) -> str:
    backup_norm = _normalize_repo_path(backup_folder).strip("/")
    workload_norm = _normalize_repo_path(workload_dir).strip("/")
    path_prefix = f"{backup_norm}/{workload_norm}" if backup_norm and workload_norm else ""
    if not path_prefix:
        return ""

    try:
        out = _run_git(repo_root, ["ls-tree", "-r", "--full-tree", commitish, "--", path_prefix])
    except Exception:
        return ""

    pairs: list[str] = []
    for line in out.splitlines():
        if "\t" not in line:
            continue
        left, rel_path = line.split("\t", 1)
        parts = left.split()
        if len(parts) < 3 or parts[1] != "blob":
            continue
        blob_id = parts[2].strip()
        if not blob_id:
            continue
        if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir):
            continue
        pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}")

    if not pairs:
        return ""
    pairs.sort(key=lambda item: item.lower())
    joined = "\n".join(pairs).encode("utf-8")
    return hashlib.sha256(joined).hexdigest()


def _config_fingerprint_from_tree_api(
    repo_api: str, headers: dict[str, str], tree_id: str, workload_dir: str, backup_folder: str, reports_subdir: str
) -> str:
    if not tree_id:
        return ""
    url = f"{repo_api}/trees/{tree_id}?recursive=true&api-version=7.1"
    payload = _request_json(url, headers=headers)
    entries = payload.get("treeEntries", []) if isinstance(payload, dict) else []

    pairs: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        if str(entry.get("gitObjectType", "")).lower() != "blob":
            continue
        rel_path = str(entry.get("relativePath", ""))
        if not _is_workload_config_path(rel_path, workload_dir, backup_folder, reports_subdir):
            continue
        blob_id = str(entry.get("objectId", "")).strip()
        if not blob_id:
            continue
        pairs.append(f"{_normalize_repo_path(rel_path)}\t{blob_id}")

    if not pairs:
        return ""
    pairs.sort(key=lambda item: item.lower())
    joined = "\n".join(pairs).encode("utf-8")
    return hashlib.sha256(joined).hexdigest()


def _workload_config_diff_exists(
    repo_root: str,
    baseline_commitish: str,
    drift_commitish: str,
    workload_dir: str,
    backup_folder: str,
    reports_subdir: str,
) -> bool:
    baseline_fingerprint = _config_fingerprint_from_local_tree(
        repo_root=repo_root,
        commitish=baseline_commitish,
        workload_dir=workload_dir,
        backup_folder=backup_folder,
        reports_subdir=reports_subdir,
    )
    drift_fingerprint = _config_fingerprint_from_local_tree(
        repo_root=repo_root,
        commitish=drift_commitish,
        workload_dir=workload_dir,
        backup_folder=backup_folder,
        reports_subdir=reports_subdir,
    )

    if baseline_fingerprint and drift_fingerprint:
        return baseline_fingerprint != drift_fingerprint

    try:
        return _tree_id_for_commitish(repo_root, baseline_commitish) != _tree_id_for_commitish(repo_root, drift_commitish)
    except Exception:
        return True


def _find_matching_abandoned_pr(
    repo_api: str,
    headers: dict[str, str],
    abandoned_prs: list[dict[str, Any]],
    drift_tree: str,
    repo_root: str,
    workload_dir: str,
    backup_folder: str,
    reports_subdir: str,
    drift_commitish: str,
) -> tuple[dict[str, Any] | None, str]:
    current_config_fingerprint = _config_fingerprint_from_local_tree(
        repo_root=repo_root,
        commitish=drift_commitish,
        workload_dir=workload_dir,
        backup_folder=backup_folder,
        reports_subdir=reports_subdir,
    )
    tree_fingerprint_cache: dict[str, str] = {}

    for pr in _latest_pr_by_creation(abandoned_prs):
        commit_id = (
            ((pr.get("lastMergeSourceCommit") or {}).get("commitId"))
            or ((pr.get("lastMergeCommit") or {}).get("commitId"))
            or ""
        ).strip()
        if not commit_id:
            continue
        try:
            pr_tree = _commit_tree_id(repo_api, headers, commit_id)
        except Exception:
            continue
        if pr_tree and pr_tree == drift_tree:
            return pr, "exact-tree"

        if current_config_fingerprint and pr_tree:
            if pr_tree not in tree_fingerprint_cache:
                try:
                    tree_fingerprint_cache[pr_tree] = _config_fingerprint_from_tree_api(
                        repo_api=repo_api,
                        headers=headers,
                        tree_id=pr_tree,
                        workload_dir=workload_dir,
                        backup_folder=backup_folder,
                        reports_subdir=reports_subdir,
                    )
                except Exception:
                    tree_fingerprint_cache[pr_tree] = ""
            if tree_fingerprint_cache[pr_tree] and tree_fingerprint_cache[pr_tree] == current_config_fingerprint:
                return pr, "config-fingerprint"

    return None, ""


def _pr_has_reject_vote(pr: dict[str, Any]) -> bool:
    reviewers = pr.get("reviewers", [])
    if not isinstance(reviewers, list):
        return False
    for reviewer in reviewers:
        if not isinstance(reviewer, dict):
            continue
        try:
            vote = int(reviewer.get("vote", 0))
        except Exception:
            vote = 0
        if vote == -10:
            return True
    return False


def _current_pr_merge_strategy(pr: dict[str, Any]) -> str:
    completion_options = pr.get("completionOptions")
    if not isinstance(completion_options, dict):
        return ""
    raw = str(completion_options.get("mergeStrategy") or "").strip()
    if not raw:
        return ""
    return _normalize_merge_strategy(raw)


def _build_description(workload: str, drift_branch: str, baseline_branch: str, build_number: str, build_id: str) -> str:
    is_entra = workload.lower() == "entra"
    lead = "Rolling Entra drift PR created by backup pipeline." if is_entra else "Rolling drift PR created by backup pipeline."
    return (
        f"{lead}\n\n"
        f"- Source branch: `{drift_branch}`\n"
        f"- Target branch: `{baseline_branch}`\n"
        f"- Last pipeline run: `{build_number}` (BuildId: {build_id})\n\n"
        "The automated review summary is generated immediately after PR creation and inserted "
        "above the reviewer actions section.\n\n"
        "## Reviewer Quick Actions\n\n"
        "### 1) Accept all changes\n"
        "- Merge PR to accept drift into baseline.\n\n"
        "### 2) Reject whole PR and revert\n"
        "- Set reviewer vote to **Reject**.\n"
        "- Abandon PR.\n"
        "- Auto-remediation queues restore (if `AUTO_REMEDIATE_ON_PR_REJECTION=true`).\n\n"
        "### 3) Reject only selected policy changes\n"
        "- In each `Change Needed` policy thread, comment `/reject` for changes you do not want.\n"
        "- Optional: use `/accept` for changes you want to keep.\n"
        "- Wait for review-sync pipeline (about 5 minutes) to update PR diff.\n"
        "- Merge remaining accepted changes.\n"
        "- Post-merge auto-remediation queues restore to reconcile tenant to merged baseline "
        "(if `AUTO_REMEDIATE_AFTER_MERGE=true`)."
    )


def _threads_with_marker(repo_api: str, headers: dict[str, str], pr_id: int, marker: str) -> bool:
    url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1"
    payload = _request_json(url, headers=headers)
    threads = payload.get("value", []) if isinstance(payload, dict) else []
    for thread in threads:
        for comment in thread.get("comments", []):
            content = str(comment.get("content", ""))
            if marker in content:
                return True
    return False


def _queue_restore_pipeline(
    collection_uri: str,
    project: str,
    headers: dict[str, str],
    definition_id: int,
    baseline_branch: str,
    include_entra_update: bool,
    dry_run: bool,
    update_assignments: bool,
    remove_unmanaged: bool,
    max_workers: int,
    exclude_csv: str,
) -> dict[str, Any]:
    build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1"
    template_parameters = {
        "dryRun": dry_run,
        "updateAssignments": update_assignments,
        "removeObjectsNotInBaseline": remove_unmanaged,
        "includeEntraUpdate": include_entra_update,
        "baselineBranch": baseline_branch,
        "maxWorkers": max_workers,
    }
    exclude_csv = _normalize_exclude_csv(exclude_csv)
    if exclude_csv:
        template_parameters["excludeCsv"] = exclude_csv
    body = {
        "definition": {"id": definition_id},
        "sourceBranch": _ref_from_branch(baseline_branch),
        "templateParameters": template_parameters,
    }
    return _request_json(build_api, headers=headers, method="POST", body=body)


def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None:
    url = f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1"
    body = {
        "comments": [{"parentCommentId": 0, "content": content, "commentType": 1}],
        "status": "active",
    }
    _request_json(url, headers=headers, method="POST", body=body)


def main() -> int:
    parser = argparse.ArgumentParser(description="Ensure rolling PR exists with optional remediation-on-rejection")
    parser.add_argument("--repo-root", required=True)
    parser.add_argument("--workload", required=True, choices=["intune", "entra"])
    parser.add_argument("--drift-branch", required=True)
    parser.add_argument("--baseline-branch", required=True)
    parser.add_argument("--pr-title", required=True)
    args = parser.parse_args()

    token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
    if not token:
        raise SystemExit("SYSTEM_ACCESSTOKEN is empty. Enable OAuth token access for this pipeline.")

    collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
    project = os.environ["SYSTEM_TEAMPROJECT"]
    repository_id = os.environ["BUILD_REPOSITORY_ID"]
    build_number = os.environ.get("BUILD_BUILDNUMBER", "")
    build_id = os.environ.get("BUILD_BUILDID", "")

    auto_remediate = _env_bool("AUTO_REMEDIATE_ON_PR_REJECTION", False)
    include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False)
    remediation_def_id_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "")
    remediation_dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False)
    remediation_update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True)
    remediation_remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False)
    remediation_max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10")
    remediation_exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", ""))
    pr_merge_strategy = _normalize_merge_strategy(_env_text("ROLLING_PR_MERGE_STRATEGY", "rebase"))
    create_as_draft = _env_bool("ROLLING_PR_DELAY_REVIEWER_NOTIFICATIONS", False)

    try:
        remediation_max_workers = int(remediation_max_workers_raw)
    except ValueError as exc:
        raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS value: {remediation_max_workers_raw}") from exc

    if auto_remediate and not remediation_def_id_raw:
        print(
            "WARNING: AUTO_REMEDIATE_ON_PR_REJECTION=true but AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty; "
            "remediation queueing disabled for this run.",
            file=sys.stderr,
        )
        auto_remediate = False

    try:
        remediation_def_id = int(remediation_def_id_raw) if remediation_def_id_raw else 0
    except ValueError as exc:
        raise SystemExit(
            f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID value: {remediation_def_id_raw}"
        ) from exc

    drift_branch = _normalize_branch(args.drift_branch)
    baseline_branch = _normalize_branch(args.baseline_branch)
    backup_folder = _env_text("BACKUP_FOLDER", "tenant-state")
    reports_subdir = _env_text("REPORTS_SUBDIR", "reports")
    workload_dir = _env_text(
        "INTUNE_BACKUP_SUBDIR" if args.workload == "intune" else "ENTRA_BACKUP_SUBDIR",
        args.workload,
    )
    source_ref = _ref_from_branch(drift_branch)
    target_ref = _ref_from_branch(baseline_branch)

    repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    description = _build_description(args.workload, drift_branch, baseline_branch, build_number, build_id)
    completion_options = {"mergeStrategy": pr_merge_strategy}
    print(f"Rolling PR completion merge strategy: {pr_merge_strategy}")

    active_prs = _query_prs(repo_api, headers, source_ref, target_ref, "active")
    if active_prs:
        pr = active_prs[0]
        pr_id = pr.get("pullRequestId")
        current_title = str(pr.get("title") or "")
        current_description = str(pr.get("description") or "")
        current_merge_strategy = _current_pr_merge_strategy(pr)
        desired_description = current_description if current_description.strip() else description
        needs_patch = (
            current_title != args.pr_title
            or not current_description.strip()
            or current_merge_strategy != pr_merge_strategy
        )
        if needs_patch:
            update_url = f"{repo_api}/pullrequests/{pr_id}?api-version=7.1"
            _request_json(
                update_url,
                headers=headers,
                method="PATCH",
                body={
                    "title": args.pr_title,
                    "description": desired_description,
                    "completionOptions": completion_options,
                },
            )
        web_url = _pr_web_url(pr)
        if needs_patch:
            print(f"Updated rolling {args.workload} PR #{pr_id}: {web_url}")
        else:
            print(f"Rolling {args.workload} PR #{pr_id} already up to date: {web_url}")
        print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}")
        if web_url:
            print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}")
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0")
|
||||
return 0
|
||||
|
||||
_run_git(args.repo_root, ["fetch", "--quiet", "origin", baseline_branch, drift_branch])
|
||||
baseline_commitish = f"origin/{baseline_branch}" if _ref_has_commit(args.repo_root, f"origin/{baseline_branch}") else baseline_branch
|
||||
drift_commitish = f"origin/{drift_branch}" if _ref_has_commit(args.repo_root, f"origin/{drift_branch}") else "HEAD"
|
||||
if not _workload_config_diff_exists(
|
||||
repo_root=args.repo_root,
|
||||
baseline_commitish=baseline_commitish,
|
||||
drift_commitish=drift_commitish,
|
||||
workload_dir=workload_dir,
|
||||
backup_folder=backup_folder,
|
||||
reports_subdir=reports_subdir,
|
||||
):
|
||||
print(
|
||||
"Suppressed PR recreation: drift branch has no effective workload configuration diff "
|
||||
f"against {baseline_branch}."
|
||||
)
|
||||
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
|
||||
return 0
|
||||
|
||||
drift_tree = _tree_id_for_commitish(args.repo_root, drift_commitish)
|
||||
abandoned_prs = _query_prs(repo_api, headers, source_ref, target_ref, "abandoned")
|
||||
matching_abandoned, match_reason = _find_matching_abandoned_pr(
|
||||
repo_api=repo_api,
|
||||
headers=headers,
|
||||
abandoned_prs=abandoned_prs,
|
||||
drift_tree=drift_tree,
|
||||
repo_root=args.repo_root,
|
||||
workload_dir=workload_dir,
|
||||
backup_folder=backup_folder,
|
||||
reports_subdir=reports_subdir,
|
||||
drift_commitish=drift_commitish,
|
||||
)
|
||||
|
||||
if matching_abandoned:
|
||||
if match_reason == "config-fingerprint":
|
||||
print(
|
||||
"Matched abandoned PR using configuration fingerprint "
|
||||
"(ignoring docs/reports churn)."
|
||||
)
|
||||
pr_id = int(matching_abandoned["pullRequestId"])
|
||||
if not _pr_has_reject_vote(matching_abandoned):
|
||||
print(
|
||||
"Matched abandoned PR without reviewer Reject vote; "
|
||||
"skipping remediation and suppressing PR recreation for this unchanged drift snapshot."
|
||||
)
|
||||
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
|
||||
return 0
|
||||
|
||||
if not auto_remediate:
|
||||
print(
|
||||
"Suppressed PR recreation: latest drift matches a rejected PR, "
|
||||
"but AUTO_REMEDIATE_ON_PR_REJECTION is disabled."
|
||||
)
|
||||
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
|
||||
return 0
|
||||
|
||||
marker = f"Automation marker: AUTO-REMEDIATE-TREE:{drift_tree}"
|
||||
already_queued = _threads_with_marker(repo_api, headers, pr_id, marker)
|
||||
|
||||
if already_queued:
|
||||
print(
|
||||
"Suppressed PR recreation: latest drift matches a previously rejected PR and remediation was already queued."
|
||||
)
|
||||
else:
|
||||
queued = _queue_restore_pipeline(
|
||||
collection_uri=collection_uri,
|
||||
project=project,
|
||||
headers=headers,
|
||||
definition_id=remediation_def_id,
|
||||
baseline_branch=baseline_branch,
|
||||
include_entra_update=include_entra_update,
|
||||
dry_run=remediation_dry_run,
|
||||
update_assignments=remediation_update_assignments,
|
||||
remove_unmanaged=remediation_remove_unmanaged,
|
||||
max_workers=remediation_max_workers,
|
||||
exclude_csv=remediation_exclude_csv,
|
||||
)
|
||||
build_queued_id = queued.get("id")
|
||||
build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "")
|
||||
if not build_url and build_queued_id:
|
||||
build_url = f"{collection_uri}/{project}/_build/results?buildId={build_queued_id}"
|
||||
|
||||
comment = (
|
||||
"Auto-remediation queued because the latest drift matches a rejected PR.\n\n"
|
||||
f"Workload: {args.workload}\n"
|
||||
f"Rejected PR: #{pr_id}\n"
|
||||
f"Drift tree: {drift_tree}\n"
|
||||
f"Restore pipeline definition: {remediation_def_id}\n"
|
||||
f"Restore run: {build_url or '(queued)'}\n\n"
|
||||
f"{marker}"
|
||||
)
|
||||
try:
|
||||
_post_pr_thread(repo_api, headers, pr_id, comment)
|
||||
except Exception as exc:
|
||||
print(f"WARNING: Remediation queued, but failed to post PR thread on #{pr_id}: {exc}")
|
||||
|
||||
print(
|
||||
f"Queued remediation pipeline run (definition={remediation_def_id}, buildId={build_queued_id}) and suppressed PR recreation."
|
||||
)
|
||||
|
||||
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]1")
|
||||
return 0
|
||||
|
||||
if abandoned_prs:
|
||||
print(
|
||||
f"No abandoned PR snapshot match for current drift tree (checked {len(abandoned_prs)} abandoned PR(s)); creating/updating rolling PR."
|
||||
)
|
||||
|
||||
create_url = f"{repo_api}/pullrequests?api-version=7.1"
|
||||
created = _request_json(
|
||||
create_url,
|
||||
headers=headers,
|
||||
method="POST",
|
||||
body={
|
||||
"sourceRefName": source_ref,
|
||||
"targetRefName": target_ref,
|
||||
"title": args.pr_title,
|
||||
"description": description,
|
||||
"isDraft": create_as_draft,
|
||||
"completionOptions": completion_options,
|
||||
},
|
||||
)
|
||||
pr_id = created.get("pullRequestId")
|
||||
web_url = _pr_web_url(created)
|
||||
print(f"Created rolling {args.workload} PR #{pr_id}: {web_url}")
|
||||
print(f"##vso[task.setvariable variable=DRIFT_PR_ID;isOutput=true]{pr_id}")
|
||||
if web_url:
|
||||
print(f"##vso[task.setvariable variable=DRIFT_PR_URL;isOutput=true]{web_url}")
|
||||
print("##vso[task.setvariable variable=DRIFT_PR_SUPPRESSED;isOutput=true]0")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
raise SystemExit(main())
|
||||
except Exception as exc:
|
||||
print(f"ERROR: Failed to ensure rolling PR: {exc}", file=sys.stderr)
|
||||
raise
|
||||
1313
scripts/export_entra_baseline.py
Normal file
1313
scripts/export_entra_baseline.py
Normal file
File diff suppressed because it is too large
171
scripts/filter_entra_enrichment_noise.py
Normal file
171
scripts/filter_entra_enrichment_noise.py
Normal file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Revert Entra JSON file edits when only enrichment metadata changed."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Any
|
||||
|
||||
|
||||
ENRICHMENT_KEY_NAMES = {
|
||||
"ownersresolved",
|
||||
"approleassignmentsresolved",
|
||||
"requiredresourceaccessresolved",
|
||||
"appownerorganizationresolved",
|
||||
"resolutionstatus",
|
||||
}
|
||||
|
||||
|
||||
def _to_bool(value: str) -> bool:
|
||||
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def _run_git(repo_root: Path, args: list[str], check: bool = True) -> subprocess.CompletedProcess[bytes]:
|
||||
proc = subprocess.run(
|
||||
["git", *args],
|
||||
cwd=str(repo_root),
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
if check and proc.returncode != 0:
|
||||
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
|
||||
raise RuntimeError(f"git {' '.join(args)} failed ({proc.returncode}): {stderr}")
|
||||
return proc
|
||||
|
||||
|
||||
def _strip_enrichment(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
cleaned: dict[str, Any] = {}
|
||||
for key, child in value.items():
|
||||
if str(key).strip().lower() in ENRICHMENT_KEY_NAMES:
|
||||
continue
|
||||
cleaned[key] = _strip_enrichment(child)
|
||||
return cleaned
|
||||
if isinstance(value, list):
|
||||
return [_strip_enrichment(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def _is_enrichment_only_change(old_text: str, new_text: str) -> bool:
|
||||
if not old_text or not new_text:
|
||||
return False
|
||||
try:
|
||||
old_payload = json.loads(old_text)
|
||||
new_payload = json.loads(new_text)
|
||||
except Exception:
|
||||
return False
|
||||
if not isinstance(old_payload, dict) or not isinstance(new_payload, dict):
|
||||
return False
|
||||
|
||||
old_stripped = _strip_enrichment(old_payload)
|
||||
new_stripped = _strip_enrichment(new_payload)
|
||||
if old_stripped != new_stripped:
|
||||
return False
|
||||
return old_payload != new_payload
|
||||
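# Worked example (hypothetical payloads): if the HEAD copy and the working-tree copy of a file
# differ only in an enrichment key such as "ownersResolved", _strip_enrichment() drops that key
# from both sides, the stripped payloads compare equal while the raw payloads still differ, and
# the change is therefore classified as enrichment-only (and later reverted).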
|
||||
|
||||
def _modified_paths(repo_root: Path, workload_root: str) -> list[str]:
|
||||
proc = _run_git(
|
||||
repo_root,
|
||||
["diff", "--name-only", "-z", "--diff-filter=M", "--", workload_root],
|
||||
check=True,
|
||||
)
|
||||
raw = proc.stdout.split(b"\x00")
|
||||
paths: list[str] = []
|
||||
for chunk in raw:
|
||||
text = chunk.decode("utf-8", errors="replace").strip()
|
||||
if text:
|
||||
paths.append(text)
|
||||
return paths
|
||||
|
||||
|
||||
def _is_json_path(path: str) -> bool:
|
||||
return PurePosixPath(path.replace("\\", "/")).suffix.lower() == ".json"
|
||||
|
||||
|
||||
def filter_enrichment_only_files(repo_root: Path, workload_root: str) -> list[str]:
|
||||
reverted: list[str] = []
|
||||
for rel_path in _modified_paths(repo_root, workload_root):
|
||||
if not _is_json_path(rel_path):
|
||||
continue
|
||||
|
||||
head_proc = _run_git(repo_root, ["show", f"HEAD:{rel_path}"], check=False)
|
||||
if head_proc.returncode != 0:
|
||||
continue
|
||||
old_text = head_proc.stdout.decode("utf-8", errors="replace")
|
||||
|
||||
abs_path = repo_root / rel_path
|
||||
if not abs_path.is_file():
|
||||
continue
|
||||
new_text = abs_path.read_text(encoding="utf-8")
|
||||
|
||||
if _is_enrichment_only_change(old_text, new_text):
|
||||
_run_git(repo_root, ["checkout", "--quiet", "--", rel_path], check=True)
|
||||
reverted.append(rel_path)
|
||||
return reverted
|
||||
|
||||
|
||||
def find_enrichment_only_modified_files(repo_root: Path, workload_root: str) -> list[str]:
|
||||
matches: list[str] = []
|
||||
for rel_path in _modified_paths(repo_root, workload_root):
|
||||
if not _is_json_path(rel_path):
|
||||
continue
|
||||
|
||||
head_proc = _run_git(repo_root, ["show", f"HEAD:{rel_path}"], check=False)
|
||||
if head_proc.returncode != 0:
|
||||
continue
|
||||
old_text = head_proc.stdout.decode("utf-8", errors="replace")
|
||||
|
||||
abs_path = repo_root / rel_path
|
||||
if not abs_path.is_file():
|
||||
continue
|
||||
new_text = abs_path.read_text(encoding="utf-8")
|
||||
|
||||
if _is_enrichment_only_change(old_text, new_text):
|
||||
matches.append(rel_path)
|
||||
return matches
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--repo-root", required=True, help="Repository root path.")
|
||||
parser.add_argument(
|
||||
"--workload-root",
|
||||
default="tenant-state/entra",
|
||||
help="Path scope inside repo to inspect (default: tenant-state/entra).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fail-on-residual-enrichment-drift",
|
||||
default="true",
|
||||
help="Exit non-zero when enrichment-only modified files remain after filtering (true/false).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
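# Example invocation (paths are illustrative; the defaults shown mirror the argparse setup above):
#   python scripts/filter_entra_enrichment_noise.py \
#       --repo-root . \
#       --workload-root tenant-state/entra \
#       --fail-on-residual-enrichment-drift true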
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
repo_root = Path(args.repo_root).resolve()
|
||||
reverted = filter_enrichment_only_files(repo_root=repo_root, workload_root=args.workload_root)
|
||||
if reverted:
|
||||
print(f"Reverted enrichment-only Entra file changes: {len(reverted)}")
|
||||
for path in reverted:
|
||||
print(f" - {path}")
|
||||
else:
|
||||
print("No enrichment-only Entra file changes detected.")
|
||||
|
||||
residual = find_enrichment_only_modified_files(repo_root=repo_root, workload_root=args.workload_root)
|
||||
if residual:
|
||||
print(f"Residual enrichment-only Entra file changes still present: {len(residual)}")
|
||||
for path in residual:
|
||||
print(f" - {path}")
|
||||
if _to_bool(args.fail_on_residual_enrichment_drift):
|
||||
return 2
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
144
scripts/filter_intune_partial_settings_noise.py
Normal file
144
scripts/filter_intune_partial_settings_noise.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Revert Intune Settings Catalog partial exports where settings payload is missing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _to_bool(value: str) -> bool:
|
||||
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def _run_git_show(repo_root: Path, ref: str, rel_path: str) -> str | None:
|
||||
proc = subprocess.run(
|
||||
["git", "show", f"{ref}:{rel_path}"],
|
||||
cwd=str(repo_root),
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
return None
|
||||
return proc.stdout.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def _is_settings_catalog_json(file_path: Path, backup_root: Path) -> bool:
|
||||
if file_path.suffix.lower() != ".json":
|
||||
return False
|
||||
rel = file_path.relative_to(backup_root).as_posix().lower()
|
||||
return rel.startswith("settings catalog/")
|
||||
|
||||
|
||||
def _is_partial_settings_payload(payload: Any) -> bool:
|
||||
if not isinstance(payload, dict):
|
||||
return False
|
||||
setting_count = payload.get("settingCount")
|
||||
if not isinstance(setting_count, int) or setting_count <= 0:
|
||||
return False
|
||||
settings = payload.get("settings")
|
||||
if not isinstance(settings, list):
|
||||
return True
|
||||
return len(settings) == 0
|
||||
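# Hypothetical example: {"settingCount": 7, "settings": []} (or the same payload with the
# "settings" key missing) counts as a partial export, whereas {"settingCount": 0} or a payload
# with a non-empty "settings" list is left alone.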
|
||||
|
||||
def restore_partial_settings_from_baseline(
|
||||
repo_root: Path,
|
||||
backup_root: Path,
|
||||
baseline_ref: str,
|
||||
) -> tuple[list[str], list[str]]:
|
||||
restored: list[str] = []
|
||||
unresolved: list[str] = []
|
||||
|
||||
for file_path in sorted(backup_root.rglob("*.json")):
|
||||
if not _is_settings_catalog_json(file_path, backup_root):
|
||||
continue
|
||||
|
||||
try:
|
||||
current_payload = json.loads(file_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not _is_partial_settings_payload(current_payload):
|
||||
continue
|
||||
|
||||
rel_path = file_path.relative_to(repo_root).as_posix()
|
||||
baseline_text = _run_git_show(repo_root, baseline_ref, rel_path)
|
||||
if not baseline_text:
|
||||
unresolved.append(rel_path)
|
||||
continue
|
||||
|
||||
try:
|
||||
baseline_payload = json.loads(baseline_text)
|
||||
except Exception:
|
||||
unresolved.append(rel_path)
|
||||
continue
|
||||
|
||||
baseline_settings = baseline_payload.get("settings")
|
||||
if not isinstance(baseline_settings, list) or len(baseline_settings) == 0:
|
||||
unresolved.append(rel_path)
|
||||
continue
|
||||
|
||||
current_payload["settings"] = baseline_settings
|
||||
file_path.write_text(json.dumps(current_payload, indent=5, ensure_ascii=False), encoding="utf-8")
|
||||
restored.append(rel_path)
|
||||
|
||||
return restored, unresolved
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--repo-root", required=True, help="Repository root path.")
|
||||
parser.add_argument(
|
||||
"--backup-root",
|
||||
default="tenant-state/intune",
|
||||
help="Path to Intune backup root (default: tenant-state/intune).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--baseline-ref",
|
||||
default="HEAD",
|
||||
help="Git ref used as baseline for restoration (default: HEAD).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fail-on-unresolved-partial-exports",
|
||||
default="true",
|
||||
help="Exit non-zero when partial exports cannot be restored from baseline (true/false).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
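# Example invocation (paths are illustrative; defaults match the argparse setup above):
#   python scripts/filter_intune_partial_settings_noise.py \
#       --repo-root . \
#       --backup-root tenant-state/intune \
#       --baseline-ref HEAD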
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
repo_root = Path(args.repo_root).resolve()
|
||||
backup_root_arg = Path(args.backup_root)
|
||||
backup_root = backup_root_arg if backup_root_arg.is_absolute() else repo_root / backup_root_arg
|
||||
backup_root = backup_root.resolve()
|
||||
|
||||
restored, unresolved = restore_partial_settings_from_baseline(
|
||||
repo_root=repo_root,
|
||||
backup_root=backup_root,
|
||||
baseline_ref=args.baseline_ref,
|
||||
)
|
||||
|
||||
if restored:
|
||||
print(f"Restored partial Intune Settings Catalog exports from baseline: {len(restored)}")
|
||||
for path in restored:
|
||||
print(f" - {path}")
|
||||
else:
|
||||
print("No partial Intune Settings Catalog exports detected.")
|
||||
|
||||
if unresolved:
|
||||
print(f"Unresolved partial Intune Settings Catalog exports: {len(unresolved)}")
|
||||
for path in unresolved:
|
||||
print(f" - {path}")
|
||||
if _to_bool(args.fail_on_unresolved_partial_exports):
|
||||
return 2
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
259
scripts/generate_app_inventory_report.py
Normal file
259
scripts/generate_app_inventory_report.py
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a dedicated apps inventory CSV from Entra app exports."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--root", required=True, help="Path to the Entra workload backup root (tenant-state/entra).")
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
required=True,
|
||||
help="Directory where apps inventory report files will be written.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-name",
|
||||
default="apps-inventory.csv",
|
||||
help="Output CSV filename (default: apps-inventory.csv).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
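# Example invocation (illustrative paths; the CSV name falls back to the default shown above):
#   python scripts/generate_app_inventory_report.py \
#       --root tenant-state/entra \
#       --output-dir tenant-state/entra/reports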
|
||||
|
||||
def safe_text(value: object) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def summarize_owners(owners: object) -> tuple[int, str]:
|
||||
if not isinstance(owners, list):
|
||||
return 0, ""
|
||||
|
||||
labels: list[str] = []
|
||||
for owner in owners:
|
||||
if not isinstance(owner, dict):
|
||||
continue
|
||||
label = (
|
||||
safe_text(owner.get("displayName"))
|
||||
or safe_text(owner.get("userPrincipalName"))
|
||||
or safe_text(owner.get("appId"))
|
||||
or safe_text(owner.get("id"))
|
||||
or "Unknown owner"
|
||||
)
|
||||
labels.append(label)
|
||||
|
||||
return len(labels), "; ".join(labels)
|
||||
|
||||
|
||||
def summarize_required_resource_access(entries: object) -> tuple[int, str]:
|
||||
if not isinstance(entries, list):
|
||||
return 0, ""
|
||||
|
||||
summary: list[str] = []
|
||||
total_permissions = 0
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
resource_name = safe_text(entry.get("resourceDisplayName")) or "Unresolved resource"
|
||||
resource_app_id = safe_text(entry.get("resourceAppId"))
|
||||
permissions = entry.get("permissions")
|
||||
permission_labels: list[str] = []
|
||||
if isinstance(permissions, list):
|
||||
for permission in permissions:
|
||||
if not isinstance(permission, dict):
|
||||
continue
|
||||
total_permissions += 1
|
||||
perm_type = safe_text(permission.get("type")) or "UnknownType"
|
||||
perm_label = (
|
||||
safe_text(permission.get("value"))
|
||||
or safe_text(permission.get("displayName"))
|
||||
or safe_text(permission.get("id"))
|
||||
or "UnknownPermission"
|
||||
)
|
||||
permission_labels.append(f"{perm_label} [{perm_type}]")
|
||||
|
||||
resource_label = resource_name
|
||||
if resource_app_id:
|
||||
resource_label += f" ({resource_app_id})"
|
||||
if permission_labels:
|
||||
summary.append(f"{resource_label}: {', '.join(permission_labels)}")
|
||||
else:
|
||||
summary.append(resource_label)
|
||||
|
||||
return total_permissions, "; ".join(summary)
|
||||
|
||||
|
||||
def summarize_enterprise_app_role_assignments(entries: object) -> tuple[int, str]:
|
||||
if not isinstance(entries, list):
|
||||
return 0, ""
|
||||
|
||||
summary: list[str] = []
|
||||
count = 0
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
count += 1
|
||||
resource_name = safe_text(entry.get("resourceDisplayName")) or "Unresolved resource"
|
||||
resource_id = safe_text(entry.get("resourceId"))
|
||||
role_name = (
|
||||
safe_text(entry.get("appRoleValue"))
|
||||
or safe_text(entry.get("appRoleDisplayName"))
|
||||
or safe_text(entry.get("appRoleId"))
|
||||
or "Default access"
|
||||
)
|
||||
label = resource_name
|
||||
if resource_id:
|
||||
label += f" ({resource_id})"
|
||||
summary.append(f"{label}: {role_name}")
|
||||
|
||||
return count, "; ".join(summary)
|
||||
|
||||
|
||||
def verified_publisher_label(value: object) -> str:
|
||||
if not isinstance(value, dict):
|
||||
return ""
|
||||
return (
|
||||
safe_text(value.get("displayName"))
|
||||
or safe_text(value.get("verifiedPublisherId"))
|
||||
or safe_text(value.get("addedDateTime"))
|
||||
)
|
||||
|
||||
|
||||
def iter_exported_json(export_dir: Path) -> list[tuple[Path, dict[str, Any]]]:
|
||||
if not export_dir.exists():
|
||||
return []
|
||||
items: list[tuple[Path, dict[str, Any]]] = []
|
||||
for path in sorted(export_dir.rglob("*.json")):
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
continue
|
||||
if isinstance(payload, dict):
|
||||
items.append((path, payload))
|
||||
return items
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
root = Path(args.root).resolve()
|
||||
output_dir = Path(args.output_dir).resolve()
|
||||
output_path = output_dir / args.output_name
|
||||
|
||||
if not root.exists():
|
||||
raise SystemExit(f"Backup path does not exist: {root}")
|
||||
|
||||
app_reg_dir = root / "App Registrations"
|
||||
ent_apps_dir = root / "Enterprise Applications"
|
||||
|
||||
app_reg_items = iter_exported_json(app_reg_dir)
|
||||
ent_app_items = iter_exported_json(ent_apps_dir)
|
||||
|
||||
rows: list[dict[str, str]] = []
|
||||
|
||||
for source_path, payload in app_reg_items:
|
||||
owner_count, owners = summarize_owners(payload.get("ownersResolved"))
|
||||
perm_count, permissions = summarize_required_resource_access(
|
||||
payload.get("requiredResourceAccessResolved")
|
||||
)
|
||||
rows.append(
|
||||
{
|
||||
"AppType": "AppRegistration",
|
||||
"DisplayName": safe_text(payload.get("displayName")) or source_path.stem,
|
||||
"ObjectId": safe_text(payload.get("id")),
|
||||
"AppId": safe_text(payload.get("appId")),
|
||||
"SignInAudience": safe_text(payload.get("signInAudience")),
|
||||
"ServicePrincipalType": "",
|
||||
"AccountEnabled": "",
|
||||
"PublisherDomain": safe_text(payload.get("publisherDomain")),
|
||||
"PublisherName": "",
|
||||
"VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")),
|
||||
"CreatedDateTime": safe_text(payload.get("createdDateTime")),
|
||||
"OwnersCount": str(owner_count),
|
||||
"OwnersResolved": owners,
|
||||
"ResolvedPermissionCount": str(perm_count),
|
||||
"ResolvedPermissions": permissions,
|
||||
"ResolvedAppRoleAssignmentCount": "0",
|
||||
"ResolvedAppRoleAssignments": "",
|
||||
"SourceFile": source_path.relative_to(root).as_posix(),
|
||||
}
|
||||
)
|
||||
|
||||
for source_path, payload in ent_app_items:
|
||||
owner_count, owners = summarize_owners(payload.get("ownersResolved"))
|
||||
assignment_count, assignments = summarize_enterprise_app_role_assignments(
|
||||
payload.get("appRoleAssignmentsResolved")
|
||||
)
|
||||
rows.append(
|
||||
{
|
||||
"AppType": "EnterpriseApplication",
|
||||
"DisplayName": safe_text(payload.get("displayName")) or source_path.stem,
|
||||
"ObjectId": safe_text(payload.get("id")),
|
||||
"AppId": safe_text(payload.get("appId")),
|
||||
"SignInAudience": "",
|
||||
"ServicePrincipalType": safe_text(payload.get("servicePrincipalType")),
|
||||
"AccountEnabled": safe_text(payload.get("accountEnabled")),
|
||||
"PublisherDomain": "",
|
||||
"PublisherName": safe_text(payload.get("publisherName")),
|
||||
"VerifiedPublisher": verified_publisher_label(payload.get("verifiedPublisher")),
|
||||
"CreatedDateTime": "",
|
||||
"OwnersCount": str(owner_count),
|
||||
"OwnersResolved": owners,
|
||||
"ResolvedPermissionCount": "0",
|
||||
"ResolvedPermissions": "",
|
||||
"ResolvedAppRoleAssignmentCount": str(assignment_count),
|
||||
"ResolvedAppRoleAssignments": assignments,
|
||||
"SourceFile": source_path.relative_to(root).as_posix(),
|
||||
}
|
||||
)
|
||||
|
||||
rows.sort(
|
||||
key=lambda row: (
|
||||
row["AppType"].lower(),
|
||||
row["DisplayName"].lower(),
|
||||
row["ObjectId"].lower(),
|
||||
)
|
||||
)
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
fieldnames = [
|
||||
"AppType",
|
||||
"DisplayName",
|
||||
"ObjectId",
|
||||
"AppId",
|
||||
"SignInAudience",
|
||||
"ServicePrincipalType",
|
||||
"AccountEnabled",
|
||||
"PublisherDomain",
|
||||
"PublisherName",
|
||||
"VerifiedPublisher",
|
||||
"CreatedDateTime",
|
||||
"OwnersCount",
|
||||
"OwnersResolved",
|
||||
"ResolvedPermissionCount",
|
||||
"ResolvedPermissions",
|
||||
"ResolvedAppRoleAssignmentCount",
|
||||
"ResolvedAppRoleAssignments",
|
||||
"SourceFile",
|
||||
]
|
||||
with output_path.open("w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
|
||||
print(
|
||||
"Generated apps inventory report: "
|
||||
+ f"{output_path} "
|
||||
+ f"(rows={len(rows)}, appRegistrations={len(app_reg_items)}, enterpriseApps={len(ent_app_items)})"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
419
scripts/generate_assignment_report.py
Normal file
419
scripts/generate_assignment_report.py
Normal file
@@ -0,0 +1,419 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a policy assignment inventory report from Intune backup JSON files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
GROUP_TARGET_TYPES = {
|
||||
"#microsoft.graph.groupAssignmentTarget",
|
||||
"#microsoft.graph.exclusionGroupAssignmentTarget",
|
||||
}
|
||||
|
||||
DEFAULT_POLICY_TYPES = {
|
||||
"app configuration",
|
||||
"app protection",
|
||||
"applications",
|
||||
"compliance policies",
|
||||
"conditional access",
|
||||
"device configurations",
|
||||
"enrollment configurations",
|
||||
"enrollment profiles",
|
||||
"filters",
|
||||
"scripts",
|
||||
"settings catalog",
|
||||
}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).")
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
required=True,
|
||||
help="Directory where report files will be written.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--policy-type",
|
||||
action="append",
|
||||
default=[],
|
||||
help=(
|
||||
"Optional filter for policy type (top-level backup folder name). "
|
||||
"Repeat the flag or pass a comma-separated list."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--graph-type",
|
||||
action="append",
|
||||
default=[],
|
||||
help=(
|
||||
"Optional filter for Graph @odata.type values. "
|
||||
"Repeat the flag or pass a comma-separated list."
|
||||
),
|
||||
)
|
||||
return parser.parse_args()
|
||||
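# Example invocation (illustrative; filters may be repeated or comma-separated as described above):
#   python scripts/generate_assignment_report.py \
#       --root tenant-state/intune \
#       --output-dir tenant-state/intune/reports \
#       --policy-type "settings catalog,compliance policies"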
|
||||
|
||||
@dataclass
|
||||
class AssignmentRow:
|
||||
category: str
|
||||
policy_type: str
|
||||
object_name: str
|
||||
object_type: str
|
||||
assignment_state: str
|
||||
assignment_count: int
|
||||
intent: str
|
||||
assignment_target: str
|
||||
target_type: str
|
||||
assignment_filter: str
|
||||
filter_type: str
|
||||
source_file: str
|
||||
|
||||
|
||||
def safe_text(value: object) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def normalize_intent(intent: str) -> str:
|
||||
normalized = safe_text(intent).lower()
|
||||
if normalized in {"apply", "include"}:
|
||||
return "Include"
|
||||
if normalized in {"exclude"}:
|
||||
return "Exclude"
|
||||
if not normalized:
|
||||
return "Include"
|
||||
return normalized.capitalize()
|
||||
|
||||
|
||||
def infer_intent(assignment: dict, target_type: str) -> str:
|
||||
target_type_lower = safe_text(target_type).lower()
|
||||
if "exclusion" in target_type_lower:
|
||||
return "Exclude"
|
||||
explicit = safe_text(assignment.get("intent"))
|
||||
if explicit:
|
||||
return normalize_intent(explicit)
|
||||
return "Include"
|
||||
|
||||
|
||||
def resolve_assignment_target(target: dict) -> str:
|
||||
target_type = safe_text(target.get("@odata.type"))
|
||||
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
|
||||
return "All devices"
|
||||
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
|
||||
return "All users"
|
||||
if target_type in GROUP_TARGET_TYPES:
|
||||
return (
|
||||
safe_text(target.get("groupDisplayName"))
|
||||
or safe_text(target.get("groupName"))
|
||||
or safe_text(target.get("groupId"))
|
||||
or "Unresolved group"
|
||||
)
|
||||
return (
|
||||
safe_text(target.get("groupDisplayName"))
|
||||
or safe_text(target.get("groupName"))
|
||||
or safe_text(target.get("displayName"))
|
||||
or safe_text(target.get("id"))
|
||||
or "Unknown target"
|
||||
)
|
||||
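# Hypothetical example: a target of
#   {"@odata.type": "#microsoft.graph.exclusionGroupAssignmentTarget", "groupId": "<guid>"}
# resolves to the enriched groupDisplayName/groupName when present, otherwise to the groupId,
# and infer_intent() reports "Exclude" because the target type contains "exclusion".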
|
||||
|
||||
def escape_md_cell(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ").strip()
|
||||
|
||||
|
||||
def parse_filter_values(raw_values: list[str]) -> set[str]:
|
||||
values = set()
|
||||
for raw in raw_values:
|
||||
for item in safe_text(raw).split(","):
|
||||
normalized = safe_text(item)
|
||||
if normalized:
|
||||
values.add(normalized.lower())
|
||||
return values
|
||||
|
||||
|
||||
def iter_assignment_rows(
|
||||
root: Path,
|
||||
policy_type_filter: set[str],
|
||||
graph_type_filter: set[str],
|
||||
) -> Iterable[AssignmentRow]:
|
||||
excluded_categories = {
|
||||
"App Registrations",
|
||||
"Enterprise Applications",
|
||||
}
|
||||
for path in sorted(root.rglob("*.json")):
|
||||
try:
|
||||
rel_path = path.relative_to(root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if rel_path.parts and rel_path.parts[0] in {"reports"}:
|
||||
continue
|
||||
if "__archive__" in rel_path.parts:
|
||||
continue
|
||||
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
|
||||
object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name"))
|
||||
if not object_name:
|
||||
object_name = path.stem.split("__")[0]
|
||||
object_type = safe_text(payload.get("@odata.type"))
|
||||
category = "/".join(rel_path.parent.parts)
|
||||
policy_type = rel_path.parts[0] if rel_path.parts else ""
|
||||
|
||||
if any(
|
||||
category == excluded or category.startswith(f"{excluded}/")
|
||||
for excluded in excluded_categories
|
||||
):
|
||||
continue
|
||||
|
||||
if policy_type_filter and policy_type.lower() not in policy_type_filter:
|
||||
continue
|
||||
if graph_type_filter and object_type.lower() not in graph_type_filter:
|
||||
continue
|
||||
|
||||
assignments = payload.get("assignments")
|
||||
if not isinstance(assignments, list):
|
||||
yield AssignmentRow(
|
||||
category=category,
|
||||
policy_type=policy_type,
|
||||
object_name=object_name,
|
||||
object_type=object_type,
|
||||
assignment_state="NotExported",
|
||||
assignment_count=0,
|
||||
intent="None",
|
||||
assignment_target="Not exported in backup",
|
||||
target_type="",
|
||||
assignment_filter="",
|
||||
filter_type="",
|
||||
source_file=rel_path.as_posix(),
|
||||
)
|
||||
continue
|
||||
|
||||
if not assignments:
|
||||
yield AssignmentRow(
|
||||
category=category,
|
||||
policy_type=policy_type,
|
||||
object_name=object_name,
|
||||
object_type=object_type,
|
||||
assignment_state="Unassigned",
|
||||
assignment_count=0,
|
||||
intent="None",
|
||||
assignment_target="No assignments",
|
||||
target_type="",
|
||||
assignment_filter="",
|
||||
filter_type="",
|
||||
source_file=rel_path.as_posix(),
|
||||
)
|
||||
continue
|
||||
|
||||
assignment_count = len([item for item in assignments if isinstance(item, dict)])
|
||||
if assignment_count == 0:
|
||||
yield AssignmentRow(
|
||||
category=category,
|
||||
policy_type=policy_type,
|
||||
object_name=object_name,
|
||||
object_type=object_type,
|
||||
assignment_state="Unassigned",
|
||||
assignment_count=0,
|
||||
intent="None",
|
||||
assignment_target="No assignments",
|
||||
target_type="",
|
||||
assignment_filter="",
|
||||
filter_type="",
|
||||
source_file=rel_path.as_posix(),
|
||||
)
|
||||
continue
|
||||
|
||||
for assignment in assignments:
|
||||
if not isinstance(assignment, dict):
|
||||
continue
|
||||
target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {}
|
||||
target_type = safe_text(target.get("@odata.type"))
|
||||
intent = infer_intent(assignment, target_type)
|
||||
assignment_target = resolve_assignment_target(target)
|
||||
assignment_filter = safe_text(target.get("deviceAndAppManagementAssignmentFilterId"))
|
||||
filter_type = safe_text(target.get("deviceAndAppManagementAssignmentFilterType"))
|
||||
yield AssignmentRow(
|
||||
category=category,
|
||||
policy_type=policy_type,
|
||||
object_name=object_name,
|
||||
object_type=object_type,
|
||||
assignment_state="Assigned",
|
||||
assignment_count=assignment_count,
|
||||
intent=intent,
|
||||
assignment_target=assignment_target,
|
||||
target_type=target_type,
|
||||
assignment_filter=assignment_filter,
|
||||
filter_type=filter_type,
|
||||
source_file=rel_path.as_posix(),
|
||||
)
|
||||
|
||||
|
||||
def write_csv(rows: list[AssignmentRow], output_path: Path) -> None:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with output_path.open("w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.writer(handle)
|
||||
writer.writerow(
|
||||
[
|
||||
"Category",
|
||||
"PolicyType",
|
||||
"ObjectName",
|
||||
"ObjectType",
|
||||
"AssignmentState",
|
||||
"AssignmentCount",
|
||||
"Intent",
|
||||
"AssignmentTarget",
|
||||
"TargetType",
|
||||
"AssignmentFilter",
|
||||
"FilterType",
|
||||
"SourceFile",
|
||||
]
|
||||
)
|
||||
for row in rows:
|
||||
writer.writerow(
|
||||
[
|
||||
row.category,
|
||||
row.policy_type,
|
||||
row.object_name,
|
||||
row.object_type,
|
||||
row.assignment_state,
|
||||
row.assignment_count,
|
||||
row.intent,
|
||||
row.assignment_target,
|
||||
row.target_type,
|
||||
row.assignment_filter,
|
||||
row.filter_type,
|
||||
row.source_file,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def write_markdown(rows: list[AssignmentRow], output_path: Path) -> None:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
objects = {(row.category, row.object_name, row.source_file) for row in rows}
|
||||
assigned_objects = {
|
||||
(row.category, row.object_name, row.source_file)
|
||||
for row in rows
|
||||
if row.assignment_state == "Assigned"
|
||||
}
|
||||
unassigned_objects = {
|
||||
(row.category, row.object_name, row.source_file)
|
||||
for row in rows
|
||||
if row.assignment_state == "Unassigned"
|
||||
}
|
||||
not_exported_objects = {
|
||||
(row.category, row.object_name, row.source_file)
|
||||
for row in rows
|
||||
if row.assignment_state == "NotExported"
|
||||
}
|
||||
policy_type_counts = {}
|
||||
for row in rows:
|
||||
key = row.policy_type or "Unknown"
|
||||
policy_type_counts[key] = policy_type_counts.get(key, 0) + 1
|
||||
|
||||
with output_path.open("w", encoding="utf-8") as handle:
|
||||
handle.write("# Policy Assignment Inventory Report\n\n")
|
||||
handle.write(f"Generated: `{generated}`\n\n")
|
||||
handle.write(f"- Total objects in report: **{len(objects)}**\n")
|
||||
handle.write(f"- Objects with assignments: **{len(assigned_objects)}**\n")
|
||||
handle.write(f"- Objects without assignments: **{len(unassigned_objects)}**\n")
|
||||
handle.write(f"- Objects with assignment field not exported: **{len(not_exported_objects)}**\n")
|
||||
handle.write(f"- Total rows: **{len(rows)}**\n\n")
|
||||
handle.write("## Rows by policy type\n\n")
|
||||
handle.write("| Policy Type | Rows |\n")
|
||||
handle.write("|---|---|\n")
|
||||
for policy_type, count in sorted(policy_type_counts.items(), key=lambda item: item[0].lower()):
|
||||
handle.write(f"| {escape_md_cell(policy_type)} | {count} |\n")
|
||||
handle.write("\n")
|
||||
handle.write(
|
||||
"| Policy Type | Category | Object | Object Type | Assignment State | Assignment Count | Intent | Assignment Target | Target Type | Filter | Filter Type | Source |\n"
|
||||
)
|
||||
handle.write("|---|---|---|---|---|---|---|---|---|---|---|---|\n")
|
||||
for row in rows:
|
||||
handle.write(
|
||||
"| "
|
||||
+ " | ".join(
|
||||
[
|
||||
escape_md_cell(row.policy_type),
|
||||
escape_md_cell(row.category),
|
||||
escape_md_cell(row.object_name),
|
||||
escape_md_cell(row.object_type),
|
||||
escape_md_cell(row.assignment_state),
|
||||
escape_md_cell(str(row.assignment_count)),
|
||||
escape_md_cell(row.intent),
|
||||
escape_md_cell(row.assignment_target),
|
||||
escape_md_cell(row.target_type),
|
||||
escape_md_cell(row.assignment_filter),
|
||||
escape_md_cell(row.filter_type),
|
||||
escape_md_cell(row.source_file),
|
||||
]
|
||||
)
|
||||
+ " |\n"
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
root = Path(args.root).resolve()
|
||||
output_dir = Path(args.output_dir).resolve()
|
||||
policy_type_filter = parse_filter_values(args.policy_type)
|
||||
graph_type_filter = parse_filter_values(args.graph_type)
|
||||
using_default_policy_scope = False
|
||||
|
||||
if not policy_type_filter:
|
||||
policy_type_filter = set(DEFAULT_POLICY_TYPES)
|
||||
using_default_policy_scope = True
|
||||
|
||||
if not root.exists():
|
||||
raise SystemExit(f"Backup path does not exist: {root}")
|
||||
|
||||
rows = sorted(
|
||||
iter_assignment_rows(root, policy_type_filter, graph_type_filter),
|
||||
key=lambda x: (
|
||||
x.policy_type.lower(),
|
||||
x.category.lower(),
|
||||
x.object_name.lower(),
|
||||
x.assignment_state,
|
||||
x.intent.lower(),
|
||||
x.assignment_target.lower(),
|
||||
),
|
||||
)
|
||||
|
||||
markdown_path = output_dir / "policy-assignments.md"
|
||||
csv_path = output_dir / "policy-assignments.csv"
|
||||
write_markdown(rows, markdown_path)
|
||||
write_csv(rows, csv_path)
|
||||
|
||||
print(
|
||||
f"Generated assignment report with {len(rows)} rows: "
|
||||
f"{markdown_path} and {csv_path}"
|
||||
)
|
||||
if using_default_policy_scope:
|
||||
print(
|
||||
"Applied default policy scope: "
|
||||
+ ", ".join(sorted(DEFAULT_POLICY_TYPES))
|
||||
)
|
||||
elif policy_type_filter:
|
||||
print(f"Applied policy type filter: {', '.join(sorted(policy_type_filter))}")
|
||||
if graph_type_filter:
|
||||
print(f"Applied graph type filter: {', '.join(sorted(graph_type_filter))}")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
231
scripts/generate_object_inventory_reports.py
Normal file
231
scripts/generate_object_inventory_reports.py
Normal file
@@ -0,0 +1,231 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate broad object inventory CSV reports from backup JSON files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
GROUP_TARGET_TYPES = {
|
||||
"#microsoft.graph.groupAssignmentTarget",
|
||||
"#microsoft.graph.exclusionGroupAssignmentTarget",
|
||||
}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--root", required=True, help="Path to the workload backup root (for example tenant-state/intune).")
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
required=True,
|
||||
help="Directory where report files will be written.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-type-dir",
|
||||
default="Object Inventory",
|
||||
help="Directory name under output-dir for per-policy-type CSVs.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
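# Example invocation (illustrative paths; per-type CSVs land under the default "Object Inventory" folder):
#   python scripts/generate_object_inventory_reports.py \
#       --root tenant-state/intune \
#       --output-dir tenant-state/intune/reports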
|
||||
|
||||
def safe_text(value: object) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def slugify(value: str) -> str:
|
||||
text = safe_text(value).lower()
|
||||
text = re.sub(r"[^a-z0-9]+", "-", text).strip("-")
|
||||
return text or "unknown"
|
||||
|
||||
|
||||
def infer_intent(assignment: dict, target_type: str) -> str:
|
||||
if "exclusion" in target_type.lower():
|
||||
return "Exclude"
|
||||
explicit = safe_text(assignment.get("intent")).lower()
|
||||
if explicit in {"exclude"}:
|
||||
return "Exclude"
|
||||
return "Include"
|
||||
|
||||
|
||||
def resolve_assignment_target(target: dict) -> str:
|
||||
target_type = safe_text(target.get("@odata.type"))
|
||||
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
|
||||
return "All devices"
|
||||
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
|
||||
return "All users"
|
||||
if target_type in GROUP_TARGET_TYPES:
|
||||
return (
|
||||
safe_text(target.get("groupDisplayName"))
|
||||
or safe_text(target.get("groupName"))
|
||||
or safe_text(target.get("groupId"))
|
||||
or "Unresolved group"
|
||||
)
|
||||
return (
|
||||
safe_text(target.get("groupDisplayName"))
|
||||
or safe_text(target.get("groupName"))
|
||||
or safe_text(target.get("displayName"))
|
||||
or safe_text(target.get("id"))
|
||||
or "Unknown target"
|
||||
)
|
||||
|
||||
|
||||
def summarize_assignments(payload: dict) -> dict[str, object]:
|
||||
assignments = payload.get("assignments")
|
||||
if not isinstance(assignments, list):
|
||||
return {
|
||||
"state": "NotExported",
|
||||
"total": 0,
|
||||
"include_targets": "",
|
||||
"exclude_targets": "",
|
||||
"all_users_assigned": "false",
|
||||
"all_devices_assigned": "false",
|
||||
}
|
||||
|
||||
include_targets: list[str] = []
|
||||
exclude_targets: list[str] = []
|
||||
all_users = False
|
||||
all_devices = False
|
||||
|
||||
valid = [item for item in assignments if isinstance(item, dict)]
|
||||
for assignment in valid:
|
||||
target = assignment.get("target") if isinstance(assignment.get("target"), dict) else {}
|
||||
target_type = safe_text(target.get("@odata.type"))
|
||||
target_name = resolve_assignment_target(target)
|
||||
intent = infer_intent(assignment, target_type)
|
||||
if target_type == "#microsoft.graph.allLicensedUsersAssignmentTarget":
|
||||
all_users = True
|
||||
if target_type == "#microsoft.graph.allDevicesAssignmentTarget":
|
||||
all_devices = True
|
||||
if intent == "Exclude":
|
||||
exclude_targets.append(target_name)
|
||||
else:
|
||||
include_targets.append(target_name)
|
||||
|
||||
state = "Assigned" if valid else "Unassigned"
|
||||
if assignments == []:
|
||||
state = "Unassigned"
|
||||
|
||||
return {
|
||||
"state": state,
|
||||
"total": len(valid),
|
||||
"include_targets": "; ".join(sorted(set(include_targets))),
|
||||
"exclude_targets": "; ".join(sorted(set(exclude_targets))),
|
||||
"all_users_assigned": str(all_users).lower(),
|
||||
"all_devices_assigned": str(all_devices).lower(),
|
||||
}
|
||||
|
||||
|
||||
def iter_rows(root: Path) -> list[dict[str, str]]:
|
||||
rows: list[dict[str, str]] = []
|
||||
for path in sorted(root.rglob("*.json")):
|
||||
rel = path.relative_to(root)
|
||||
if rel.parts and rel.parts[0] in {"reports"}:
|
||||
continue
|
||||
if "__archive__" in rel.parts:
|
||||
continue
|
||||
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
|
||||
summary = summarize_assignments(payload)
|
||||
policy_type = rel.parts[0] if rel.parts else ""
|
||||
category = "/".join(rel.parent.parts)
|
||||
object_name = safe_text(payload.get("displayName")) or safe_text(payload.get("name"))
|
||||
if not object_name:
|
||||
object_name = path.stem.split("__")[0]
|
||||
|
||||
rows.append(
|
||||
{
|
||||
"PolicyType": policy_type,
|
||||
"Category": category,
|
||||
"ObjectName": object_name,
|
||||
"ObjectType": safe_text(payload.get("@odata.type")),
|
||||
"ObjectId": safe_text(payload.get("id")),
|
||||
"AppId": safe_text(payload.get("appId")),
|
||||
"Description": safe_text(payload.get("description")),
|
||||
"AssignmentState": safe_text(summary["state"]),
|
||||
"AssignmentCount": str(summary["total"]),
|
||||
"IncludeTargets": safe_text(summary["include_targets"]),
|
||||
"ExcludeTargets": safe_text(summary["exclude_targets"]),
|
||||
"AllUsersAssigned": safe_text(summary["all_users_assigned"]),
|
||||
"AllDevicesAssigned": safe_text(summary["all_devices_assigned"]),
|
||||
"SourceFile": rel.as_posix(),
|
||||
}
|
||||
)
|
||||
|
||||
rows.sort(
|
||||
key=lambda row: (
|
||||
row["PolicyType"].lower(),
|
||||
row["Category"].lower(),
|
||||
row["ObjectName"].lower(),
|
||||
row["SourceFile"].lower(),
|
||||
)
|
||||
)
|
||||
return rows
|
||||
|
||||
|
||||
def write_csv(path: Path, rows: list[dict[str, str]]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
headers = [
|
||||
"PolicyType",
|
||||
"Category",
|
||||
"ObjectName",
|
||||
"ObjectType",
|
||||
"ObjectId",
|
||||
"AppId",
|
||||
"Description",
|
||||
"AssignmentState",
|
||||
"AssignmentCount",
|
||||
"IncludeTargets",
|
||||
"ExcludeTargets",
|
||||
"AllUsersAssigned",
|
||||
"AllDevicesAssigned",
|
||||
"SourceFile",
|
||||
]
|
||||
with path.open("w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=headers)
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
root = Path(args.root).resolve()
|
||||
output_dir = Path(args.output_dir).resolve()
|
||||
per_type_root = output_dir / args.per_type_dir
|
||||
|
||||
if not root.exists():
|
||||
raise SystemExit(f"Backup path does not exist: {root}")
|
||||
|
||||
rows = iter_rows(root)
|
||||
all_report = output_dir / "object-inventory-all.csv"
|
||||
write_csv(all_report, rows)
|
||||
|
||||
per_type_counts: dict[str, int] = {}
|
||||
for policy_type in sorted({row["PolicyType"] for row in rows}):
|
||||
type_rows = [row for row in rows if row["PolicyType"] == policy_type]
|
||||
per_type_report = per_type_root / f"{slugify(policy_type)}-inventory.csv"
|
||||
write_csv(per_type_report, type_rows)
|
||||
per_type_counts[policy_type] = len(type_rows)
|
||||
|
||||
print(
|
||||
f"Generated object inventory reports: all={all_report}, "
|
||||
f"perTypeCount={len(per_type_counts)}, rows={len(rows)}"
|
||||
)
|
||||
for policy_type, count in sorted(per_type_counts.items(), key=lambda item: item[0].lower()):
|
||||
print(f" - {policy_type}: {count} rows")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
447
scripts/queue_post_merge_restore.py
Normal file
447
scripts/queue_post_merge_restore.py
Normal file
@@ -0,0 +1,447 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Queue restore automatically after merged rolling PR that contains /reject decisions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import datetime as dt
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# common.py lives in the same directory; ensure it can be imported when the
|
||||
# script is executed directly.
|
||||
_sys_path_inserted = False
|
||||
if __file__:
|
||||
_script_dir = str(Path(__file__).resolve().parent)
|
||||
if _script_dir not in sys.path:
|
||||
sys.path.insert(0, _script_dir)
|
||||
_sys_path_inserted = True
|
||||
|
||||
import common
|
||||
|
||||
if _sys_path_inserted:
|
||||
sys.path.pop(0)
|
||||
|
||||
_env_text = common.env_text
|
||||
_env_bool = common.env_bool
|
||||
_request_json = common.request_json
|
||||
|
||||
REJECT_CMD_RE = re.compile(r"(?im)^\s*(?:/|#)?reject\b")
|
||||
DECISION_RE = re.compile(r"(?im)^\s*(?:/|#)?(?P<decision>reject|accept)\b")
|
||||
AUTO_TICKET_THREAD_PREFIX = "AUTO-CHANGE-TICKET:"
|
||||
MERGE_MARKER_PREFIX = "AUTO-RESTORE-AFTER-MERGE:"
|
||||
|
||||
|
||||
def _normalize_branch(branch: str) -> str:
|
||||
b = branch.strip()
|
||||
if b.startswith("refs/heads/"):
|
||||
return b[len("refs/heads/") :]
|
||||
return b
|
||||
|
||||
|
||||
def _ref_from_branch(branch: str) -> str:
|
||||
return f"refs/heads/{_normalize_branch(branch)}"
|
||||
|
||||
|
||||
def _parse_iso_utc(value: str) -> dt.datetime | None:
|
||||
text = (value or "").strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.endswith("Z"):
|
||||
text = text[:-1] + "+00:00"
|
||||
try:
|
||||
parsed = dt.datetime.fromisoformat(text)
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=dt.timezone.utc)
|
||||
return parsed.astimezone(dt.timezone.utc)
|
||||
|
||||
|
||||
def _query_completed_prs(
|
||||
repo_api: str,
|
||||
headers: dict[str, str],
|
||||
source_ref: str,
|
||||
target_ref: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
query = urllib.parse.urlencode(
|
||||
{
|
||||
"searchCriteria.status": "completed",
|
||||
"searchCriteria.sourceRefName": source_ref,
|
||||
"searchCriteria.targetRefName": target_ref,
|
||||
"api-version": "7.1",
|
||||
},
|
||||
quote_via=urllib.parse.quote,
|
||||
safe="/",
|
||||
)
|
||||
payload = _request_json(f"{repo_api}/pullrequests?{query}", headers=headers)
|
||||
items = payload.get("value", []) if isinstance(payload, dict) else []
|
||||
return sorted(items, key=lambda x: x.get("closedDate", ""), reverse=True)
|
||||
|
||||
|
||||
def _threads(repo_api: str, headers: dict[str, str], pr_id: int) -> list[dict[str, Any]]:
|
||||
payload = _request_json(
|
||||
f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1",
|
||||
headers=headers,
|
||||
)
|
||||
return payload.get("value", []) if isinstance(payload, dict) else []
|
||||
|
||||
|
||||
def _thread_comment_contents(threads: list[dict[str, Any]]) -> list[str]:
|
||||
out: list[str] = []
|
||||
for thread in threads:
|
||||
comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
|
||||
for comment in comments:
|
||||
out.append(str(comment.get("content", "") or ""))
|
||||
return out
|
||||
|
||||
|
||||
def _ticket_path_from_content(content: str) -> str | None:
|
||||
marker_re = re.compile(
|
||||
r"(?:^|\n)\s*(?:Automation marker:\s*)?"
|
||||
+ re.escape(AUTO_TICKET_THREAD_PREFIX)
|
||||
+ r"(?P<id>[A-Za-z0-9_-]+)\s*(?:$|\n)"
|
||||
)
|
||||
match = marker_re.search(content or "")
|
||||
if not match:
|
||||
return None
|
||||
encoded = match.group("id")
|
||||
padding = "=" * ((4 - len(encoded) % 4) % 4)
|
||||
try:
|
||||
return base64.urlsafe_b64decode((encoded + padding).encode("ascii")).decode("utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
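# Illustrative example (hypothetical ticket path): a comment containing
#   "Automation marker: AUTO-CHANGE-TICKET:dGVuYW50LXN0YXRlL2ludHVuZS9mb28uanNvbg"
# is matched by the marker regex above and, after padding is restored, urlsafe-base64-decodes
# to "tenant-state/intune/foo.json".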
|
||||
|
||||
def _latest_thread_decision(comments: list[dict[str, Any]]) -> str | None:
|
||||
decision: str | None = None
|
||||
|
||||
def _comment_sort_key(comment: dict[str, Any]) -> tuple[int, int]:
|
||||
try:
|
||||
comment_id = int(comment.get("id", 0))
|
||||
except Exception:
|
||||
comment_id = 0
|
||||
try:
|
||||
parent_id = int(comment.get("parentCommentId", 0))
|
||||
except Exception:
|
||||
parent_id = 0
|
||||
return (comment_id, parent_id)
|
||||
|
||||
for comment in sorted(comments, key=_comment_sort_key):
|
||||
content = str(comment.get("content", "") or "")
|
||||
match = DECISION_RE.search(content)
|
||||
if match:
|
||||
decision = match.group("decision").lower()
|
||||
return decision
|
||||
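# Illustrative behaviour: lines beginning with "/reject", "reject" or "#accept" all match
# DECISION_RE; when a thread carries several commands, the comment with the highest id wins,
# so "/reject" followed later by "/accept" leaves the thread accepted.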
|
||||
|
||||
def _rejected_ticket_paths(threads: list[dict[str, Any]]) -> list[str]:
|
||||
rejected: set[str] = set()
|
||||
for thread in threads:
|
||||
comments = thread.get("comments", []) if isinstance(thread.get("comments"), list) else []
|
||||
marker_path: str | None = None
|
||||
for comment in comments:
|
||||
marker_path = _ticket_path_from_content(str(comment.get("content", "") or ""))
|
||||
if marker_path:
|
||||
break
|
||||
if not marker_path:
|
||||
continue
|
||||
|
||||
decision = _latest_thread_decision(comments)
|
||||
if decision == "reject":
|
||||
rejected.add(marker_path)
|
||||
return sorted(rejected)
|
||||
|
||||
|
||||
def _has_reject_signal(comments: list[str]) -> bool:
|
||||
for content in comments:
|
||||
if REJECT_CMD_RE.search(content):
|
||||
return True
|
||||
if "Auto-action: /reject detected." in content:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_merge_marker(comments: list[str], merge_commit: str) -> bool:
|
||||
marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
|
||||
return any(marker in content for content in comments)
|
||||
|
||||
|
||||
def _is_permission_error(exc: Exception) -> bool:
|
||||
msg = str(exc).lower()
|
||||
return "http 403" in msg or "forbidden" in msg
|
||||
|
||||
|
||||
def _normalize_exclude_csv(value: str) -> str:
|
||||
normalized = str(value or "").strip()
|
||||
if normalized.lower() in {"", "none", "null", "n/a", "-", "_none_"}:
|
||||
return ""
|
||||
return normalized
|
||||
|
||||
|
||||
def _diagnose_queue_permission(
|
||||
collection_uri: str,
|
||||
project: str,
|
||||
headers: dict[str, str],
|
||||
definition_id: int,
|
||||
) -> None:
|
||||
definition_url = (
|
||||
f"{collection_uri}/{project}/_apis/build/definitions/{definition_id}"
|
||||
"?api-version=7.1"
|
||||
)
|
||||
try:
|
||||
payload = _request_json(definition_url, headers=headers)
|
||||
definition_name = str(payload.get("name", "") or "").strip()
|
||||
print(
|
||||
"Diagnostic: restore pipeline definition is readable "
|
||||
f"(id={definition_id}, name='{definition_name or 'n/a'}')."
|
||||
)
|
||||
print(
|
||||
"Diagnostic: queue call was forbidden, so missing permission is likely "
|
||||
"'Queue builds' on that restore pipeline (or pipeline is not authorized to use it)."
|
||||
)
|
||||
except Exception as diag_exc:
|
||||
print(
|
||||
"Diagnostic: unable to read restore pipeline definition "
|
||||
f"id={definition_id}. Details: {diag_exc}"
|
||||
)
|
||||
print(
|
||||
"Diagnostic: likely wrong definition ID, wrong project, or missing 'View builds' permission "
|
||||
"for the calling pipeline identity."
|
||||
)
|
||||
|
||||
|
||||
def _queue_restore_pipeline(
|
||||
collection_uri: str,
|
||||
project: str,
|
||||
headers: dict[str, str],
|
||||
definition_id: int,
|
||||
baseline_branch: str,
|
||||
include_entra_update: bool,
|
||||
dry_run: bool,
|
||||
update_assignments: bool,
|
||||
remove_unmanaged: bool,
|
||||
max_workers: int,
|
||||
exclude_csv: str,
|
||||
restore_mode: str = "full",
|
||||
restore_paths_csv: str = "",
|
||||
) -> dict[str, Any]:
|
||||
build_api = f"{collection_uri}/{project}/_apis/build/builds?api-version=7.1"
|
||||
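    # Keys under templateParameters must match the restore pipeline's runtime parameter names.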
template_parameters = {
|
||||
"dryRun": dry_run,
|
||||
"updateAssignments": update_assignments,
|
||||
"removeObjectsNotInBaseline": remove_unmanaged,
|
||||
"includeEntraUpdate": include_entra_update,
|
||||
"baselineBranch": baseline_branch,
|
||||
"maxWorkers": max_workers,
|
||||
"restoreMode": restore_mode,
|
||||
}
|
||||
if restore_mode == "selective" and restore_paths_csv.strip():
|
||||
template_parameters["restorePathsCsv"] = restore_paths_csv.strip()
|
||||
exclude_csv = _normalize_exclude_csv(exclude_csv)
|
||||
if exclude_csv:
|
||||
template_parameters["excludeCsv"] = exclude_csv
|
||||
body = {
|
||||
"definition": {"id": definition_id},
|
||||
"sourceBranch": _ref_from_branch(baseline_branch),
|
||||
"templateParameters": template_parameters,
|
||||
}
|
||||
return _request_json(build_api, headers=headers, method="POST", body=body)
|
||||
|
||||
|
||||
def _post_pr_thread(repo_api: str, headers: dict[str, str], pr_id: int, content: str) -> None:
|
||||
_request_json(
|
||||
f"{repo_api}/pullrequests/{pr_id}/threads?api-version=7.1",
|
||||
headers=headers,
|
||||
method="POST",
|
||||
body={
|
||||
"comments": [
|
||||
{
|
||||
"parentCommentId": 0,
|
||||
"content": content,
|
||||
"commentType": 1,
|
||||
}
|
||||
],
|
||||
"status": 1,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Queue restore after merged rolling PR with /reject decisions")
|
||||
parser.add_argument("--workload", required=True, choices=["intune", "entra"])
|
||||
parser.add_argument("--drift-branch", required=True)
|
||||
parser.add_argument("--baseline-branch", required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
if not _env_bool("AUTO_REMEDIATE_AFTER_MERGE", False):
|
||||
print("Post-merge auto-remediation disabled (set AUTO_REMEDIATE_AFTER_MERGE=true).")
|
||||
return 0
|
||||
|
||||
token = os.environ.get("SYSTEM_ACCESSTOKEN", "").strip()
|
||||
if not token:
|
||||
raise SystemExit("SYSTEM_ACCESSTOKEN is empty.")
|
||||
|
||||
definition_raw = _env_text("AUTO_REMEDIATE_RESTORE_PIPELINE_ID", "")
|
||||
if not definition_raw:
|
||||
print(
|
||||
"Post-merge auto-remediation queue skipped: "
|
||||
"AUTO_REMEDIATE_RESTORE_PIPELINE_ID is empty."
|
||||
)
|
||||
return 0
|
||||
|
||||
try:
|
||||
definition_id = int(definition_raw)
|
||||
except ValueError as exc:
|
||||
raise SystemExit(f"Invalid AUTO_REMEDIATE_RESTORE_PIPELINE_ID: {definition_raw}") from exc
|
||||
|
||||
max_workers_raw = _env_text("AUTO_REMEDIATE_MAX_WORKERS", "10")
|
||||
try:
|
||||
max_workers = int(max_workers_raw)
|
||||
except ValueError as exc:
|
||||
raise SystemExit(f"Invalid AUTO_REMEDIATE_MAX_WORKERS: {max_workers_raw}") from exc
|
||||
|
||||
lookback_hours_raw = _env_text("AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS", "168")
|
||||
try:
|
||||
lookback_hours = int(lookback_hours_raw)
|
||||
except ValueError as exc:
|
||||
raise SystemExit(f"Invalid AUTO_REMEDIATE_AFTER_MERGE_LOOKBACK_HOURS: {lookback_hours_raw}") from exc
|
||||
|
||||
collection_uri = os.environ["SYSTEM_COLLECTIONURI"].rstrip("/")
|
||||
project = os.environ["SYSTEM_TEAMPROJECT"]
|
||||
repository_id = os.environ["BUILD_REPOSITORY_ID"]
|
||||
|
||||
include_entra_update = _env_bool("AUTO_REMEDIATE_INCLUDE_ENTRA_UPDATE", False)
|
||||
dry_run = _env_bool("AUTO_REMEDIATE_DRY_RUN", False)
|
||||
update_assignments = _env_bool("AUTO_REMEDIATE_UPDATE_ASSIGNMENTS", True)
|
||||
remove_unmanaged = _env_bool("AUTO_REMEDIATE_REMOVE_OBJECTS", False)
|
||||
exclude_csv = _normalize_exclude_csv(_env_text("AUTO_REMEDIATE_EXCLUDE_CSV", ""))
|
||||
|
||||
source_ref = _ref_from_branch(args.drift_branch)
|
||||
target_ref = _ref_from_branch(args.baseline_branch)
|
||||
repo_api = f"{collection_uri}/{project}/_apis/git/repositories/{repository_id}"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(hours=lookback_hours)
|
||||
completed = _query_completed_prs(repo_api, headers, source_ref, target_ref)
|
||||
|
||||
candidate: dict[str, Any] | None = None
|
||||
candidate_threads: list[dict[str, Any]] = []
|
||||
candidate_comments: list[str] = []
|
||||
|
||||
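    # Walk completed PRs newest-first; pick the first one inside the lookback window that
    # carries a reviewer /reject signal and has no merge marker yet (i.e. not handled before).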
for pr in completed:
|
||||
closed_at = _parse_iso_utc(str(pr.get("closedDate", "") or ""))
|
||||
if closed_at and closed_at < cutoff:
|
||||
continue
|
||||
|
||||
merge_commit = (((pr.get("lastMergeCommit") or {}).get("commitId")) or "").strip()
|
||||
if not merge_commit:
|
||||
continue
|
||||
|
||||
pr_id = int(pr.get("pullRequestId"))
|
||||
threads = _threads(repo_api, headers, pr_id)
|
||||
comments = _thread_comment_contents(threads)
|
||||
|
||||
if not _has_reject_signal(comments):
|
||||
continue
|
||||
|
||||
if _has_merge_marker(comments, merge_commit):
|
||||
continue
|
||||
|
||||
candidate = pr
|
||||
candidate_threads = threads
|
||||
candidate_comments = comments
|
||||
break
|
||||
|
||||
if not candidate:
|
||||
print("No merged rolling PR requiring post-merge remediation was found.")
|
||||
return 0
|
||||
|
||||
pr_id = int(candidate.get("pullRequestId"))
|
||||
merge_commit = (((candidate.get("lastMergeCommit") or {}).get("commitId")) or "").strip()
|
||||
rejected_paths = _rejected_ticket_paths(candidate_threads)
|
||||
|
||||
restore_mode = "full"
|
||||
restore_paths_csv = ""
|
||||
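    # Per-path selective restore is only wired up for the Intune workload; Entra runs
    # (and runs with no rejected paths) fall back to a full restore.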
if args.workload == "intune" and rejected_paths:
|
||||
restore_mode = "selective"
|
||||
restore_paths_csv = ",".join(rejected_paths)
|
||||
print(f"Post-merge remediation scope: selective ({len(rejected_paths)} rejected path(s)).")
|
||||
for path in rejected_paths:
|
||||
print(f" - {path}")
|
||||
else:
|
||||
print("Post-merge remediation scope: full.")
|
||||
|
||||
try:
|
||||
queued = _queue_restore_pipeline(
|
||||
collection_uri=collection_uri,
|
||||
project=project,
|
||||
headers=headers,
|
||||
definition_id=definition_id,
|
||||
baseline_branch=args.baseline_branch,
|
||||
include_entra_update=include_entra_update,
|
||||
dry_run=dry_run,
|
||||
update_assignments=update_assignments,
|
||||
remove_unmanaged=remove_unmanaged,
|
||||
max_workers=max_workers,
|
||||
exclude_csv=exclude_csv,
|
||||
restore_mode=restore_mode,
|
||||
restore_paths_csv=restore_paths_csv,
|
||||
)
|
||||
except Exception as exc:
|
||||
if _is_permission_error(exc):
|
||||
print(
|
||||
"WARNING: Post-merge remediation queue skipped due permissions. "
|
||||
f"Definition={definition_id}. Details: {exc}"
|
||||
)
|
||||
_diagnose_queue_permission(collection_uri, project, headers, definition_id)
|
||||
print(
|
||||
"Grant 'Queue builds' permission for this pipeline identity on the restore pipeline "
|
||||
"and ensure the pipeline has access to run it."
|
||||
)
|
||||
return 0
|
||||
raise
|
||||
|
||||
build_id = queued.get("id")
|
||||
build_url = ((queued.get("_links") or {}).get("web") or {}).get("href", "")
|
||||
if not build_url and build_id:
|
||||
build_url = f"{collection_uri}/{project}/_build/results?buildId={build_id}"
|
||||
|
||||
marker = f"Automation marker: {MERGE_MARKER_PREFIX}{merge_commit}"
|
||||
comment = (
|
||||
"Auto-remediation queued after merged rolling PR with reviewer /reject decision(s).\n\n"
|
||||
f"Workload: {args.workload}\n"
|
||||
f"Merged PR: #{pr_id}\n"
|
||||
f"Merge commit: {merge_commit}\n"
|
||||
f"Restore pipeline definition: {definition_id}\n"
|
||||
f"Restore run: {build_url or '(queued)'}\n\n"
|
||||
f"{marker}"
|
||||
)
|
||||
|
||||
try:
|
||||
_post_pr_thread(repo_api, headers, pr_id, comment)
|
||||
except Exception as exc:
|
||||
print(f"WARNING: Restore queued, but failed posting merge marker comment on PR #{pr_id}: {exc}")
|
||||
|
||||
print(
|
||||
f"Queued post-merge remediation for PR #{pr_id} (merge_commit={merge_commit}, buildId={build_id})."
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
raise SystemExit(main())
|
||||
except Exception as exc:
|
||||
print(f"WARNING: Failed post-merge remediation check: {exc}", file=sys.stderr)
|
||||
raise
|
||||
273
scripts/resolve_ca_references.py
Normal file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Resolve Conditional Access GUID references to display names in backup JSON."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import pathlib
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
|
||||
SPECIAL_APP_IDS = {
|
||||
"All": "All applications",
|
||||
"None": "None",
|
||||
"Office365": "Office 365",
|
||||
"MicrosoftAdminPortals": "Microsoft Admin Portals",
|
||||
}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--root", required=True, help="Path to workload backup root (for Entra: tenant-state/entra).")
|
||||
parser.add_argument("--token", required=True, help="Microsoft Graph access token.")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
class GraphResolver:
|
||||
def __init__(self, token: str):
|
||||
self.token = token.strip()
|
||||
self.group_cache: dict[str, str | None] = {}
|
||||
self.role_cache: dict[str, str | None] = {}
|
||||
self.app_cache: dict[str, str | None] = {}
|
||||
self.location_cache: dict[str, str | None] = {}
|
||||
self.auth_strength_cache: dict[str, str | None] = {}
|
||||
self._warned: set[str] = set()
|
||||
|
||||
def _warn_once(self, key: str, message: str) -> None:
|
||||
if key in self._warned:
|
||||
return
|
||||
self._warned.add(key)
|
||||
print(f"Warning: {message}")
|
||||
|
||||
def _get(self, url: str) -> dict | None:
|
||||
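        # Returns parsed JSON, None on 404, and None with a once-per-URL warning on other failures.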
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.token}",
|
||||
"Accept": "application/json",
|
||||
},
|
||||
method="GET",
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
except urllib.error.HTTPError as exc:
|
||||
if exc.code == 404:
|
||||
return None
|
||||
self._warn_once(url, f"Graph lookup failed for {url} (HTTP {exc.code})")
|
||||
return None
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self._warn_once(url, f"Graph lookup failed for {url} ({exc})")
|
||||
return None
|
||||
|
||||
def group_name(self, group_id: str) -> str | None:
|
||||
if group_id in self.group_cache:
|
||||
return self.group_cache[group_id]
|
||||
url = (
|
||||
"https://graph.microsoft.com/v1.0/groups/"
|
||||
+ urllib.parse.quote(group_id)
|
||||
+ "?$select=id,displayName"
|
||||
)
|
||||
payload = self._get(url)
|
||||
name = payload.get("displayName") if isinstance(payload, dict) else None
|
||||
self.group_cache[group_id] = name
|
||||
return name
|
||||
|
||||
def role_name(self, role_template_id: str) -> str | None:
|
||||
if role_template_id in self.role_cache:
|
||||
return self.role_cache[role_template_id]
|
||||
url = (
|
||||
"https://graph.microsoft.com/v1.0/directoryRoleTemplates/"
|
||||
+ urllib.parse.quote(role_template_id)
|
||||
+ "?$select=id,displayName"
|
||||
)
|
||||
payload = self._get(url)
|
||||
name = payload.get("displayName") if isinstance(payload, dict) else None
|
||||
self.role_cache[role_template_id] = name
|
||||
return name
|
||||
|
||||
def app_name(self, app_or_object_id: str) -> str | None:
|
||||
if app_or_object_id in SPECIAL_APP_IDS:
|
||||
return SPECIAL_APP_IDS[app_or_object_id]
|
||||
if app_or_object_id in self.app_cache:
|
||||
return self.app_cache[app_or_object_id]
|
||||
|
||||
# CA app conditions usually use appId; try appId lookup first.
|
||||
        # Percent-encode the whole $filter expression so its spaces and quotes are valid in the URL.
        filter_expr = f"appId eq '{app_or_object_id}'"
        url = (
            "https://graph.microsoft.com/v1.0/servicePrincipals"
            + "?$select=id,appId,displayName"
            + "&$top=1"
            + "&$filter="
            + urllib.parse.quote(filter_expr)
        )
|
||||
payload = self._get(url)
|
||||
name = None
|
||||
if isinstance(payload, dict):
|
||||
value = payload.get("value")
|
||||
if isinstance(value, list) and value:
|
||||
first = value[0]
|
||||
if isinstance(first, dict):
|
||||
name = first.get("displayName")
|
||||
if not name:
|
||||
# Fallback: treat value as service principal object id.
|
||||
by_id_url = (
|
||||
"https://graph.microsoft.com/v1.0/servicePrincipals/"
|
||||
+ urllib.parse.quote(app_or_object_id)
|
||||
+ "?$select=id,appId,displayName"
|
||||
)
|
||||
by_id = self._get(by_id_url)
|
||||
if isinstance(by_id, dict):
|
||||
name = by_id.get("displayName")
|
||||
self.app_cache[app_or_object_id] = name
|
||||
return name
|
||||
|
||||
def location_name(self, location_id: str) -> str | None:
|
||||
if location_id in self.location_cache:
|
||||
return self.location_cache[location_id]
|
||||
if location_id in {"All", "AllTrusted"}:
|
||||
name = "All locations" if location_id == "All" else "All trusted locations"
|
||||
self.location_cache[location_id] = name
|
||||
return name
|
||||
url = (
|
||||
"https://graph.microsoft.com/v1.0/identity/conditionalAccess/namedLocations/"
|
||||
+ urllib.parse.quote(location_id)
|
||||
+ "?$select=id,displayName"
|
||||
)
|
||||
payload = self._get(url)
|
||||
name = payload.get("displayName") if isinstance(payload, dict) else None
|
||||
self.location_cache[location_id] = name
|
||||
return name
|
||||
|
||||
def auth_strength_name(self, auth_strength_id: str) -> str | None:
|
||||
if auth_strength_id in self.auth_strength_cache:
|
||||
return self.auth_strength_cache[auth_strength_id]
|
||||
url = (
|
||||
"https://graph.microsoft.com/beta/identity/conditionalAccess/authenticationStrength/policies/"
|
||||
+ urllib.parse.quote(auth_strength_id)
|
||||
+ "?$select=id,displayName"
|
||||
)
|
||||
payload = self._get(url)
|
||||
name = payload.get("displayName") if isinstance(payload, dict) else None
|
||||
self.auth_strength_cache[auth_strength_id] = name
|
||||
return name
|
||||
|
||||
|
||||
def resolve_id_list(
|
||||
values: list,
|
||||
lookup_fn,
|
||||
) -> list[dict[str, str]]:
|
||||
resolved: list[dict[str, str]] = []
|
||||
for raw in values:
|
||||
if not isinstance(raw, str) or not raw:
|
||||
continue
|
||||
resolved.append(
|
||||
{
|
||||
"id": raw,
|
||||
"displayName": lookup_fn(raw) or "Unresolved",
|
||||
}
|
||||
)
|
||||
return resolved
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
root = pathlib.Path(args.root).resolve()
|
||||
token = args.token.strip()
|
||||
|
||||
if not token:
|
||||
print("No Graph token provided. Skipping Conditional Access reference enrichment.")
|
||||
return 0
|
||||
|
||||
ca_dir = root / "Conditional Access"
|
||||
if not ca_dir.exists():
|
||||
print(f"Conditional Access folder not found at {ca_dir}. Skipping.")
|
||||
return 0
|
||||
|
||||
resolver = GraphResolver(token)
|
||||
updated_files = 0
|
||||
processed_files = 0
|
||||
|
||||
for file_path in sorted(ca_dir.glob("*.json")):
|
||||
try:
|
||||
payload = json.loads(file_path.read_text(encoding="utf-8"))
|
||||
except Exception: # noqa: BLE001
|
||||
continue
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
processed_files += 1
|
||||
changed = False
|
||||
|
||||
conditions = payload.get("conditions")
|
||||
if not isinstance(conditions, dict):
|
||||
conditions = {}
|
||||
|
||||
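        # For each GUID list, write a sibling "<key>Resolved" list of {id, displayName} entries
        # so reviewers can read display names; the original GUID fields stay untouched.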
users = conditions.get("users")
|
||||
if isinstance(users, dict):
|
||||
for key, lookup in (
|
||||
("includeGroups", resolver.group_name),
|
||||
("excludeGroups", resolver.group_name),
|
||||
("includeRoles", resolver.role_name),
|
||||
("excludeRoles", resolver.role_name),
|
||||
):
|
||||
value = users.get(key)
|
||||
if isinstance(value, list):
|
||||
resolved_key = f"{key}Resolved"
|
||||
resolved_value = resolve_id_list(value, lookup)
|
||||
if users.get(resolved_key) != resolved_value:
|
||||
users[resolved_key] = resolved_value
|
||||
changed = True
|
||||
|
||||
apps = conditions.get("applications")
|
||||
if isinstance(apps, dict):
|
||||
for key in ("includeApplications", "excludeApplications"):
|
||||
value = apps.get(key)
|
||||
if isinstance(value, list):
|
||||
resolved_key = f"{key}Resolved"
|
||||
resolved_value = resolve_id_list(value, resolver.app_name)
|
||||
if apps.get(resolved_key) != resolved_value:
|
||||
apps[resolved_key] = resolved_value
|
||||
changed = True
|
||||
|
||||
locations = conditions.get("locations")
|
||||
if isinstance(locations, dict):
|
||||
for key in ("includeLocations", "excludeLocations"):
|
||||
value = locations.get(key)
|
||||
if isinstance(value, list):
|
||||
resolved_key = f"{key}Resolved"
|
||||
resolved_value = resolve_id_list(value, resolver.location_name)
|
||||
if locations.get(resolved_key) != resolved_value:
|
||||
locations[resolved_key] = resolved_value
|
||||
changed = True
|
||||
|
||||
grant_controls = payload.get("grantControls")
|
||||
if isinstance(grant_controls, dict):
|
||||
auth_strength = grant_controls.get("authenticationStrength")
|
||||
if isinstance(auth_strength, dict):
|
||||
auth_strength_id = auth_strength.get("id")
|
||||
if isinstance(auth_strength_id, str) and auth_strength_id:
|
||||
resolved = {
|
||||
"id": auth_strength_id,
|
||||
"displayName": resolver.auth_strength_name(auth_strength_id) or "Unresolved",
|
||||
}
|
||||
if grant_controls.get("authenticationStrengthResolved") != resolved:
|
||||
grant_controls["authenticationStrengthResolved"] = resolved
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
file_path.write_text(json.dumps(payload, indent=5, ensure_ascii=False) + "\n", encoding="utf-8")
|
||||
updated_files += 1
|
||||
|
||||
print(
|
||||
"Conditional Access GUID enrichment complete. "
|
||||
+ f"Processed files: {processed_files}. "
|
||||
+ f"Updated files: {updated_files}."
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
2777
scripts/update_pr_review_summary.py
Normal file
File diff suppressed because it is too large
130
scripts/validate_backup_outputs.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate backup outputs for Intune and Entra workloads."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def to_bool(value: str) -> bool:
|
||||
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--workload", required=True, choices=["intune", "entra"])
|
||||
parser.add_argument("--mode", default="light", choices=["light", "full"])
|
||||
parser.add_argument("--root", required=True, help="Workload backup root path.")
|
||||
parser.add_argument("--reports-root", required=True, help="Workload reports root path.")
|
||||
parser.add_argument("--include-named-locations", default="false")
|
||||
parser.add_argument("--include-authentication-strengths", default="false")
|
||||
parser.add_argument("--include-conditional-access", default="false")
|
||||
parser.add_argument("--include-enterprise-applications", default="false")
|
||||
parser.add_argument("--include-enterprise-applications-effective", default="false")
|
||||
parser.add_argument("--include-app-registrations", default="false")
|
||||
parser.add_argument("--include-app-registrations-effective", default="false")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def _require_file(path: Path, label: str, errors: list[str]) -> None:
|
||||
if not path.is_file():
|
||||
errors.append(f"Missing {label}: {path}")
|
||||
|
||||
|
||||
def _json_count(root: Path) -> int:
|
||||
if not root.exists():
|
||||
return 0
|
||||
return sum(1 for _ in root.rglob("*.json"))
|
||||
|
||||
|
||||
def _validate_intune(root: Path, reports_root: Path, errors: list[str]) -> None:
|
||||
if not root.exists():
|
||||
errors.append(f"Missing Intune backup root: {root}")
|
||||
return
|
||||
|
||||
json_count = _json_count(root)
|
||||
if json_count == 0:
|
||||
errors.append(f"Intune backup root has no JSON exports: {root}")
|
||||
|
||||
_require_file(reports_root / "policy-assignments.md", "Intune assignment markdown report", errors)
|
||||
_require_file(reports_root / "policy-assignments.csv", "Intune assignment CSV report", errors)
|
||||
_require_file(reports_root / "object-inventory-all.csv", "Intune object inventory CSV", errors)
|
||||
|
||||
if errors:
|
||||
return
|
||||
print(f"Intune output validation passed: jsonFiles={json_count}")
|
||||
|
||||
|
||||
def _validate_entra(root: Path, reports_root: Path, args: argparse.Namespace, errors: list[str]) -> None:
|
||||
if not root.exists():
|
||||
errors.append(f"Missing Entra backup root: {root}")
|
||||
return
|
||||
|
||||
include_named_locations = to_bool(args.include_named_locations)
|
||||
include_auth_strengths = to_bool(args.include_authentication_strengths)
|
||||
include_conditional_access = to_bool(args.include_conditional_access)
|
||||
include_enterprise_apps = to_bool(args.include_enterprise_applications)
|
||||
include_enterprise_apps_effective = to_bool(args.include_enterprise_applications_effective)
|
||||
include_app_registrations = to_bool(args.include_app_registrations)
|
||||
include_app_registrations_effective = to_bool(args.include_app_registrations_effective)
|
||||
|
||||
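    # Every enabled Entra category must expose a per-category index at "<Category>/<Category>.md".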
expected_category_indexes: list[tuple[str, bool]] = [
|
||||
("Named Locations", include_named_locations),
|
||||
("Authentication Strengths", include_auth_strengths),
|
||||
("Conditional Access", include_conditional_access),
|
||||
("App Registrations", include_app_registrations_effective),
|
||||
("Enterprise Applications", include_enterprise_apps_effective),
|
||||
]
|
||||
|
||||
for category_name, is_required in expected_category_indexes:
|
||||
if not is_required:
|
||||
continue
|
||||
index_path = root / category_name / f"{category_name}.md"
|
||||
_require_file(index_path, f"Entra export index for '{category_name}'", errors)
|
||||
|
||||
_require_file(reports_root / "object-inventory-all.csv", "Entra object inventory CSV", errors)
|
||||
|
||||
if include_conditional_access:
|
||||
_require_file(reports_root / "policy-assignments.md", "Entra assignment markdown report", errors)
|
||||
_require_file(reports_root / "policy-assignments.csv", "Entra assignment CSV report", errors)
|
||||
|
||||
if include_app_registrations_effective or include_enterprise_apps_effective:
|
||||
_require_file(reports_root / "apps-inventory.csv", "Entra apps inventory CSV", errors)
|
||||
|
||||
if errors:
|
||||
return
|
||||
|
||||
json_count = _json_count(root)
|
||||
print(
|
||||
"Entra output validation passed: "
|
||||
f"jsonFiles={json_count}, "
|
||||
f"mode={args.mode}, "
|
||||
f"enterpriseAppsConfigured={str(include_enterprise_apps).lower()}, "
|
||||
f"enterpriseAppsEffective={str(include_enterprise_apps_effective).lower()}, "
|
||||
f"appRegistrationsConfigured={str(include_app_registrations).lower()}, "
|
||||
f"appRegistrationsEffective={str(include_app_registrations_effective).lower()}"
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
root = Path(args.root).resolve()
|
||||
reports_root = Path(args.reports_root).resolve()
|
||||
errors: list[str] = []
|
||||
|
||||
if args.workload == "intune":
|
||||
_validate_intune(root=root, reports_root=reports_root, errors=errors)
|
||||
else:
|
||||
_validate_entra(root=root, reports_root=reports_root, args=args, errors=errors)
|
||||
|
||||
if errors:
|
||||
print("Backup output validation failed:")
|
||||
for item in errors:
|
||||
print(f" - {item}")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||