From be319688f6837fc89945945e458f479cf02a40a2 Mon Sep 17 00:00:00 2001
From: Tomas Kracmar
Date: Mon, 20 Apr 2026 15:28:12 +0200
Subject: [PATCH] feat: add Azure OpenAI / MS Foundry support for /api/ask

- Add LLM_API_VERSION config for Azure api-version query param
- Detect Azure endpoints and use api-key header instead of Bearer
- Handle base URLs that already include /chat/completions path
- Update .env.example with Azure OpenAI guidance
---
 .env.example          |  4 ++++
 backend/config.py     |  2 ++
 backend/routes/ask.py | 28 ++++++++++++++++++++++------
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/.env.example b/.env.example
index 296b0f3..0dcf744 100644
--- a/.env.example
+++ b/.env.example
@@ -36,8 +36,12 @@ ALERTS_ENABLED=false
 
 # Optional: LLM configuration for natural language querying (/api/ask)
 # Supports any OpenAI-compatible API (OpenAI, Azure OpenAI, Ollama, etc.)
+# For Azure OpenAI / MS Foundry, set BASE_URL to your deployment endpoint
+# (e.g. https://your-resource.openai.azure.com/openai/deployments/your-deployment)
+# and set API_VERSION to something like 2025-01-01-preview
 LLM_API_KEY=
 LLM_BASE_URL=https://api.openai.com/v1
 LLM_MODEL=gpt-4o-mini
 LLM_MAX_EVENTS=50
 LLM_TIMEOUT_SECONDS=30
+LLM_API_VERSION=
diff --git a/backend/config.py b/backend/config.py
index 3942a0f..d7cb2e7 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -48,6 +48,7 @@ class Settings(BaseSettings):
     LLM_MODEL: str = "gpt-4o-mini"
     LLM_MAX_EVENTS: int = 50
     LLM_TIMEOUT_SECONDS: int = 30
+    LLM_API_VERSION: str = ""  # e.g. 2025-01-01-preview for Azure OpenAI
 
 
 _settings = Settings()
@@ -81,3 +82,4 @@ LLM_BASE_URL = _settings.LLM_BASE_URL
 LLM_MODEL = _settings.LLM_MODEL
 LLM_MAX_EVENTS = _settings.LLM_MAX_EVENTS
 LLM_TIMEOUT_SECONDS = _settings.LLM_TIMEOUT_SECONDS
+LLM_API_VERSION = _settings.LLM_API_VERSION
diff --git a/backend/routes/ask.py b/backend/routes/ask.py
index 30c56e6..59e78ff 100644
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -5,7 +5,7 @@ from datetime import UTC, datetime, timedelta
 import httpx
 import structlog
 from auth import require_auth
-from config import LLM_API_KEY, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
+from config import LLM_API_KEY, LLM_API_VERSION, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
 from database import events_collection
 from fastapi import APIRouter, Depends, HTTPException
 from models.api import AskRequest, AskResponse
@@ -172,6 +172,15 @@ def _format_events_for_llm(events: list[dict]) -> str:
     return "\n".join(lines)
 
 
+def _build_chat_url(base_url: str, api_version: str) -> str:
+    """Construct the chat completions URL, handling Azure OpenAI endpoints."""
+    base = base_url.rstrip("/")
+    url = base if base.endswith("/chat/completions") else f"{base}/chat/completions"
+    if api_version:
+        url = f"{url}?api-version={api_version}"
+    return url
+
+
 async def _call_llm(question: str, events: list[dict]) -> str:
     if not LLM_API_KEY:
         raise RuntimeError("LLM_API_KEY not configured")
@@ -185,13 +194,20 @@ async def _call_llm(question: str, events: list[dict]) -> str:
         },
     ]
 
+    url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION)
+    headers = {
+        "Content-Type": "application/json",
+    }
+    # Azure OpenAI uses api-key header; standard OpenAI uses Bearer token
+    if "azure" in LLM_BASE_URL.lower() or "cognitiveservices" in LLM_BASE_URL.lower():
+        headers["api-key"] = LLM_API_KEY
+    else:
+        headers["Authorization"] = f"Bearer {LLM_API_KEY}"
+
     async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
         resp = await client.post(
-            f"{LLM_BASE_URL.rstrip('/')}/chat/completions",
-            headers={
-                "Authorization": f"Bearer {LLM_API_KEY}",
-                "Content-Type": "application/json",
-            },
+            url,
+            headers=headers,
             json={
                 "model": LLM_MODEL,
                 "messages": messages,