feat: add Azure OpenAI / MS Foundry support for /api/ask
- Add LLM_API_VERSION config for Azure api-version query param
- Detect Azure endpoints and use api-key header instead of Bearer
- Handle base URLs that already include /chat/completions path
- Update .env.example with Azure OpenAI guidance
This commit is contained in:
@@ -36,8 +36,12 @@ ALERTS_ENABLED=false
|
||||
|
||||
# Optional: LLM configuration for natural language querying (/api/ask)
|
||||
# Supports any OpenAI-compatible API (OpenAI, Azure OpenAI, Ollama, etc.)
|
||||
# For Azure OpenAI / MS Foundry, set BASE_URL to your deployment endpoint
|
||||
# (e.g. https://your-resource.openai.azure.com/openai/deployments/your-deployment)
|
||||
# and set API_VERSION to something like 2025-01-01-preview
|
||||
LLM_API_KEY=
|
||||
LLM_BASE_URL=https://api.openai.com/v1
|
||||
LLM_MODEL=gpt-4o-mini
|
||||
LLM_MAX_EVENTS=50
|
||||
LLM_TIMEOUT_SECONDS=30
|
||||
LLM_API_VERSION=
|
||||
|
||||
@@ -48,6 +48,7 @@ class Settings(BaseSettings):
|
||||
LLM_MODEL: str = "gpt-4o-mini"
|
||||
LLM_MAX_EVENTS: int = 50
|
||||
LLM_TIMEOUT_SECONDS: int = 30
|
||||
LLM_API_VERSION: str = "" # e.g. 2025-01-01-preview for Azure OpenAI
|
||||
|
||||
|
||||
_settings = Settings()
|
||||
@@ -81,3 +82,4 @@ LLM_BASE_URL = _settings.LLM_BASE_URL
|
||||
LLM_MODEL = _settings.LLM_MODEL
|
||||
LLM_MAX_EVENTS = _settings.LLM_MAX_EVENTS
|
||||
LLM_TIMEOUT_SECONDS = _settings.LLM_TIMEOUT_SECONDS
|
||||
LLM_API_VERSION = _settings.LLM_API_VERSION
|
||||
|
||||
@@ -5,7 +5,7 @@ from datetime import UTC, datetime, timedelta
|
||||
import httpx
|
||||
import structlog
|
||||
from auth import require_auth
|
||||
from config import LLM_API_KEY, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
|
||||
from config import LLM_API_KEY, LLM_API_VERSION, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
|
||||
from database import events_collection
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from models.api import AskRequest, AskResponse
|
||||
@@ -172,6 +172,15 @@ def _format_events_for_llm(events: list[dict]) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_chat_url(base_url: str, api_version: str) -> str:
|
||||
"""Construct the chat completions URL, handling Azure OpenAI endpoints."""
|
||||
base = base_url.rstrip("/")
|
||||
url = base if base.endswith("/chat/completions") else f"{base}/chat/completions"
|
||||
if api_version:
|
||||
url = f"{url}?api-version={api_version}"
|
||||
return url
|
||||
|
||||
|
||||
async def _call_llm(question: str, events: list[dict]) -> str:
|
||||
if not LLM_API_KEY:
|
||||
raise RuntimeError("LLM_API_KEY not configured")
|
||||
@@ -185,13 +194,20 @@ async def _call_llm(question: str, events: list[dict]) -> str:
|
||||
},
|
||||
]
|
||||
|
||||
url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION)
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
# Azure OpenAI uses api-key header; standard OpenAI uses Bearer token
|
||||
if "azure" in LLM_BASE_URL.lower() or "cognitiveservices" in LLM_BASE_URL.lower():
|
||||
headers["api-key"] = LLM_API_KEY
|
||||
else:
|
||||
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
||||
resp = await client.post(
|
||||
f"{LLM_BASE_URL.rstrip('/')}/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {LLM_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
url,
|
||||
headers=headers,
|
||||
json={
|
||||
"model": LLM_MODEL,
|
||||
"messages": messages,
|
||||
|
||||
Reference in New Issue
Block a user