From be319688f6837fc89945945e458f479cf02a40a2 Mon Sep 17 00:00:00 2001
From: Tomas Kracmar
Date: Mon, 20 Apr 2026 15:28:12 +0200
Subject: [PATCH] feat: add Azure OpenAI / MS Foundry support for /api/ask

- Add LLM_API_VERSION config for Azure api-version query param
- Detect Azure endpoints and use api-key header instead of Bearer
- Handle base URLs that already include /chat/completions path
- Update .env.example with Azure OpenAI guidance
---
 .env.example          |  4 ++++
 backend/config.py     |  2 ++
 backend/routes/ask.py | 28 ++++++++++++++++++++++------
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/.env.example b/.env.example
index 296b0f3..0dcf744 100644
--- a/.env.example
+++ b/.env.example
@@ -36,8 +36,12 @@ ALERTS_ENABLED=false
 
 # Optional: LLM configuration for natural language querying (/api/ask)
 # Supports any OpenAI-compatible API (OpenAI, Azure OpenAI, Ollama, etc.)
+# For Azure OpenAI / MS Foundry, set BASE_URL to your deployment endpoint
+# (e.g. https://your-resource.openai.azure.com/openai/deployments/your-deployment)
+# and set API_VERSION to something like 2025-01-01-preview
 LLM_API_KEY=
 LLM_BASE_URL=https://api.openai.com/v1
 LLM_MODEL=gpt-4o-mini
 LLM_MAX_EVENTS=50
 LLM_TIMEOUT_SECONDS=30
+LLM_API_VERSION=
diff --git a/backend/config.py b/backend/config.py
index 3942a0f..d7cb2e7 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -48,6 +48,7 @@ class Settings(BaseSettings):
     LLM_MODEL: str = "gpt-4o-mini"
     LLM_MAX_EVENTS: int = 50
     LLM_TIMEOUT_SECONDS: int = 30
+    LLM_API_VERSION: str = ""  # e.g. 2025-01-01-preview for Azure OpenAI
 
 
 _settings = Settings()
@@ -81,3 +82,4 @@ LLM_BASE_URL = _settings.LLM_BASE_URL
 LLM_MODEL = _settings.LLM_MODEL
 LLM_MAX_EVENTS = _settings.LLM_MAX_EVENTS
 LLM_TIMEOUT_SECONDS = _settings.LLM_TIMEOUT_SECONDS
+LLM_API_VERSION = _settings.LLM_API_VERSION
diff --git a/backend/routes/ask.py b/backend/routes/ask.py
index 30c56e6..59e78ff 100644
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -5,7 +5,7 @@ from datetime import UTC, datetime, timedelta
 import httpx
 import structlog
 from auth import require_auth
-from config import LLM_API_KEY, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
+from config import LLM_API_KEY, LLM_API_VERSION, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
 from database import events_collection
 from fastapi import APIRouter, Depends, HTTPException
 from models.api import AskRequest, AskResponse
@@ -172,6 +172,15 @@ def _format_events_for_llm(events: list[dict]) -> str:
     return "\n".join(lines)
 
 
+def _build_chat_url(base_url: str, api_version: str) -> str:
+    """Construct the chat completions URL, handling Azure OpenAI endpoints."""
+    base = base_url.rstrip("/")
+    url = base if base.endswith("/chat/completions") else f"{base}/chat/completions"
+    if api_version:
+        url = f"{url}?api-version={api_version}"
+    return url
+
+
 async def _call_llm(question: str, events: list[dict]) -> str:
     if not LLM_API_KEY:
         raise RuntimeError("LLM_API_KEY not configured")
@@ -185,13 +194,20 @@ async def _call_llm(question: str, events: list[dict]) -> str:
         },
     ]
 
+    url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION)
+    headers = {
+        "Content-Type": "application/json",
+    }
+    # Azure OpenAI uses api-key header; standard OpenAI uses Bearer token
+    if "azure" in LLM_BASE_URL.lower() or "cognitiveservices" in LLM_BASE_URL.lower():
+        headers["api-key"] = LLM_API_KEY
+    else:
+        headers["Authorization"] = f"Bearer {LLM_API_KEY}"
+
     async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
         resp = await client.post(
-            f"{LLM_BASE_URL.rstrip('/')}/chat/completions",
-            headers={
-                "Authorization": f"Bearer {LLM_API_KEY}",
-                "Content-Type": "application/json",
-            },
+            url,
+            headers=headers,
             json={
                 "model": LLM_MODEL,
                 "messages": messages,