feat: add Azure OpenAI / MS Foundry support for /api/ask
- Add LLM_API_VERSION config for Azure api-version query param
- Detect Azure endpoints and use api-key header instead of Bearer
- Handle base URLs that already include /chat/completions path
- Update .env.example with Azure OpenAI guidance
This commit is contained in:
@@ -48,6 +48,7 @@ class Settings(BaseSettings):
|
||||
LLM_MODEL: str = "gpt-4o-mini"
|
||||
LLM_MAX_EVENTS: int = 50
|
||||
LLM_TIMEOUT_SECONDS: int = 30
|
||||
LLM_API_VERSION: str = "" # e.g. 2025-01-01-preview for Azure OpenAI
|
||||
|
||||
|
||||
_settings = Settings()
|
||||
@@ -81,3 +82,4 @@ LLM_BASE_URL = _settings.LLM_BASE_URL
|
||||
LLM_MODEL = _settings.LLM_MODEL
|
||||
LLM_MAX_EVENTS = _settings.LLM_MAX_EVENTS
|
||||
LLM_TIMEOUT_SECONDS = _settings.LLM_TIMEOUT_SECONDS
|
||||
LLM_API_VERSION = _settings.LLM_API_VERSION
|
||||
|
||||
@@ -5,7 +5,7 @@ from datetime import UTC, datetime, timedelta
|
||||
import httpx
|
||||
import structlog
|
||||
from auth import require_auth
|
||||
from config import LLM_API_KEY, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
|
||||
from config import LLM_API_KEY, LLM_API_VERSION, LLM_BASE_URL, LLM_MAX_EVENTS, LLM_MODEL, LLM_TIMEOUT_SECONDS
|
||||
from database import events_collection
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from models.api import AskRequest, AskResponse
|
||||
@@ -172,6 +172,15 @@ def _format_events_for_llm(events: list[dict]) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_chat_url(base_url: str, api_version: str) -> str:
|
||||
"""Construct the chat completions URL, handling Azure OpenAI endpoints."""
|
||||
base = base_url.rstrip("/")
|
||||
url = base if base.endswith("/chat/completions") else f"{base}/chat/completions"
|
||||
if api_version:
|
||||
url = f"{url}?api-version={api_version}"
|
||||
return url
|
||||
|
||||
|
||||
async def _call_llm(question: str, events: list[dict]) -> str:
|
||||
if not LLM_API_KEY:
|
||||
raise RuntimeError("LLM_API_KEY not configured")
|
||||
@@ -185,13 +194,20 @@ async def _call_llm(question: str, events: list[dict]) -> str:
|
||||
},
|
||||
]
|
||||
|
||||
url = _build_chat_url(LLM_BASE_URL, LLM_API_VERSION)
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
# Azure OpenAI uses api-key header; standard OpenAI uses Bearer token
|
||||
if "azure" in LLM_BASE_URL.lower() or "cognitiveservices" in LLM_BASE_URL.lower():
|
||||
headers["api-key"] = LLM_API_KEY
|
||||
else:
|
||||
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
||||
resp = await client.post(
|
||||
f"{LLM_BASE_URL.rstrip('/')}/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {LLM_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
url,
|
||||
headers=headers,
|
||||
json={
|
||||
"model": LLM_MODEL,
|
||||
"messages": messages,
|
||||
|
||||
Reference in New Issue
Block a user