fix: use max_completion_tokens and remove temperature for Azure OpenAI compat

- Replace max_tokens with max_completion_tokens (required by newer Azure models)
- Remove hardcoded temperature (not supported by all model types)
- Add response body logging on LLM API errors for easier debugging
2026-04-20 15:55:00 +02:00
parent 9ec193ea13
commit 4303b8f02c
1 changed file with 11 additions and 11 deletions
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
    else:
        headers["Authorization"] = f"Bearer {LLM_API_KEY}"

-    async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
-        resp = await client.post(
-            url,
-            headers=headers,
-            json={
+    payload = {
        "model": LLM_MODEL,
        "messages": messages,
-                "temperature": 0.3,
-                "max_tokens": 800,
-            },
-        )
-        resp.raise_for_status()
+        "max_completion_tokens": 800,
+    }
+
+    async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
+        resp = await client.post(url, headers=headers, json=payload)
+        if resp.status_code >= 400:
+            body = resp.text
+            logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
+            raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
        data = resp.json()
        return data["choices"][0]["message"]["content"].strip()