fix: use max_completion_tokens and remove temperature for Azure OpenAI compat
- Replace max_tokens with max_completion_tokens (required by newer Azure models)
- Remove hardcoded temperature (not supported by all model types)
- Add response body logging on LLM API errors for easier debugging
This commit is contained in:
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
|
|||||||
else:
|
else:
|
||||||
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": LLM_MODEL,
|
||||||
|
"messages": messages,
|
||||||
|
"max_completion_tokens": 800,
|
||||||
|
}
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(url, headers=headers, json=payload)
|
||||||
url,
|
if resp.status_code >= 400:
|
||||||
headers=headers,
|
body = resp.text
|
||||||
json={
|
logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
|
||||||
"model": LLM_MODEL,
|
raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
|
||||||
"messages": messages,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"max_tokens": 800,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
return data["choices"][0]["message"]["content"].strip()
|
return data["choices"][0]["message"]["content"].strip()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user