fix: use max_completion_tokens and remove temperature for Azure OpenAI compat
All checks were successful
CI / lint-and-test (push) Successful in 35s
Release / build-and-push (push) Successful in 40s

- Replace max_tokens with max_completion_tokens (newer Azure OpenAI models reject max_tokens and require max_completion_tokens)
- Remove the hardcoded temperature of 0.3 (some model types do not support setting it)
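- Log the response body on LLM API errors (HTTP status >= 400) and raise a RuntimeError containing the status code and the first 500 characters of the body, for easier debugging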
This commit is contained in:
2026-04-20 15:55:00 +02:00
parent 9ec193ea13
commit 4303b8f02c

View File

@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
else:
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
resp = await client.post(
url,
headers=headers,
json={
payload = {
"model": LLM_MODEL,
"messages": messages,
"temperature": 0.3,
"max_tokens": 800,
},
)
resp.raise_for_status()
"max_completion_tokens": 800,
}
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
resp = await client.post(url, headers=headers, json=payload)
if resp.status_code >= 400:
body = resp.text
logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
data = resp.json()
return data["choices"][0]["message"]["content"].strip()