From 4303b8f02cfcc68f122ccce8e75f147d23f22d5c Mon Sep 17 00:00:00 2001
From: Tomas Kracmar
Date: Mon, 20 Apr 2026 15:55:00 +0200
Subject: [PATCH] fix: use max_completion_tokens and remove temperature for
 Azure OpenAI compat

- Replace max_tokens with max_completion_tokens (required by newer Azure models)
- Remove hardcoded temperature (not supported by all model types)
- Add response body logging on LLM API errors for easier debugging
---
 backend/routes/ask.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/backend/routes/ask.py b/backend/routes/ask.py
index a459e03..1884df3 100644
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
     else:
         headers["Authorization"] = f"Bearer {LLM_API_KEY}"
 
+    payload = {
+        "model": LLM_MODEL,
+        "messages": messages,
+        "max_completion_tokens": 800,
+    }
+
     async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
-        resp = await client.post(
-            url,
-            headers=headers,
-            json={
-                "model": LLM_MODEL,
-                "messages": messages,
-                "temperature": 0.3,
-                "max_tokens": 800,
-            },
-        )
-        resp.raise_for_status()
+        resp = await client.post(url, headers=headers, json=payload)
+        if resp.status_code >= 400:
+            body = resp.text
+            logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
+            raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
         data = resp.json()
         return data["choices"][0]["message"]["content"].strip()
 
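
Reviewer note (not part of the patch): a minimal sketch of how the new error path could be exercised, assuming pytest-asyncio and respx for mocking httpx. The import path, the mocked URL pattern, and the error body below are illustrative assumptions, not verified against this repo; the module's config (LLM_API_KEY, LLM_MODEL, LLM_TIMEOUT_SECONDS) is presumed to load at import time.

import httpx
import pytest
import respx

from backend.routes.ask import _call_llm  # assumed importable in the test env

@pytest.mark.asyncio
@respx.mock
async def test_call_llm_surfaces_error_body():
    # Match any POST, since the target URL is built from module config.
    respx.route(method="POST", url__regex=r".*").mock(
        return_value=httpx.Response(
            400,
            json={"error": {"message": "Unsupported parameter: 'max_tokens'"}},
        )
    )
    with pytest.raises(RuntimeError) as excinfo:
        await _call_llm("hello", events=[])
    # The truncated response body is surfaced in the exception message,
    # which is the debugging win over a bare raise_for_status().
    assert "Unsupported parameter" in str(excinfo.value)

Unlike the removed raise_for_status(), the explicit status check keeps the response body, which is where Azure OpenAI reports which parameter a given model rejects.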