From 4303b8f02cfcc68f122ccce8e75f147d23f22d5c Mon Sep 17 00:00:00 2001
From: Tomas Kracmar
Date: Mon, 20 Apr 2026 15:55:00 +0200
Subject: [PATCH] fix: use max_completion_tokens and remove temperature for
 Azure OpenAI compat

- Replace max_tokens with max_completion_tokens (required by newer Azure models)
- Remove hardcoded temperature (not supported by all model types)
- Add response body logging on LLM API errors for easier debugging
---
 backend/routes/ask.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/backend/routes/ask.py b/backend/routes/ask.py
index a459e03..1884df3 100644
--- a/backend/routes/ask.py
+++ b/backend/routes/ask.py
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
     else:
         headers["Authorization"] = f"Bearer {LLM_API_KEY}"
 
+    payload = {
+        "model": LLM_MODEL,
+        "messages": messages,
+        "max_completion_tokens": 800,
+    }
+
     async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
-        resp = await client.post(
-            url,
-            headers=headers,
-            json={
-                "model": LLM_MODEL,
-                "messages": messages,
-                "temperature": 0.3,
-                "max_tokens": 800,
-            },
-        )
-        resp.raise_for_status()
+        resp = await client.post(url, headers=headers, json=payload)
+        if resp.status_code >= 400:
+            body = resp.text
+            logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
+            raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
         data = resp.json()
         return data["choices"][0]["message"]["content"].strip()
 
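
Reviewer note (not part of the patch): a minimal sketch of how the new error path could be exercised, assuming pytest-asyncio and respx for mocking httpx. The import path, the mocked URL pattern, and the error body below are illustrative assumptions, not verified against this repo; the module's config (LLM_API_KEY, LLM_MODEL, LLM_TIMEOUT_SECONDS) is presumed to load at import time.

import httpx
import pytest
import respx

from backend.routes.ask import _call_llm  # assumed importable in the test env

@pytest.mark.asyncio
@respx.mock
async def test_call_llm_surfaces_error_body():
    # Match any POST, since the target URL is built from module config.
    respx.route(method="POST", url__regex=r".*").mock(
        return_value=httpx.Response(
            400,
            json={"error": {"message": "Unsupported parameter: 'max_tokens'"}},
        )
    )
    with pytest.raises(RuntimeError) as excinfo:
        await _call_llm("hello", events=[])
    # The truncated response body is surfaced in the exception message,
    # which is the debugging win over a bare raise_for_status().
    assert "Unsupported parameter" in str(excinfo.value)

Unlike the removed raise_for_status(), the explicit status check keeps the response body, which is where Azure OpenAI reports which parameter a given model rejects.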