fix: use max_completion_tokens and remove temperature for Azure OpenAI compat
- Replace `max_tokens` with `max_completion_tokens` (required by newer Azure models)
- Remove the hardcoded `temperature` (not supported by all model types)
- Add response-body logging on LLM API errors for easier debugging
This commit is contained in:
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
|
||||
else:
|
||||
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
||||
|
||||
payload = {
|
||||
"model": LLM_MODEL,
|
||||
"messages": messages,
|
||||
"max_completion_tokens": 800,
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
||||
resp = await client.post(
|
||||
url,
|
||||
headers=headers,
|
||||
json={
|
||||
"model": LLM_MODEL,
|
||||
"messages": messages,
|
||||
"temperature": 0.3,
|
||||
"max_tokens": 800,
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
resp = await client.post(url, headers=headers, json=payload)
|
||||
if resp.status_code >= 400:
|
||||
body = resp.text
|
||||
logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
|
||||
raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
|
||||
data = resp.json()
|
||||
return data["choices"][0]["message"]["content"].strip()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user