fix: use max_completion_tokens and remove temperature for Azure OpenAI compat
- Replace max_tokens with max_completion_tokens (required by newer Azure models)
- Remove hardcoded temperature (not supported by all model types)
- Add response body logging on LLM API errors for easier debugging
This commit is contained in:
@@ -204,18 +204,18 @@ async def _call_llm(question: str, events: list[dict]) -> str:
|
|||||||
else:
|
else:
|
||||||
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
headers["Authorization"] = f"Bearer {LLM_API_KEY}"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": LLM_MODEL,
|
||||||
|
"messages": messages,
|
||||||
|
"max_completion_tokens": 800,
|
||||||
|
}
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(url, headers=headers, json=payload)
|
||||||
url,
|
if resp.status_code >= 400:
|
||||||
headers=headers,
|
body = resp.text
|
||||||
json={
|
logger.error("LLM API error", status_code=resp.status_code, url=url, response_body=body)
|
||||||
"model": LLM_MODEL,
|
raise RuntimeError(f"LLM API error {resp.status_code}: {body[:500]}")
|
||||||
"messages": messages,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"max_tokens": 800,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
return data["choices"][0]["message"]["content"].strip()
|
return data["choices"][0]["message"]["content"].strip()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user