Claude
committed on
fix(P1): Replace stale 72B model fallbacks with 7B to avoid Novita 500 errors
Browse files
Bug: Three files had hardcoded fallbacks to Qwen2.5-72B-Instruct which
routes to Novita provider (unreliable, 500 errors). The config.py was
updated to use 7B, but these fallbacks were stale.
Files fixed:
- src/clients/huggingface.py:59 - HuggingFaceChatClient init
- src/agent_factory/judges.py:85 - get_model() HuggingFace branch
- src/orchestrators/langgraph_orchestrator.py:43 - LangGraph init (deprecated)
All fallbacks now use Qwen/Qwen2.5-7B-Instruct to stay on HuggingFace
native serverless infrastructure (models < 30B params).
See: CLAUDE.md "CRITICAL: HuggingFace Free Tier Architecture" section
All 310 unit tests pass.
src/agent_factory/judges.py
CHANGED
|
@@ -82,7 +82,8 @@ def get_model() -> Any:
|
|
| 82 |
|
| 83 |
# Priority 3: HuggingFace (requires HF_TOKEN)
|
| 84 |
if settings.has_huggingface_key:
|
| 85 |
- model_name = settings.huggingface_model or "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 86 |
hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
|
| 87 |
return HuggingFaceModel(model_name, provider=hf_provider)
|
| 88 |
|
|
|
|
| 82 |
|
| 83 |
# Priority 3: HuggingFace (requires HF_TOKEN)
|
| 84 |
if settings.has_huggingface_key:
|
| 85 |
+
# FIX: Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
|
| 86 |
+
model_name = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
| 87 |
hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
|
| 88 |
return HuggingFaceModel(model_name, provider=hf_provider)
|
| 89 |
|
src/clients/huggingface.py
CHANGED
|
@@ -51,12 +51,13 @@ class HuggingFaceChatClient(BaseChatClient): # type: ignore[misc]
|
|
| 51 |
"""Initialize the HuggingFace chat client.
|
| 52 |
|
| 53 |
Args:
|
| 54 |
-
model_id: The HuggingFace model ID (default: configured value or Qwen2.5-72B).
|
| 55 |
api_key: HF_TOKEN (optional, defaults to env var).
|
| 56 |
**kwargs: Additional arguments passed to BaseChatClient.
|
| 57 |
"""
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
- self.model_id = model_id or settings.huggingface_model or "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 60 |
self.api_key = api_key or settings.hf_token
|
| 61 |
|
| 62 |
# Initialize the HF Inference Client
|
|
|
|
| 51 |
"""Initialize the HuggingFace chat client.
|
| 52 |
|
| 53 |
Args:
|
| 54 |
+
model_id: The HuggingFace model ID (default: configured value or Qwen2.5-7B).
|
| 55 |
api_key: HF_TOKEN (optional, defaults to env var).
|
| 56 |
**kwargs: Additional arguments passed to BaseChatClient.
|
| 57 |
"""
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
+
# FIX: Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
|
| 60 |
+
self.model_id = model_id or settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
| 61 |
self.api_key = api_key or settings.hf_token
|
| 62 |
|
| 63 |
# Initialize the HF Inference Client
|
src/orchestrators/langgraph_orchestrator.py
CHANGED
|
@@ -38,8 +38,9 @@ class LangGraphOrchestrator(OrchestratorProtocol):
|
|
| 38 |
|
| 39 |
# Initialize the LLM (Qwen 2.5 via HF Inference)
|
| 40 |
# We use the serverless API by default
|
| 41 |
-
#
|
| 42 |
- repo_id = settings.huggingface_model or "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 43 |
|
| 44 |
# Ensure we have an API key
|
| 45 |
api_key = settings.hf_token
|
|
|
|
| 38 |
|
| 39 |
# Initialize the LLM (Qwen 2.5 via HF Inference)
|
| 40 |
# We use the serverless API by default
|
| 41 |
+
# FIX: Use 7B model to stay on HuggingFace native infrastructure
|
| 42 |
+
# Large models (70B+) route to Novita/Hyperbolic providers (500/401 errors)
|
| 43 |
+
repo_id = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
| 44 |
|
| 45 |
# Ensure we have an API key
|
| 46 |
api_key = settings.hf_token
|