Claude committed on
Commit
a4327d1
·
unverified ·
1 Parent(s): e4c6475

fix(P1): Replace stale 72B model fallbacks with 7B to avoid Novita 500 errors

Browse files

Bug: Three files had hardcoded fallbacks to Qwen2.5-72B-Instruct which
routes to Novita provider (unreliable, 500 errors). The config.py was
updated to use 7B, but these fallbacks were stale.

Files fixed:
- src/clients/huggingface.py:59 - HuggingFaceChatClient init
- src/agent_factory/judges.py:85 - get_model() HuggingFace branch
- src/orchestrators/langgraph_orchestrator.py:43 - LangGraph init (deprecated)

All fallbacks now use Qwen/Qwen2.5-7B-Instruct to stay on HuggingFace
native serverless infrastructure (models < 30B params).

See: CLAUDE.md "CRITICAL: HuggingFace Free Tier Architecture" section

All 310 unit tests pass.

src/agent_factory/judges.py CHANGED
@@ -82,7 +82,8 @@ def get_model() -> Any:
82
 
83
  # Priority 3: HuggingFace (requires HF_TOKEN)
84
  if settings.has_huggingface_key:
85
- model_name = settings.huggingface_model or "Qwen/Qwen2.5-72B-Instruct"
 
86
  hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
87
  return HuggingFaceModel(model_name, provider=hf_provider)
88
 
 
82
 
83
  # Priority 3: HuggingFace (requires HF_TOKEN)
84
  if settings.has_huggingface_key:
85
+ # FIX: Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
86
+ model_name = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
87
  hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
88
  return HuggingFaceModel(model_name, provider=hf_provider)
89
 
src/clients/huggingface.py CHANGED
@@ -51,12 +51,13 @@ class HuggingFaceChatClient(BaseChatClient): # type: ignore[misc]
51
  """Initialize the HuggingFace chat client.
52
 
53
  Args:
54
- model_id: The HuggingFace model ID (default: configured value or Qwen2.5-72B).
55
  api_key: HF_TOKEN (optional, defaults to env var).
56
  **kwargs: Additional arguments passed to BaseChatClient.
57
  """
58
  super().__init__(**kwargs)
59
- self.model_id = model_id or settings.huggingface_model or "Qwen/Qwen2.5-72B-Instruct"
 
60
  self.api_key = api_key or settings.hf_token
61
 
62
  # Initialize the HF Inference Client
 
51
  """Initialize the HuggingFace chat client.
52
 
53
  Args:
54
+ model_id: The HuggingFace model ID (default: configured value or Qwen2.5-7B).
55
  api_key: HF_TOKEN (optional, defaults to env var).
56
  **kwargs: Additional arguments passed to BaseChatClient.
57
  """
58
  super().__init__(**kwargs)
59
+ # FIX: Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
60
+ self.model_id = model_id or settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
61
  self.api_key = api_key or settings.hf_token
62
 
63
  # Initialize the HF Inference Client
src/orchestrators/langgraph_orchestrator.py CHANGED
@@ -38,8 +38,9 @@ class LangGraphOrchestrator(OrchestratorProtocol):
38
 
39
  # Initialize the LLM (Qwen 2.5 via HF Inference)
40
  # We use the serverless API by default
41
- # NOTE: Llama-3.1-70B routes to Hyperbolic (unreliable staging mode)
42
- repo_id = "Qwen/Qwen2.5-72B-Instruct"
 
43
 
44
  # Ensure we have an API key
45
  api_key = settings.hf_token
 
38
 
39
  # Initialize the LLM (Qwen 2.5 via HF Inference)
40
  # We use the serverless API by default
41
+ # FIX: Use 7B model to stay on HuggingFace native infrastructure
42
+ # Large models (70B+) route to Novita/Hyperbolic providers (500/401 errors)
43
+ repo_id = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
44
 
45
  # Ensure we have an API key
46
  api_key = settings.hf_token