# Dockerfile for a FastAPI inference service (uvicorn on port 7860).
# Note: port 7860 and /tmp-based caches match Hugging Face Spaces conventions.

# Use lightweight Python base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the layer small; clearing the apt lists
# in the same RUN prevents the package index from being baked into the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so dependency installation is cached
# independently of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Hugging Face cache directory.
# HF_HOME must point somewhere writable at runtime; /tmp is writable for
# any user (presumably chosen for HF Spaces' non-root runtime — verify).
ENV HF_HOME=/tmp
# TRANSFORMERS_CACHE is deprecated in favor of HF_HOME; kept for reference.
# ENV TRANSFORMERS_CACHE=/tmp

# Optionally pre-download the K2-Think model at build time to avoid a
# cold start on first request (disabled: it inflates the image by the
# full model size).
# RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
#     model_id='LLM360/K2-Think'; \
#     AutoTokenizer.from_pretrained(model_id, cache_dir='/tmp'); \
#     AutoModelForCausalLM.from_pretrained(model_id, cache_dir='/tmp')"

# Expose port
EXPOSE 7860

# Command to run the FastAPI app (exec form: uvicorn receives signals directly).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]