# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )

# MODEL_NAME = "Qwen/Qwen2.5-0.5B"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )

# class ChatMessage(BaseModel):
#     messages: list

# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}

# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"
#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )
#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}

# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes,
#     )
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import os
import torch

app = FastAPI(title="Harshal AI Backend", version="1.0.0")

# CORS (Next.js frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============================================================
# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
# ============================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,  # newer transformers releases accept `dtype` in place of the older `torch_dtype`
)
llm.eval()
# ============================================================
# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
# ============================================================
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

RESUME_FILE = "resume.pdf"
resume_rag = None


def chunk_text(text, max_chars=450, overlap=80):
    """Simple overlapping chunks."""
    text = " ".join(text.split())
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap
    return chunks
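# Quick sanity check for the chunker (illustrative input): with max_chars=450 and
# overlap=80, each new chunk starts 370 characters after the previous one, so a
# ~1000-character string yields three chunks, the last carrying the remainder.
#
#   >>> [len(c) for c in chunk_text("word " * 200)]
#   [450, 450, 259]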
def build_rag():
    """Reads resume.pdf → chunks → embeddings."""
    global resume_rag
    if not os.path.exists(RESUME_FILE):
        print("⚠ resume.pdf NOT FOUND — RAG disabled.")
        return
    reader = PdfReader(RESUME_FILE)
    full_text = ""
    for page in reader.pages:
        full_text += page.extract_text() or ""
    chunks = chunk_text(full_text)
    embeddings = embedder.encode(
        chunks, convert_to_tensor=True, normalize_embeddings=True
    )
    resume_rag = {"chunks": chunks, "embs": embeddings}
    print("✅ Resume RAG built with", len(chunks), "chunks")


build_rag()
def retrieve_rag(query, top_k=3):
    """Find most relevant resume chunks."""
    if resume_rag is None:
        return ""
    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(top_k, len(sims)))
    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
# ============================================================
# 3) REQUEST MODELS
# ============================================================
class Message(BaseModel):
    role: str
    content: str


class ChatReq(BaseModel):
    messages: List[Message]
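# The /chat route below expects an OpenAI-style message list. A minimal request body
# (contents are illustrative) looks like:
#
#   {
#     "messages": [
#       {"role": "user", "content": "What do you work on at your current company?"}
#     ]
#   }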
# ============================================================
# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
# ============================================================
@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()

    # RAG: take from resume
    rag_context = retrieve_rag(user_msg)

    # Detect if we found resume info
    has_relevant_rag = rag_context and len(rag_context) > 40

    # SMART RULE:
    # If resume has relevant info → answer ONLY from resume
    # If resume has nothing → fallback using general persona
    if has_relevant_rag:
        fallback_note = ""
        resume_instruction = """
Use ONLY the details provided in the resume context.
Do NOT invent any new companies, dates, or roles.
Stay strictly consistent with:
- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
- Past experience: SarvaTech Consultants (Internship)
- Personal projects listed in resume.
"""
    else:
        fallback_note = """
The resume does NOT mention this.
Provide a short helpful answer based on Harshal’s background, tone, and skills —
BUT do NOT invent companies, dates, or job titles not present in the resume.
"""
        resume_instruction = ""

    # System persona
    system_prompt = f"""
You are Harshal Sonawane — a real human software engineer from Pune, India.
Tone:
- Natural, confident, friendly.
- 2–8 sentences max.
- Never say you are an AI.
Your resume (RAG context):
{rag_context}
Instructions:
{resume_instruction}
Fallback rule:
{fallback_note}
""".strip()

    # Chat template
    msgs = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_msg},
    ]

    input_ids = tokenizer.apply_chat_template(
        msgs,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    with torch.no_grad():
        output_ids = llm.generate(
            input_ids,
            max_new_tokens=160,
            temperature=0.55,
            top_p=0.9,
            repetition_penalty=1.06,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning, as in the earlier Qwen version
        )

    # Decode only the newly generated tokens (everything after the prompt)
    gen = output_ids[0][input_ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

    return {"reply": reply}
# ============================================================
# 5) HEALTH CHECK
# ============================================================
@app.get("/")  # NOTE: route path assumed; the earlier version served its status message at "/"
def health():
    return {
        "status": "Harshal AI backend running (SMART MODE)",
        "model": MODEL_NAME,
    }
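
# Minimal sketch for running the app directly. Port 7860 is an assumption
# (the Hugging Face Spaces default); adjust host/port for your deployment.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)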