from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import torch, os
app = FastAPI(title="Harshal AI Backend", version="1.0.0")
# CORS (Next.js frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============================================================
# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
# ============================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,  # newer transformers name for torch_dtype
)
llm.eval()
# ============================================================
# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
# ============================================================
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)
RESUME_FILE = "resume.pdf"
resume_rag = None
def chunk_text(text, max_chars=450, overlap=80):
    """Simple overlapping chunks."""
    text = " ".join(text.split())
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap
    return chunks
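# Illustration (hypothetical input): chunk_text("a" * 1000) would return three
# chunks covering characters [0:450], [370:820], and [740:1000]; each chunk
# re-includes the last `overlap` characters of the previous one so context is
# not cut off at a chunk boundary.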
def build_rag():
    """Reads resume.pdf → chunks → embeddings."""
    global resume_rag
    if not os.path.exists(RESUME_FILE):
        print("⚠ resume.pdf NOT FOUND — RAG disabled.")
        return
    reader = PdfReader(RESUME_FILE)
    full_text = ""
    for page in reader.pages:
        full_text += page.extract_text() or ""
    chunks = chunk_text(full_text)
    if not chunks:
        print("⚠ resume.pdf has no extractable text; RAG disabled.")
        return
    embeddings = embedder.encode(
        chunks, convert_to_tensor=True, normalize_embeddings=True
    )
    resume_rag = {"chunks": chunks, "embs": embeddings}
    print("✅ Resume RAG built with", len(chunks), "chunks")
build_rag()
def retrieve_rag(query, top_k=3):
    """Find most relevant resume chunks."""
    if resume_rag is None:
        return ""
    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(top_k, len(sims)))
    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
# ============================================================
# 3) REQUEST MODELS
# ============================================================
class Message(BaseModel):
    role: str
    content: str

class ChatReq(BaseModel):
    messages: List[Message]
# ============================================================
# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
# ============================================================
@app.post("/chat")
def chat(req: ChatReq):
user_msg = req.messages[-1].content.strip()
# RAG: take from resume
rag_context = retrieve_rag(user_msg)
# Detect if we found resume info
has_relevant_rag = rag_context and len(rag_context) > 40
# SMART RULE:
# If resume has relevant info → answer ONLY from resume
# If resume has nothing → fallback using general persona
if has_relevant_rag:
fallback_note = ""
resume_instruction = """
Use ONLY the details provided in the resume context.
Do NOT invent any new companies, dates, or roles.
Stay strictly consistent with:
- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
- Past experience: SarvaTech Consultants (Internship)
- Personal projects listed in resume.
"""
else:
fallback_note = """
The resume does NOT mention this.
Provide a short helpful answer based on Harshal’s background, tone, and skills —
BUT do NOT invent companies, dates, or job titles not present in the resume.
"""
resume_instruction = ""
# System persona
system_prompt = f"""
You are Harshal Sonawane — a real human software engineer from Pune, India.
Tone:
- Natural, confident, friendly.
- 2–8 sentences max.
- Never say you are an AI.
Your resume (RAG context):
{rag_context}
Instructions:
{resume_instruction}
Fallback rule:
{fallback_note}
""".strip()
# Chat template
msgs = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_msg},
]
input_ids = tokenizer.apply_chat_template(
msgs,
return_tensors="pt",
add_generation_prompt=True,
)
with torch.no_grad():
output_ids = llm.generate(
input_ids,
max_new_tokens=160,
temperature=0.55,
top_p=0.9,
repetition_penalty=1.06,
do_sample=True,
)
gen = output_ids[0][input_ids.shape[-1]:]
reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
return {"reply": reply}
# ============================================================
# 5) HEALTH CHECK
# ============================================================
@app.get("/")
def health():
return {
"status": "Harshal AI backend running (SMART MODE)",
"model": MODEL_NAME
}
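# ============================================================
# 6) LOCAL USAGE (sketch: host, port, and module name are assumptions)
# ============================================================
# Run the API with any ASGI server, e.g. uvicorn:
#   uvicorn main:app --host 0.0.0.0 --port 7860
#
# Example request against the /chat route defined above:
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Where do you currently work?"}]}'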