decodingdatascience committed on
Commit
d8177b2
·
verified ·
1 Parent(s): 39ed9de

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +125 -0
  3. data/dds_logo.png +3 -0
  4. data/insurance.pdf +3 -0
  5. requirements.txt +8 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/dds_logo.png filter=lfs diff=lfs merge=lfs -text
37
+ data/insurance.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — Insurance Q&A (RAG) with system prompt + simple config
2
+ import os
3
+ import gradio as gr
4
+ from pinecone import Pinecone, ServerlessSpec
5
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
6
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
7
+ from llama_index.embeddings.openai import OpenAIEmbedding
8
+ from llama_index.llms.openai import OpenAI
9
+
10
# --- System prompt (polite tone + answer-from-document constraint) ---
# Handed to the LLM below through its `system_prompt=` argument.
SYSTEM_PROMPT = """You are Aisha, a polite and professional Insurance assistant.
Answer ONLY using the information found in the indexed insurance document(s).
If the answer is not in the document(s), say: "I couldn’t find that in the document."
Keep responses concise, helpful, and courteous.
"""

# ===== Minimal CONFIG (only necessary keys) =====
# Both keys are read from the environment; the app refuses to start without them.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not PINECONE_API_KEY or not OPENAI_API_KEY:
    raise RuntimeError("Missing PINECONE_API_KEY or OPENAI_API_KEY (set them in Space → Settings → Variables).")

DATA_DIR = "data"  # Put insurance docs here (e.g., data/insurance.pdf)
LOGO_PATH = os.path.join(DATA_DIR, "dds_logo.png")  # Mandatory logo
if not os.path.exists(LOGO_PATH):
    # Build the message from LOGO_PATH so it always names the file that is
    # actually checked (the old literal said "dds_logo.png.png").
    raise RuntimeError(f"Logo not found: {LOGO_PATH} (commit it to your Space repo).")

EMBED_MODEL = "text-embedding-3-small"  # 1536-dim
LLM_MODEL = "gpt-4o-mini"
TOP_K = 4  # internal similarity_top_k

# ===== LlamaIndex / Pinecone (simple, fixed serverless: aws/us-east-1) =====
# Global LlamaIndex defaults: every index/query engine created later uses these.
Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL, api_key=OPENAI_API_KEY)
Settings.llm = OpenAI(model=LLM_MODEL, api_key=OPENAI_API_KEY, system_prompt=SYSTEM_PROMPT)

# Shared Pinecone client used by the index helpers below.
pc = Pinecone(api_key=PINECONE_API_KEY)
37
def ensure_index(
    name: str,
    dim: int = 1536,
    *,
    metric: str = "cosine",
    cloud: str = "aws",
    region: str = "us-east-1",
):
    """Return a handle to the Pinecone index ``name``, creating it if missing.

    Args:
        name: Name of the Pinecone index.
        dim: Vector dimension used when the index has to be created.
        metric: Similarity metric used when the index has to be created.
        cloud: Serverless cloud provider used when the index has to be created.
        region: Serverless region used when the index has to be created.

    Returns:
        The object returned by ``pc.Index(name)``.

    Note:
        ``dim``, ``metric``, ``cloud`` and ``region`` are only applied on
        creation. An index that already exists is returned as-is and its
        settings are not compared against these arguments.
    """
    # `names()` returns the plain index names, so no per-item key lookup is needed.
    if name not in pc.list_indexes().names():
        pc.create_index(
            name=name,
            dimension=dim,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )
    return pc.Index(name)
45
+
46
# Fixed index name for simplicity
pinecone_index = ensure_index("dds-insurance-index", dim=1536)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)


def bootstrap_index(force: bool = False):
    """Embed the documents in ``DATA_DIR`` and store them in the Pinecone index.

    Ingestion is skipped when the index already reports stored vectors, so an
    app restart does not re-embed every document again. (Presumably a repeated
    ingestion would also add duplicate vectors, since the nodes are rebuilt
    from scratch on each run — verify against the LlamaIndex node-id behavior.)

    Args:
        force: When True, ingest even if the index already contains vectors
            (use after replacing the documents in ``DATA_DIR``).

    Raises:
        RuntimeError: If ``DATA_DIR`` does not exist, or if no documents were
            loaded from it.
    """
    if not os.path.isdir(DATA_DIR):
        raise RuntimeError("No 'data/' directory found. Commit your documents to data/ in the Space repo.")

    if not force:
        # NOTE(review): stats may lag briefly right after writes; a count of 0
        # simply means we ingest, which matches the previous behavior.
        stats = pinecone_index.describe_index_stats()
        if stats.total_vector_count:
            return

    # The logo lives in DATA_DIR too; it is UI artwork, not a document to index.
    reader = SimpleDirectoryReader(DATA_DIR, exclude=[os.path.basename(LOGO_PATH)])
    docs = reader.load_data()
    if not docs:
        raise RuntimeError("No documents found in data/. Add e.g., data/insurance.pdf")

    storage_ctx = StorageContext.from_defaults(vector_store=vector_store)
    # Built only for its side effect: embedding the docs and writing them to the store.
    VectorStoreIndex.from_documents(docs, storage_context=storage_ctx, show_progress=True)


bootstrap_index()
60
+
61
def answer(query: str) -> str:
    """Answer ``query`` from the indexed documents.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only input
            is treated as "no question".

    Returns:
        The query engine's response as a string, or a short prompt asking the
        user to enter a question when ``query`` is blank.
    """
    # `query or ""` keeps a None value (e.g. a cleared input) from raising
    # AttributeError on .strip().
    question = (query or "").strip()
    if not question:
        return "Please enter a question (or select one from the FAQ list)."

    # Attach to the existing vector store; nothing is re-embedded here.
    index = VectorStoreIndex.from_vector_store(vector_store)
    engine = index.as_query_engine(similarity_top_k=TOP_K)
    return str(engine.query(question))
67
+
68
# Choices for the FAQ dropdown. The first entry is intentionally empty: it is
# the dropdown's initial value and means "no FAQ selected".
FAQS = [
    "",
    "What benefits are covered under the policy?",
    "How do I file a claim and what documents are required?",
    "What are the exclusions and limitations?",
    "Is pre-authorization needed for hospitalization?",
    "What is the reimbursement timeline?",
    "How are outpatient vs inpatient services handled?",
    "How can I check my network hospitals/clinics?",
    "What is the co-pay or deductible policy?",
]


def use_faq(selected_faq: str, free_text: str):
    """Pick the question to ask and return ``(prompt, answer_text)``.

    The selected FAQ wins when it is non-blank; otherwise the typed text is
    used. When both are blank, the prompt is empty and the second element is
    a hint asking the user for input.
    """
    # Try the candidates in priority order and answer the first non-blank one.
    for candidate in (selected_faq, free_text):
        cleaned = (candidate or "").strip()
        if cleaned:
            return cleaned, answer(cleaned)
    return "", "Please select a FAQ or type your question."
85
+
86
+ # ===== UI =====
87
+ CSS = """
88
+ .header { display:flex; flex-direction:column; align-items:center; gap:6px; }
89
+ .logo img { width:300px; height:300px; object-fit:contain; } /* fixed 300x300 */
90
+ .title { text-align:center; font-weight:700; font-size:1.4rem; margin:6px 0 0 0; }
91
+ .subnote { text-align:center; margin-top:-2px; opacity:0.8; }
92
+ """
93
+
94
+ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
95
+ with gr.Row():
96
+ with gr.Column():
97
+ gr.Markdown("<div class='header'>")
98
+ gr.Image(value=LOGO_PATH, show_label=False, elem_classes=["logo"])
99
+ gr.Markdown(
100
+ "<h1 class='title'>DDS Insurance Q&A — RAG Assistant</h1>"
101
+ "<p class='subnote'>Answers strictly from your insurance document(s)</p>"
102
+ )
103
+ gr.Markdown("</div>")
104
+
105
+ with gr.Row():
106
+ with gr.Column(scale=1):
107
+ gr.Markdown("### Ask from Frequently Asked Questions")
108
+ faq = gr.Dropdown(choices=FAQS, value=FAQS[0], label="Select a common question")
109
+
110
+ gr.Markdown("### Or type your question")
111
+ user_q = gr.Textbox(
112
+ label="Your question",
113
+ placeholder="e.g., What is covered under outpatient benefits?",
114
+ lines=2
115
+ )
116
+ ask_btn = gr.Button("Ask", variant="primary")
117
+
118
+ with gr.Column(scale=1):
119
+ chosen_prompt = gr.Textbox(label="Query sent", interactive=False)
120
+ answer_box = gr.Markdown()
121
+
122
+ ask_btn.click(use_faq, inputs=[faq, user_q], outputs=[chosen_prompt, answer_box])
123
+
124
+ if __name__ == "__main__":
125
+ demo.launch()
data/dds_logo.png ADDED

Git LFS Details

  • SHA256: b42f21a6a20156eabe67a0b0bfe99984b05ca38324186c5a1277d1d0a51e20a8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.42 MB
data/insurance.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536603a97eea5752c1447b7411ad4c03054d6d0f3a3bc1c887f3dc26de8e7892
3
+ size 1341586
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ pinecone-client>=5.0.1
3
+ openai>=1.51.0
4
+ llama-index>=0.11.0
5
+ llama-index-vector-stores-pinecone>=0.3.0
6
+ llama-index-embeddings-openai>=0.3.0
7
+ llama-index-llms-openai>=0.2.0
8
+ tiktoken>=0.7.0