unknown committed on
Commit
8aaa182
·
1 Parent(s): c2b73ae

Add RAG fallback for keyword matching without OpenAI

Browse files
Files changed (1) hide show
  1. rag/template_store.py +95 -22
rag/template_store.py CHANGED
@@ -116,31 +116,104 @@ class TemplateStore:
116
  Returns:
117
  List of dicts with template info and similarity scores
118
  """
119
- if self.index is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  return []
121
 
122
- try:
123
- # Query the index
124
- retriever = self.index.as_retriever(similarity_top_k=top_k)
125
- nodes = retriever.retrieve(query)
126
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  results = []
128
- for node in nodes:
129
- # Extract template information
130
- template_info = {
131
- "template_name": node.metadata.get("template_name", "Unknown"),
132
- "filename": node.metadata.get("filename", ""),
133
- "content": node.text,
134
- "score": node.score if hasattr(node, 'score') else 0.0,
135
- "excerpt": self._extract_excerpt(node.text)
136
- }
137
- results.append(template_info)
138
-
139
- return results
140
-
141
- except Exception as e:
142
- print(f"⚠️ Warning: Error finding similar templates: {e}")
143
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  def _extract_excerpt(self, content: str, max_lines: int = 5) -> str:
146
  """Extract a short excerpt from template"""
 
116
  Returns:
117
  List of dicts with template info and similarity scores
118
  """
119
+ # If embeddings available, use vector search
120
+ if self.index is not None:
121
+ try:
122
+ # Query the index
123
+ retriever = self.index.as_retriever(similarity_top_k=top_k)
124
+ nodes = retriever.retrieve(query)
125
+
126
+ results = []
127
+ for node in nodes:
128
+ # Extract template information
129
+ template_info = {
130
+ "template_name": node.metadata.get("template_name", "Unknown"),
131
+ "filename": node.metadata.get("filename", ""),
132
+ "content": node.text,
133
+ "score": node.score if hasattr(node, 'score') else 0.0,
134
+ "excerpt": self._extract_excerpt(node.text)
135
+ }
136
+ results.append(template_info)
137
+
138
+ return results
139
+
140
+ except Exception as e:
141
+ print(f"⚠️ Warning: Error finding similar templates: {e}")
142
+
143
+ # Fallback: Use simple keyword matching when embeddings not available
144
+ return self._find_similar_fallback(query, top_k)
145
+
146
def _find_similar_fallback(self, query: str, top_k: int = 3) -> List[Dict]:
    """
    Keyword-based fallback used when no embedding index is available.

    The query is scanned for a fixed set of SQL-related keywords; each
    keyword maps to one or more template name fragments. Every ``*.sql``
    file in ``self.templates_dir`` whose stem contains a matched fragment
    is returned with a fixed score of 0.8. If no keyword matches, the
    first readable templates from ``self.get_all_templates()`` are
    returned with a fixed score of 0.5.

    Args:
        query: Free-text description of the desired query pattern.
        top_k: Maximum number of templates to return.

    Returns:
        List of dicts with keys ``template_name``, ``filename``,
        ``content``, ``score`` and ``excerpt``. Empty when ``top_k`` is
        not positive or the templates directory does not exist.
    """
    # Function-scope import so this method does not depend on the module's
    # import block already providing `re`.
    import re

    if top_k <= 0 or not self.templates_dir.exists():
        return []

    keywords = {
        'top': ['top_n_per_group', 'running_totals'],
        'window': ['window_functions', 'running_totals', 'moving_average'],
        'aggregate': ['customer_aggregation', 'cohort_analysis'],
        'date': ['date_filtering', 'yoy_comparison'],
        'join': ['left_join_missing', 'self_join'],
        'missing': ['left_join_missing', 'exists_not_exists'],
        'cte': ['cte_multi_step', 'cohort_analysis'],
        'running': ['running_totals', 'moving_average'],
        'moving': ['moving_average'],
        'cohort': ['cohort_analysis'],
        'year': ['yoy_comparison'],
        'pivot': ['pivoting'],
        'union': ['union_dedupe'],
        'subquery': ['subquery_optimization'],
    }

    def load_entry(path, template_name, filename, score):
        """Read one template file; return its result dict, or None if unreadable."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            # Best effort: report the unreadable template and carry on.
            print(f"⚠️ Warning: Could not read template {path}: {e}")
            return None
        return {
            "template_name": template_name,
            "filename": filename,
            "content": content,
            "score": score,
            "excerpt": self._extract_excerpt(content),
        }

    query_lower = (query or "").lower()

    # Match keywords only at the start of a word. A plain substring test
    # makes 'cte' fire on "selected", 'top' on "stop" and 'date' on
    # "update"; a leading word boundary still accepts "joins" or "dates".
    matches = set()
    for keyword, templates in keywords.items():
        if re.search(r"\b" + re.escape(keyword), query_lower):
            matches.update(templates)

    if matches:
        score = 0.8
        candidates = (
            (sql_file, sql_file.stem.replace('_', ' ').title(), sql_file.name)
            for sql_file in sorted(self.templates_dir.glob("*.sql"))
            if any(match in sql_file.stem for match in matches)
        )
    else:
        # No keyword hit: fall back to the first templates in listing order.
        score = 0.5
        candidates = (
            (tmpl['path'], tmpl['template_name'], tmpl['filename'])
            for tmpl in self.get_all_templates()
        )

    # Collect until top_k readable templates are found, so an unreadable
    # file does not shrink the result when more candidates remain.
    results = []
    for path, template_name, filename in candidates:
        entry = load_entry(path, template_name, filename, score)
        if entry is None:
            continue
        results.append(entry)
        if len(results) >= top_k:
            break

    return results
217
 
218
  def _extract_excerpt(self, content: str, max_lines: int = 5) -> str:
219
  """Extract a short excerpt from template"""