# Search-and-retrieval utilities for automated paper review:
# SoTA literature search and per-category critique-evidence retrieval
# via Tavily / ArXiv / Semantic Scholar, synthesized with an LLM.
import os
from typing import Dict, List
import asyncio
from openai import OpenAI
from langchain_community.utilities import ArxivAPIWrapper, SerpAPIWrapper
from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
load_dotenv()
# Initialize OpenRouter client for LLM calls
# (OpenAI SDK pointed at OpenRouter's OpenAI-compatible endpoint;
# requires OPENROUTER_API_KEY in the environment / .env file)
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)
# Model for search/retrieval tasks
SEARCH_MODEL = "google/gemini-2.5-flash-lite"
# Initialize search tools
semantic_scholar = SemanticScholarQueryRun()
# NOTE(review): google_scholar is constructed but not used anywhere in this
# module — presumably consumed elsewhere; confirm before removing.
google_scholar = SerpAPIWrapper(params={"engine": "google_scholar"})
arxiv_search = ArxivAPIWrapper()
tavily_search = TavilySearchResults(max_results=5)
def combine_critiques(critique_points: List[Dict]) -> Dict[str, str]:
    """Merge critique lists from several reviews into one string per category.

    Args:
        critique_points: One dict per review, mapping a category name to a
            list of critique strings. Missing or empty categories are skipped.

    Returns:
        Mapping of each known category to its collected critiques joined
        with " | ", or the literal "No critiques" when nothing was found.
    """
    categories = ["Methodology", "Clarity", "Experiments", "Significance", "Novelty"]
    buckets: Dict[str, List[str]] = {name: [] for name in categories}
    for review in critique_points:
        for name in categories:
            points = review.get(name)
            if points:
                buckets[name].extend(points)
    # Collapse each bucket into a single display string.
    return {
        name: " | ".join(items) if items else "No critiques"
        for name, items in buckets.items()
    }
async def run_search_tool(tool_name: str, tool_func, query: str) -> str:
    """Invoke a blocking search callable off the event loop.

    Args:
        tool_name: Label used only in the failure log message.
        tool_func: Synchronous callable taking the query string.
        query: Search query to pass through.

    Returns:
        The stringified result, or "" when the tool returns a falsy value
        or raises. Failures are printed, never propagated — one broken
        search source must not abort the whole pipeline.
    """
    try:
        raw = await asyncio.to_thread(tool_func, query)
    except Exception as exc:  # deliberate best-effort catch-all
        print(f"{tool_name} search failed: {exc}")
        return ""
    return str(raw) if raw else ""
async def search_sota(paper_title: str, paper_abstract: str, retries: int = 3) -> str:
    """Search for state-of-the-art research related to the paper.

    Runs Tavily, ArXiv, and Semantic Scholar searches in parallel, then asks
    the LLM to synthesize the combined results into a short summary.

    Args:
        paper_title: Paper title.
        paper_abstract: Paper abstract (truncated to 500 chars in the prompt).
        retries: Maximum LLM synthesis attempts (exponential backoff).

    Returns:
        Synthesized SoTA summary; raw (truncated) search results if every
        synthesis attempt fails; a fixed message if no source returned data.
    """
    # Create search query
    search_query = f"{paper_title} recent advances methodology"
    # Run multiple searches in parallel (each is best-effort, returns "" on failure)
    search_results = await asyncio.gather(
        run_search_tool("Tavily", tavily_search.run, search_query),
        run_search_tool("ArXiv", arxiv_search.run, search_query[:300]),
        run_search_tool("SemanticScholar", semantic_scholar.run, paper_title),
    )
    # FIX: only keep sections that returned content — the previous
    # unconditional join inserted stray blank "\n\n" separators for
    # empty sources into the LLM prompt.
    labels = ("Tavily", "ArXiv", "Semantic Scholar")
    sections = [
        f"=== {label} Results ===\n{result}"
        for label, result in zip(labels, search_results)
        if result
    ]
    combined_results = "\n\n".join(sections)
    if not combined_results.strip():
        return "No SoTA research found from available sources."
    # Use LLM to synthesize the results
    system_prompt = """
You are an expert at synthesizing academic research findings.
Summarize the search results to identify state-of-the-art approaches and recent advances.
Focus on methodologies, key findings, and how they relate to the paper being reviewed.
"""
    user_prompt = f"""
Paper Title: {paper_title}
Paper Abstract: {paper_abstract[:500]}
Search Results:
{combined_results[:4000]}
Provide a concise summary of the state-of-the-art research relevant to this paper.
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    for attempt in range(retries):
        try:
            response = await asyncio.to_thread(
                client.chat.completions.create,
                model=SEARCH_MODEL,
                messages=messages,
                max_tokens=2048,
            )
            # An empty/whitespace completion is treated as a failure and retried.
            if not response.choices or not response.choices[0].message.content.strip():
                raise ValueError("Empty response from API")
            return response.choices[0].message.content.strip()
        except Exception as e:
            wait_time = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
            print(f"SoTA synthesis attempt {attempt + 1} failed: {e}")
            if attempt < retries - 1:
                await asyncio.sleep(wait_time)
            else:
                # Return raw results if synthesis fails on the final attempt
                return f"Raw search results (synthesis failed):\n{combined_results[:2000]}"
async def retrieve_evidence_for_category(
    category: str,
    critiques: str,
    retries: int = 3
) -> str:
    """Retrieve evidence for critiques in a specific category.

    Searches Tavily and ArXiv for material related to the critiques, then
    asks the LLM to summarize supporting/contradicting evidence.

    Args:
        category: Category name (e.g., "Methodology").
        critiques: Combined critique text for that category.
        retries: Maximum LLM analysis attempts (exponential backoff).

    Returns:
        Evidence summary, or a descriptive fallback message when there is
        nothing to validate, no search hits, or the LLM never responded.
    """
    if critiques == "No critiques" or not critiques.strip():
        return f"No critiques to validate for {category}"
    # Create targeted search query (critique text truncated to keep it short)
    search_query = f"{category} research validation {critiques[:200]}"
    try:
        # Run searches (best-effort; each returns "" on failure)
        tavily_result = await run_search_tool("Tavily", tavily_search.run, search_query)
        arxiv_result = await run_search_tool("ArXiv", arxiv_search.run, search_query[:200])
        combined = f"{tavily_result}\n{arxiv_result}".strip()
        if not combined:
            return f"No evidence found for {category} critiques"
        # Use LLM to analyze relevance
        system_prompt = f"""
You are an expert at evaluating academic critiques.
Analyze the search results to find evidence that supports or contradicts the critiques.
Focus on the {category} aspect.
"""
        user_prompt = f"""
Critiques for {category}: {critiques}
Search Results:
{combined[:2000]}
Summarize the evidence found that relates to these critiques.
"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        for attempt in range(retries):
            try:
                response = await asyncio.to_thread(
                    client.chat.completions.create,
                    model=SEARCH_MODEL,
                    messages=messages,
                    max_tokens=1024,
                )
                if response.choices and response.choices[0].message.content.strip():
                    return response.choices[0].message.content.strip()
            except Exception:
                if attempt < retries - 1:
                    await asyncio.sleep(2 ** attempt)  # backoff before retrying
        # FIX: fallback moved OUTSIDE the retry loop — previously it sat in
        # the loop body, so the function returned after the first failed
        # attempt and the remaining retries never ran.
        return f"Evidence retrieval completed for {category}"
    except Exception as e:
        return f"Error retrieving evidence for {category}: {str(e)}"
async def retrieve_evidence(combined_critiques: Dict[str, str]) -> Dict[str, str]:
    """Retrieve evidence for every critique category, one at a time.

    Args:
        combined_critiques: Mapping of category name to combined critique text.

    Returns:
        Mapping of category name to its retrieved evidence summary.
    """
    findings: Dict[str, str] = {}
    # Sequential processing with a pause after each category keeps us
    # under the external search/LLM API rate limits.
    for category, critique_text in combined_critiques.items():
        evidence = await retrieve_evidence_for_category(category, critique_text)
        findings[category] = evidence
        await asyncio.sleep(1)
    return findings
async def search_and_retrieve(
    paper_title: str,
    paper_abstract: str,
    critique_points: List[Dict]
) -> Dict:
    """Run the complete search-and-retrieval pipeline for a paper.

    Pipeline: (1) SoTA literature search, (2) merge critique points per
    category, (3) gather evidence for each category's critiques.

    Args:
        paper_title: Paper title.
        paper_abstract: Paper abstract.
        critique_points: List of critique point dictionaries.

    Returns:
        Dict with "SoTA_Results", "Combined_Critiques", and
        "Retrieved_Evidence"; on any failure, the same keys with empty
        values plus an "error" message.
    """
    try:
        sota_summary = await search_sota(paper_title, paper_abstract)
        merged_critiques = combine_critiques(critique_points)
        evidence = await retrieve_evidence(merged_critiques)
    except Exception as e:
        return {
            "error": str(e),
            "SoTA_Results": "",
            "Combined_Critiques": {},
            "Retrieved_Evidence": {}
        }
    return {
        "SoTA_Results": sota_summary,
        "Combined_Critiques": merged_critiques,
        "Retrieved_Evidence": evidence
    }