|
|
|
|
|
""" |
|
|
UltraData-Math L3 Generator - Hugging Face Space Demo |
|
|
""" |
|
|
|
|
|
import os |
|
|
import asyncio |
|
|
import json |
|
|
import gradio as gr |
|
|
|
|
|
from openai import AsyncOpenAI |
|
|
|
|
|
from qa_synthesis import QA_PROMPTS, get_qa_prompt |
|
|
from conversation_synthesis import CONVERSATION_PROMPTS, get_conversation_prompt |
|
|
from multistyle_rewrite import MULTISTYLE_PROMPTS, get_multistyle_prompt |
|
|
from knowledge_textbook import ( |
|
|
get_knowledge_extraction_prompt, |
|
|
get_textbook_exercise_prompt, |
|
|
TEXTBOOK_EXERCISE_PROMPTS, |
|
|
) |
|
|
from run_synthesis import ( |
|
|
parse_qa_output, |
|
|
parse_conversation_output, |
|
|
parse_rewrite_output, |
|
|
parse_knowledge_output, |
|
|
parse_textbook_output, |
|
|
) |
|
|
|
|
|
|
|
|
# Runtime configuration, resolved once when the module is imported.

# Credential for the chat-completions endpoint; None when the variable is unset.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Root URL of the OpenAI-compatible service; defaults to the public OpenAI API.
BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")

# Model name used when a caller does not supply one.
DEFAULT_MODEL = "gpt-4o"
|
|
|
|
|
|
|
|
async def call_api(prompt: str, model: str = DEFAULT_MODEL, temperature: float = 0.7) -> str:
    """Send *prompt* as a single user message and return the model's reply text.

    Failures are reported in-band instead of being raised: a missing API key
    or any exception from the client yields a string starting with
    ``"Error: "``.

    Args:
        prompt: Full text of the user message.
        model: Model identifier passed to the chat-completions endpoint.
        temperature: Sampling temperature passed to the endpoint.

    Returns:
        The content of the first choice, an empty string when that content
        is missing, or an ``"Error: ..."`` message on failure.
    """
    if not API_KEY:
        return "Error: API Key not configured. Please contact administrator."

    try:
        # One client per call; the context manager closes its HTTP connection
        # pool on exit rather than leaving cleanup to garbage collection.
        async with AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL) as client:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=4096,
            )
            # The SDK types message.content as optional; normalise None to ""
            # so callers always receive a str, as the signature promises.
            return response.choices[0].message.content or ""
    except Exception as e:
        # Deliberately broad: this is the boundary where every failure is
        # converted into text for the caller.
        return f"Error: {e}"
|
|
|
|
|
|
|
|
def run_async(coro):
    """Run the coroutine *coro* to completion from synchronous code.

    A fresh event loop is created for the call and closed afterwards, so no
    loop is left open in the calling thread and a previously closed loop
    cannot be picked up by accident.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever *coro* returns.

    Raises:
        RuntimeError: If the calling thread already has a running event loop.
        Exception: Any exception raised by *coro* propagates unchanged.
    """
    return asyncio.run(coro)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def qa_synthesis(text: str, level: str, model: str, temperature: float):
    """Generate one problem/solution pair from mathematical source text.

    Args:
        text: Source material the question is built from.
        level: Key passed to ``get_qa_prompt`` to select the prompt template.
        model: Model identifier forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(problem, solution, raw_response)`` tuple. All three are empty
        strings when *text* is blank; ``problem`` and ``solution`` are empty
        strings when the parsed output lacks those keys.
    """
    if text.strip():
        filled_prompt = get_qa_prompt(level).format(text=text)
        raw_reply = run_async(call_api(filled_prompt, model, temperature))
        fields = parse_qa_output(raw_reply)
        problem = fields.get("problem", "")
        solution = fields.get("solution", "")
        return problem, solution, raw_reply

    # Blank input: nothing to send to the API.
    return "", "", ""
|
|
|
|
|
|
|
|
def conversation_synthesis(text: str, style: str, model: str, temperature: float):
    """Turn mathematical source text into a multi-turn conversation.

    Args:
        text: Source material to convert.
        style: Key passed to ``get_conversation_prompt`` to select the template.
        model: Model identifier forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(conversation, raw_response)`` tuple. Both are empty strings when
        *text* is blank; ``conversation`` falls back to the raw response when
        the parsed output has no ``"content"`` key.
    """
    if text.strip():
        filled_prompt = get_conversation_prompt(style).format(text=text)
        raw_reply = run_async(call_api(filled_prompt, model, temperature))
        fields = parse_conversation_output(raw_reply)
        conversation = fields.get("content", raw_reply)
        return conversation, raw_reply

    # Blank input: nothing to send to the API.
    return "", ""
|
|
|
|
|
|
|
|
def rewrite_synthesis(text: str, style: str, model: str, temperature: float):
    """Rewrite mathematical source text in the requested style.

    Args:
        text: Source material to rewrite.
        style: Key passed to ``get_multistyle_prompt`` to select the template.
        model: Model identifier forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(rewritten, raw_response)`` tuple. Both are empty strings when
        *text* is blank; ``rewritten`` falls back to the raw response when
        the parsed output has no ``"rewritten"`` key.
    """
    if text.strip():
        filled_prompt = get_multistyle_prompt(style).format(text=text)
        raw_reply = run_async(call_api(filled_prompt, model, temperature))
        fields = parse_rewrite_output(raw_reply)
        rewritten = fields.get("rewritten", raw_reply)
        return rewritten, raw_reply

    # Blank input: nothing to send to the API.
    return "", ""
|
|
|
|
|
|
|
|
def knowledge_extraction(text: str, model: str, temperature: float):
    """Extract knowledge points from mathematical source text.

    Args:
        text: Source material to analyse.
        model: Model identifier forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(formatted_points, raw_response)`` tuple. Both are empty strings
        when *text* is blank. The parsed ``"knowledge_points"`` entries are
        joined with a ``---`` separator line; when there are none, a fixed
        placeholder message is returned instead.
    """
    if text.strip():
        filled_prompt = get_knowledge_extraction_prompt().format(text=text)
        raw_reply = run_async(call_api(filled_prompt, model, temperature))
        points = parse_knowledge_output(raw_reply).get("knowledge_points", [])

        if points:
            summary = "\n\n---\n\n".join(points)
        else:
            summary = "No knowledge points extracted."
        return summary, raw_reply

    # Blank input: nothing to send to the API.
    return "", ""
|
|
|
|
|
|
|
|
def textbook_exercise(knowledge_point: str, difficulty: str, model: str, temperature: float):
    """Generate textbook-style exercise material for one knowledge point.

    Args:
        knowledge_point: The knowledge point the exercise is based on.
        difficulty: Key passed to ``get_textbook_exercise_prompt`` to select
            the template.
        model: Model identifier forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(material, raw_response)`` tuple. Both are empty strings when
        *knowledge_point* is blank; ``material`` falls back to the raw
        response when the parsed output has no ``"material"`` key.
    """
    if knowledge_point.strip():
        template = get_textbook_exercise_prompt(difficulty)
        filled_prompt = template.format(mathematical_knowledge_point=knowledge_point)
        raw_reply = run_async(call_api(filled_prompt, model, temperature))
        fields = parse_textbook_output(raw_reply)
        material = fields.get("material", raw_reply)
        return material, raw_reply

    # Blank input: nothing to send to the API.
    return "", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...): page background and font, the
# gradient title, the subtitle, primary-button styling and a hidden footer.
#
# The button rules list two selectors. `.gr-button-primary` is kept from the
# original stylesheet; `button.primary` is added because newer Gradio releases
# render primary buttons with a bare `primary` class, so the original selector
# alone would never match there.
# NOTE(review): confirm the button class against the deployed Gradio version.
custom_css = """
.gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
}

.main-title {
    font-weight: 700 !important;
    font-size: 2.2rem !important;
    background: linear-gradient(90deg, #e94560, #f39c12, #00d9ff) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
}

.subtitle {
    text-align: center !important;
    color: #94a3b8 !important;
    font-size: 1rem !important;
    margin-bottom: 1.5rem !important;
}

.gr-button-primary,
button.primary {
    background: linear-gradient(135deg, #e94560 0%, #f39c12 100%) !important;
    border: none !important;
    font-weight: 600 !important;
}

.gr-button-primary:hover,
button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(233, 69, 96, 0.4) !important;
}

footer {
    display: none !important;
}
"""
|
|
|
|
|
# Gradio UI: a shared model/temperature row followed by one tab per synthesis
# task. Each tab has an input column, an output column and a button wired to
# the matching handler. The "Raw Response" boxes are created hidden.
with gr.Blocks(title="UltraData-Math L3 Generator", css=custom_css) as demo:
    gr.HTML('<h1 class="main-title">🧮 UltraData-Math L3 Generator</h1>')
    gr.HTML('<p class="subtitle">LLM-based Mathematical Data Synthesis Tool</p>')

    # Settings shared by every tab.
    with gr.Row():
        model_select = gr.Dropdown(
            choices=["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"],
            value="gpt-4o",
            label="Model",
            scale=1,
        )
        temperature = gr.Slider(
            minimum=0.0,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature",
            scale=1,
        )

    with gr.Tabs():
        # --- Q&A synthesis -------------------------------------------------
        with gr.TabItem("📝 Q&A Synthesis"):
            gr.Markdown("根据数学内容生成问答对,按教育难度分级。")
            with gr.Row():
                with gr.Column():
                    qa_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    qa_level = gr.Radio(
                        choices=list(QA_PROMPTS.keys()),
                        value="high_school",
                        label="Difficulty Level",
                    )
                    qa_btn = gr.Button("🚀 Generate Q&A", variant="primary")
                with gr.Column():
                    qa_problem = gr.Textbox(label="Generated Problem", lines=4)
                    qa_solution = gr.Textbox(label="Generated Solution", lines=8)
                    qa_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

            qa_btn.click(
                fn=qa_synthesis,
                inputs=[qa_input, qa_level, model_select, temperature],
                outputs=[qa_problem, qa_solution, qa_raw],
            )

        # --- Conversation synthesis ----------------------------------------
        with gr.TabItem("💬 Conversation Synthesis"):
            gr.Markdown("将数学内容转换为多轮对话格式。")
            with gr.Row():
                with gr.Column():
                    conv_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    conv_style = gr.Radio(
                        choices=list(CONVERSATION_PROMPTS.keys()),
                        value="teacher_student",
                        label="Conversation Style",
                    )
                    conv_btn = gr.Button("🚀 Generate Conversation", variant="primary")
                with gr.Column():
                    conv_output = gr.Textbox(label="Generated Conversation", lines=15)
                    conv_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

            conv_btn.click(
                fn=conversation_synthesis,
                inputs=[conv_input, conv_style, model_select, temperature],
                outputs=[conv_output, conv_raw],
            )

        # --- Multi-style rewrite -------------------------------------------
        with gr.TabItem("✨ Multi-style Rewrite"):
            gr.Markdown("将数学内容改写为不同风格。")
            with gr.Row():
                with gr.Column():
                    rewrite_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    rewrite_style = gr.Radio(
                        choices=list(MULTISTYLE_PROMPTS.keys()),
                        value="textbook",
                        label="Rewrite Style",
                    )
                    rewrite_btn = gr.Button("🚀 Rewrite", variant="primary")
                with gr.Column():
                    rewrite_output = gr.Textbox(label="Rewritten Content", lines=15)
                    rewrite_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

            rewrite_btn.click(
                fn=rewrite_synthesis,
                inputs=[rewrite_input, rewrite_style, model_select, temperature],
                outputs=[rewrite_output, rewrite_raw],
            )

        # --- Knowledge extraction ------------------------------------------
        with gr.TabItem("📚 Knowledge Extraction"):
            gr.Markdown("从数学内容中提取定义、定理、性质等知识点。")
            with gr.Row():
                with gr.Column():
                    know_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=10,
                    )
                    know_btn = gr.Button("🚀 Extract Knowledge", variant="primary")
                with gr.Column():
                    know_output = gr.Textbox(label="Extracted Knowledge Points", lines=15)
                    know_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

            know_btn.click(
                fn=knowledge_extraction,
                inputs=[know_input, model_select, temperature],
                outputs=[know_output, know_raw],
            )

        # --- Textbook exercise ---------------------------------------------
        with gr.TabItem("📖 Textbook Exercise"):
            gr.Markdown("基于知识点生成不同难度的教材式练习。")
            with gr.Row():
                with gr.Column():
                    textbook_input = gr.Textbox(
                        label="Input Knowledge Point",
                        placeholder="Enter a mathematical knowledge point...",
                        lines=6,
                    )
                    textbook_diff = gr.Radio(
                        choices=list(TEXTBOOK_EXERCISE_PROMPTS.keys()),
                        value="easy",
                        label="Difficulty",
                    )
                    textbook_btn = gr.Button("🚀 Generate Exercise", variant="primary")
                with gr.Column():
                    textbook_output = gr.Textbox(label="Generated Exercise Material", lines=15)
                    textbook_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

            textbook_btn.click(
                fn=textbook_exercise,
                inputs=[textbook_input, textbook_diff, model_select, temperature],
                outputs=[textbook_output, textbook_raw],
            )

    # Page footer, rendered below the tabs.
    gr.HTML("""
    <div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.85rem;">
        <p>🔬 <strong>UltraData-Math L3 Generator</strong> - Part of the UltraData-Math Project</p>
        <p>LLM-based data synthesis for Q&A, conversations, rewriting, and more.</p>
    </div>
    """)


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|
|
|