# ZhouChuYue
# Initial commit: UltraData-Math L3 Generator Space
# 787a7ad
# raw
# history blame
# 12.5 kB
# -*- coding: utf-8 -*-
"""
UltraData-Math L3 Generator - Hugging Face Space Demo
"""
import os
import asyncio
import json
import gradio as gr
from openai import AsyncOpenAI
from qa_synthesis import QA_PROMPTS, get_qa_prompt
from conversation_synthesis import CONVERSATION_PROMPTS, get_conversation_prompt
from multistyle_rewrite import MULTISTYLE_PROMPTS, get_multistyle_prompt
from knowledge_textbook import (
get_knowledge_extraction_prompt,
get_textbook_exercise_prompt,
TEXTBOOK_EXERCISE_PROMPTS,
)
from run_synthesis import (
parse_qa_output,
parse_conversation_output,
parse_rewrite_output,
parse_knowledge_output,
parse_textbook_output,
)
# API configuration is read from environment variables (set via HF Secrets).
API_KEY = os.getenv("OPENAI_API_KEY")
# Falls back to the public OpenAI endpoint when OPENAI_BASE_URL is not set.
BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
# Model used by call_api when the caller does not pass one explicitly.
DEFAULT_MODEL = "gpt-4o"
async def call_api(prompt: str, model: str = DEFAULT_MODEL, temperature: float = 0.7) -> str:
    """Send *prompt* as a single user message and return the model's reply text.

    Failures are reported in-band instead of being raised: a missing API key,
    any exception raised by the request, or a reply that carries no text all
    produce a string starting with ``"Error: "``.

    Args:
        prompt: Full prompt text, sent as the only message (role ``user``).
        model: Chat model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text of the first completion choice, or an ``"Error: ..."`` string.
    """
    if not API_KEY:
        return "Error: API Key not configured. Please contact administrator."

    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
    try:
        # A fresh client is created per call, so close its HTTP connection
        # pool as soon as the request finishes instead of leaking it.
        async with client:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=4096,
            )
        content = response.choices[0].message.content
    except Exception as e:  # UI boundary: report the failure, do not crash
        return f"Error: {str(e)}"

    # The API may return a message without text content; keep the declared
    # ``str`` return type so downstream parsers never receive None.
    if content is None:
        return "Error: Model returned an empty response."
    return content
def run_async(coro):
    """Run the coroutine *coro* to completion from synchronous code.

    Each call gets a fresh event loop that is closed afterwards, so the
    function works in threads that have no loop, is unaffected by a stale
    (already closed) loop left installed on the thread, and does not leak
    loops across calls.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever *coro* returns.

    Raises:
        RuntimeError: If called from a thread that already has a running
            event loop (``asyncio.run`` cannot be nested).
    """
    return asyncio.run(coro)
# ============================================================================
# Task Handlers
# ============================================================================
def qa_synthesis(text: str, level: str, model: str, temperature: float):
    """Generate one question/answer pair from *text* at the given level.

    Args:
        text: Source mathematical content; blank input short-circuits.
        level: Difficulty key passed to ``get_qa_prompt``.
        model: Chat model name forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(problem, solution, raw_response)`` tuple of strings. All three
        are empty for blank input. When the API call fails, the error message
        is returned in the problem slot (and as the raw response).
    """
    if not text.strip():
        return "", "", ""

    prompt = get_qa_prompt(level).format(text=text)
    response = run_async(call_api(prompt, model, temperature))

    # call_api reports failures in-band as "Error: ..." strings. The raw
    # response box is hidden in the UI, so without this check a failed call
    # would show two empty fields and no explanation.
    if isinstance(response, str) and response.startswith("Error:"):
        return response, "", response

    parsed = parse_qa_output(response)
    return parsed.get("problem", ""), parsed.get("solution", ""), response
def conversation_synthesis(text: str, style: str, model: str, temperature: float):
    """Turn *text* into a multi-turn conversation in the requested style.

    Returns a ``(conversation, raw_response)`` pair. Both are empty strings
    for blank input. The first item is the parsed ``"content"`` entry, or the
    raw response when the parsed result has no such key.
    """
    if text.strip():
        filled_prompt = get_conversation_prompt(style).format(text=text)
        raw = run_async(call_api(filled_prompt, model, temperature))
        result = parse_conversation_output(raw)
        return result.get("content", raw), raw
    return "", ""
def rewrite_synthesis(text: str, style: str, model: str, temperature: float):
    """Rewrite *text* in the requested style.

    Returns a ``(rewritten, raw_response)`` pair. Both are empty strings for
    blank input. The first item is the parsed ``"rewritten"`` entry, or the
    raw response when the parsed result has no such key.
    """
    if text.strip():
        template = get_multistyle_prompt(style)
        raw = run_async(call_api(template.format(text=text), model, temperature))
        result = parse_rewrite_output(raw)
        return result.get("rewritten", raw), raw
    return "", ""
def knowledge_extraction(text: str, model: str, temperature: float):
    """Extract knowledge points from *text*.

    Args:
        text: Source mathematical content; blank input short-circuits.
        model: Chat model name forwarded to ``call_api``.
        temperature: Sampling temperature forwarded to ``call_api``.

    Returns:
        A ``(formatted_points, raw_response)`` pair. Both are empty strings
        for blank input. Points are joined with a ``---`` separator line;
        when none are parsed the first item is a fixed "nothing extracted"
        message. When the API call fails, the error message is returned in
        both slots.
    """
    if not text.strip():
        return "", ""

    prompt = get_knowledge_extraction_prompt().format(text=text)
    response = run_async(call_api(prompt, model, temperature))

    # call_api reports failures in-band as "Error: ..." strings. Show them
    # directly; otherwise a failed call would be reported as "no knowledge
    # points" while the real message sits in the hidden raw-response box.
    if isinstance(response, str) and response.startswith("Error:"):
        return response, response

    parsed = parse_knowledge_output(response)
    knowledge_points = parsed.get("knowledge_points", [])
    if knowledge_points:
        formatted = "\n\n---\n\n".join(knowledge_points)
    else:
        formatted = "No knowledge points extracted."
    return formatted, response
def textbook_exercise(knowledge_point: str, difficulty: str, model: str, temperature: float):
    """Generate textbook-style exercise material for one knowledge point.

    Returns a ``(material, raw_response)`` pair. Both are empty strings for
    blank input. The first item is the parsed ``"material"`` entry, or the raw
    response when the parsed result has no such key.
    """
    if knowledge_point.strip():
        template = get_textbook_exercise_prompt(difficulty)
        filled_prompt = template.format(mathematical_knowledge_point=knowledge_point)
        raw = run_async(call_api(filled_prompt, model, temperature))
        result = parse_textbook_output(raw)
        return result.get("material", raw), raw
    return "", ""
# ============================================================================
# Gradio UI
# ============================================================================
# Stylesheet passed to gr.Blocks(css=...): page background and font, the
# gradient title (.main-title) and subtitle (.subtitle) classes used by the
# header HTML, primary-button styling, and a rule that hides the page footer.
custom_css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
}
.main-title {
font-weight: 700 !important;
font-size: 2.2rem !important;
background: linear-gradient(90deg, #e94560, #f39c12, #00d9ff) !important;
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
background-clip: text !important;
text-align: center !important;
}
.subtitle {
text-align: center !important;
color: #94a3b8 !important;
font-size: 1rem !important;
margin-bottom: 1.5rem !important;
}
.gr-button-primary {
background: linear-gradient(135deg, #e94560 0%, #f39c12 100%) !important;
border: none !important;
font-weight: 600 !important;
}
.gr-button-primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(233, 69, 96, 0.4) !important;
}
footer {
display: none !important;
}
"""
# Build the Gradio interface. `demo` is the Blocks app that the __main__
# guard at the end of the file launches.
with gr.Blocks(title="UltraData-Math L3 Generator", css=custom_css) as demo:
    # Page header; the CSS classes are defined in custom_css.
    gr.HTML('<h1 class="main-title">🧮 UltraData-Math L3 Generator</h1>')
    gr.HTML('<p class="subtitle">LLM-based Mathematical Data Synthesis Tool</p>')
    # Global settings: every tab's click handler receives these two values.
    with gr.Row():
        model_select = gr.Dropdown(
            choices=["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"],
            value="gpt-4o",
            label="Model",
            scale=1,
        )
        temperature = gr.Slider(
            minimum=0.0, maximum=1.5, value=0.7, step=0.1,
            label="Temperature",
            scale=1,
        )
    with gr.Tabs():
        # Q&A Synthesis Tab
        with gr.TabItem("📝 Q&A Synthesis"):
            # Tab description (Chinese): "Generate Q&A pairs from mathematical
            # content, graded by educational difficulty level."
            gr.Markdown("根据数学内容生成问答对,按教育难度分级。")
            with gr.Row():
                with gr.Column():
                    qa_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    # Choices come from the QA_PROMPTS keys; assumes
                    # "high_school" is one of them — TODO confirm.
                    qa_level = gr.Radio(
                        choices=list(QA_PROMPTS.keys()),
                        value="high_school",
                        label="Difficulty Level",
                    )
                    qa_btn = gr.Button("🚀 Generate Q&A", variant="primary")
                with gr.Column():
                    qa_problem = gr.Textbox(label="Generated Problem", lines=4)
                    qa_solution = gr.Textbox(label="Generated Solution", lines=8)
                    # Hidden from the page but still wired as a handler output.
                    qa_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)
            qa_btn.click(
                qa_synthesis,
                inputs=[qa_input, qa_level, model_select, temperature],
                outputs=[qa_problem, qa_solution, qa_raw],
            )
        # Conversation Synthesis Tab
        with gr.TabItem("💬 Conversation Synthesis"):
            # Tab description (Chinese): "Convert mathematical content into a
            # multi-turn conversation format."
            gr.Markdown("将数学内容转换为多轮对话格式。")
            with gr.Row():
                with gr.Column():
                    conv_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    # Choices come from the CONVERSATION_PROMPTS keys; assumes
                    # "teacher_student" is one of them — TODO confirm.
                    conv_style = gr.Radio(
                        choices=list(CONVERSATION_PROMPTS.keys()),
                        value="teacher_student",
                        label="Conversation Style",
                    )
                    conv_btn = gr.Button("🚀 Generate Conversation", variant="primary")
                with gr.Column():
                    conv_output = gr.Textbox(label="Generated Conversation", lines=15)
                    # Hidden from the page but still wired as a handler output.
                    conv_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)
            conv_btn.click(
                conversation_synthesis,
                inputs=[conv_input, conv_style, model_select, temperature],
                outputs=[conv_output, conv_raw],
            )
        # Rewrite Tab
        with gr.TabItem("✨ Multi-style Rewrite"):
            # Tab description (Chinese): "Rewrite mathematical content in
            # different styles."
            gr.Markdown("将数学内容改写为不同风格。")
            with gr.Row():
                with gr.Column():
                    rewrite_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=8,
                    )
                    # Choices come from the MULTISTYLE_PROMPTS keys; assumes
                    # "textbook" is one of them — TODO confirm.
                    rewrite_style = gr.Radio(
                        choices=list(MULTISTYLE_PROMPTS.keys()),
                        value="textbook",
                        label="Rewrite Style",
                    )
                    rewrite_btn = gr.Button("🚀 Rewrite", variant="primary")
                with gr.Column():
                    rewrite_output = gr.Textbox(label="Rewritten Content", lines=15)
                    # Hidden from the page but still wired as a handler output.
                    rewrite_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)
            rewrite_btn.click(
                rewrite_synthesis,
                inputs=[rewrite_input, rewrite_style, model_select, temperature],
                outputs=[rewrite_output, rewrite_raw],
            )
        # Knowledge Extraction Tab
        with gr.TabItem("📚 Knowledge Extraction"):
            # Tab description (Chinese): "Extract knowledge points such as
            # definitions, theorems, and properties from mathematical content."
            gr.Markdown("从数学内容中提取定义、定理、性质等知识点。")
            with gr.Row():
                with gr.Column():
                    know_input = gr.Textbox(
                        label="Input Mathematical Content",
                        placeholder="Enter mathematical content here...",
                        lines=10,
                    )
                    know_btn = gr.Button("🚀 Extract Knowledge", variant="primary")
                with gr.Column():
                    know_output = gr.Textbox(label="Extracted Knowledge Points", lines=15)
                    # Hidden from the page but still wired as a handler output.
                    know_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)
            # This handler takes no style/level argument, only the shared settings.
            know_btn.click(
                knowledge_extraction,
                inputs=[know_input, model_select, temperature],
                outputs=[know_output, know_raw],
            )
        # Textbook Exercise Tab
        with gr.TabItem("📖 Textbook Exercise"):
            # Tab description (Chinese): "Generate textbook-style exercises of
            # varying difficulty based on a knowledge point."
            gr.Markdown("基于知识点生成不同难度的教材式练习。")
            with gr.Row():
                with gr.Column():
                    textbook_input = gr.Textbox(
                        label="Input Knowledge Point",
                        placeholder="Enter a mathematical knowledge point...",
                        lines=6,
                    )
                    # Choices come from the TEXTBOOK_EXERCISE_PROMPTS keys;
                    # assumes "easy" is one of them — TODO confirm.
                    textbook_diff = gr.Radio(
                        choices=list(TEXTBOOK_EXERCISE_PROMPTS.keys()),
                        value="easy",
                        label="Difficulty",
                    )
                    textbook_btn = gr.Button("🚀 Generate Exercise", variant="primary")
                with gr.Column():
                    textbook_output = gr.Textbox(label="Generated Exercise Material", lines=15)
                    # Hidden from the page but still wired as a handler output.
                    textbook_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)
            textbook_btn.click(
                textbook_exercise,
                inputs=[textbook_input, textbook_diff, model_select, temperature],
                outputs=[textbook_output, textbook_raw],
            )
    # Static footer shown below the tabs.
    gr.HTML("""
<div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.85rem;">
<p>🔬 <strong>UltraData-Math L3 Generator</strong> - Part of the UltraData-Math Project</p>
<p>LLM-based data synthesis for Q&A, conversations, rewriting, and more.</p>
</div>
""")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
    # rendering — confirm against the installed Gradio version.
    demo.launch(ssr_mode=False)