# -*- coding: utf-8 -*- """ UltraData-Math L3 Generator - Hugging Face Space Demo """ import os import asyncio import json import gradio as gr from openai import AsyncOpenAI from qa_synthesis import QA_PROMPTS, get_qa_prompt from conversation_synthesis import CONVERSATION_PROMPTS, get_conversation_prompt from multistyle_rewrite import MULTISTYLE_PROMPTS, get_multistyle_prompt from knowledge_textbook import ( get_knowledge_extraction_prompt, get_textbook_exercise_prompt, TEXTBOOK_EXERCISE_PROMPTS, ) from run_synthesis import ( parse_qa_output, parse_conversation_output, parse_rewrite_output, parse_knowledge_output, parse_textbook_output, ) # API 配置从环境变量读取(通过 HF Secrets 设置) API_KEY = os.getenv("OPENAI_API_KEY") BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") DEFAULT_MODEL = "gpt-4o" async def call_api(prompt: str, model: str = DEFAULT_MODEL, temperature: float = 0.7) -> str: """调用 API 生成内容""" if not API_KEY: return "Error: API Key not configured. Please contact administrator." client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL) try: response = await client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], temperature=temperature, max_tokens=4096, ) return response.choices[0].message.content except Exception as e: return f"Error: {str(e)}" def run_async(coro): """运行异步函数""" try: loop = asyncio.get_event_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) return loop.run_until_complete(coro) # ============================================================================ # Task Handlers # ============================================================================ def qa_synthesis(text: str, level: str, model: str, temperature: float): """Q&A 问答对合成""" if not text.strip(): return "", "", "" prompt_template = get_qa_prompt(level) prompt = prompt_template.format(text=text) response = run_async(call_api(prompt, model, temperature)) parsed = parse_qa_output(response) return ( parsed.get("problem", ""), parsed.get("solution", ""), response ) def conversation_synthesis(text: str, style: str, model: str, temperature: float): """多轮对话合成""" if not text.strip(): return "", "" prompt_template = get_conversation_prompt(style) prompt = prompt_template.format(text=text) response = run_async(call_api(prompt, model, temperature)) parsed = parse_conversation_output(response) return parsed.get("content", response), response def rewrite_synthesis(text: str, style: str, model: str, temperature: float): """多风格改写""" if not text.strip(): return "", "" prompt_template = get_multistyle_prompt(style) prompt = prompt_template.format(text=text) response = run_async(call_api(prompt, model, temperature)) parsed = parse_rewrite_output(response) return parsed.get("rewritten", response), response def knowledge_extraction(text: str, model: str, temperature: float): """知识点提取""" if not text.strip(): return "", "" prompt_template = get_knowledge_extraction_prompt() prompt = prompt_template.format(text=text) response = run_async(call_api(prompt, model, temperature)) parsed = parse_knowledge_output(response) knowledge_points = parsed.get("knowledge_points", []) formatted = "\n\n---\n\n".join(knowledge_points) if knowledge_points else "No knowledge points extracted." return formatted, response def textbook_exercise(knowledge_point: str, difficulty: str, model: str, temperature: float): """教材练习生成""" if not knowledge_point.strip(): return "", "" prompt_template = get_textbook_exercise_prompt(difficulty) prompt = prompt_template.format(mathematical_knowledge_point=knowledge_point) response = run_async(call_api(prompt, model, temperature)) parsed = parse_textbook_output(response) return parsed.get("material", response), response # ============================================================================ # Gradio UI # ============================================================================ custom_css = """ .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important; } .main-title { font-weight: 700 !important; font-size: 2.2rem !important; background: linear-gradient(90deg, #e94560, #f39c12, #00d9ff) !important; -webkit-background-clip: text !important; -webkit-text-fill-color: transparent !important; background-clip: text !important; text-align: center !important; } .subtitle { text-align: center !important; color: #94a3b8 !important; font-size: 1rem !important; margin-bottom: 1.5rem !important; } .gr-button-primary { background: linear-gradient(135deg, #e94560 0%, #f39c12 100%) !important; border: none !important; font-weight: 600 !important; } .gr-button-primary:hover { transform: translateY(-2px) !important; box-shadow: 0 8px 25px rgba(233, 69, 96, 0.4) !important; } footer { display: none !important; } """ with gr.Blocks(title="UltraData-Math L3 Generator", css=custom_css) as demo: gr.HTML('
LLM-based Mathematical Data Synthesis Tool
') with gr.Row(): model_select = gr.Dropdown( choices=["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"], value="gpt-4o", label="Model", scale=1, ) temperature = gr.Slider( minimum=0.0, maximum=1.5, value=0.7, step=0.1, label="Temperature", scale=1, ) with gr.Tabs(): # Q&A Synthesis Tab with gr.TabItem("📝 Q&A Synthesis"): gr.Markdown("根据数学内容生成问答对,按教育难度分级。") with gr.Row(): with gr.Column(): qa_input = gr.Textbox( label="Input Mathematical Content", placeholder="Enter mathematical content here...", lines=8, ) qa_level = gr.Radio( choices=list(QA_PROMPTS.keys()), value="high_school", label="Difficulty Level", ) qa_btn = gr.Button("🚀 Generate Q&A", variant="primary") with gr.Column(): qa_problem = gr.Textbox(label="Generated Problem", lines=4) qa_solution = gr.Textbox(label="Generated Solution", lines=8) qa_raw = gr.Textbox(label="Raw Response", lines=4, visible=False) qa_btn.click( qa_synthesis, inputs=[qa_input, qa_level, model_select, temperature], outputs=[qa_problem, qa_solution, qa_raw], ) # Conversation Synthesis Tab with gr.TabItem("💬 Conversation Synthesis"): gr.Markdown("将数学内容转换为多轮对话格式。") with gr.Row(): with gr.Column(): conv_input = gr.Textbox( label="Input Mathematical Content", placeholder="Enter mathematical content here...", lines=8, ) conv_style = gr.Radio( choices=list(CONVERSATION_PROMPTS.keys()), value="teacher_student", label="Conversation Style", ) conv_btn = gr.Button("🚀 Generate Conversation", variant="primary") with gr.Column(): conv_output = gr.Textbox(label="Generated Conversation", lines=15) conv_raw = gr.Textbox(label="Raw Response", lines=4, visible=False) conv_btn.click( conversation_synthesis, inputs=[conv_input, conv_style, model_select, temperature], outputs=[conv_output, conv_raw], ) # Rewrite Tab with gr.TabItem("✨ Multi-style Rewrite"): gr.Markdown("将数学内容改写为不同风格。") with gr.Row(): with gr.Column(): rewrite_input = gr.Textbox( label="Input Mathematical Content", placeholder="Enter mathematical content here...", lines=8, ) rewrite_style = gr.Radio( choices=list(MULTISTYLE_PROMPTS.keys()), value="textbook", label="Rewrite Style", ) rewrite_btn = gr.Button("🚀 Rewrite", variant="primary") with gr.Column(): rewrite_output = gr.Textbox(label="Rewritten Content", lines=15) rewrite_raw = gr.Textbox(label="Raw Response", lines=4, visible=False) rewrite_btn.click( rewrite_synthesis, inputs=[rewrite_input, rewrite_style, model_select, temperature], outputs=[rewrite_output, rewrite_raw], ) # Knowledge Extraction Tab with gr.TabItem("📚 Knowledge Extraction"): gr.Markdown("从数学内容中提取定义、定理、性质等知识点。") with gr.Row(): with gr.Column(): know_input = gr.Textbox( label="Input Mathematical Content", placeholder="Enter mathematical content here...", lines=10, ) know_btn = gr.Button("🚀 Extract Knowledge", variant="primary") with gr.Column(): know_output = gr.Textbox(label="Extracted Knowledge Points", lines=15) know_raw = gr.Textbox(label="Raw Response", lines=4, visible=False) know_btn.click( knowledge_extraction, inputs=[know_input, model_select, temperature], outputs=[know_output, know_raw], ) # Textbook Exercise Tab with gr.TabItem("📖 Textbook Exercise"): gr.Markdown("基于知识点生成不同难度的教材式练习。") with gr.Row(): with gr.Column(): textbook_input = gr.Textbox( label="Input Knowledge Point", placeholder="Enter a mathematical knowledge point...", lines=6, ) textbook_diff = gr.Radio( choices=list(TEXTBOOK_EXERCISE_PROMPTS.keys()), value="easy", label="Difficulty", ) textbook_btn = gr.Button("🚀 Generate Exercise", variant="primary") with gr.Column(): textbook_output = gr.Textbox(label="Generated Exercise Material", lines=15) textbook_raw = gr.Textbox(label="Raw Response", lines=4, visible=False) textbook_btn.click( textbook_exercise, inputs=[textbook_input, textbook_diff, model_select, temperature], outputs=[textbook_output, textbook_raw], ) gr.HTML("""🔬 UltraData-Math L3 Generator - Part of the UltraData-Math Project
LLM-based data synthesis for Q&A, conversations, rewriting, and more.