| | """ |
| | 图表问答数据集审核系统 - Gradio 5.x 应用 |
| | 用于人工审核每个图表对应的问题和答案是否合理正确 |
| | """ |
| | import gradio as gr |
| | from data_manager import DataManager, data_manager |
| | from typing import Dict, List, Optional, Tuple, Any |
| | import json |
| | import os |
| | import base64 |
| |
|
| | |
| |
|
class AppState:
    """Track the currently selected chart and its position in the full chart list."""

    def __init__(self):
        # The index stays at -1 until a chart has been located in all_paths.
        self.current_index: int = -1
        self.all_paths: List[Dict] = []
        self.current_model: str = ""
        self.current_chart_id: str = ""
        self.current_chart_type: str = ""
        self.current_source: str = ""

        self.refresh_paths()

    def refresh_paths(self):
        """Reload the list of chart paths from the data manager."""
        self.all_paths = data_manager.get_all_chart_paths()

    def get_current_path(self) -> Optional[Dict]:
        """Return the path entry at the current index, or None if the index is out of range."""
        in_range = 0 <= self.current_index < len(self.all_paths)
        return self.all_paths[self.current_index] if in_range else None

    def set_position(self, source: str, chart_type: str, chart_id: str, model: str):
        """Store the selection and point the index at the first matching path entry.

        The index is left untouched when no entry matches all four fields.
        """
        self.current_source, self.current_chart_type = source, chart_type
        self.current_chart_id, self.current_model = chart_id, model

        # Insertion order matters: fields are compared in this order and the
        # comparison stops at the first mismatch.
        wanted = {
            'source': source,
            'chart_type': chart_type,
            'chart_id': chart_id,
            'model': model,
        }
        found = next(
            (
                position
                for position, entry in enumerate(self.all_paths)
                if all(entry[key] == expected for key, expected in wanted.items())
            ),
            None,
        )
        if found is not None:
            self.current_index = found

    def navigate(self, direction: int) -> bool:
        """Move to the previous or next chart.

        Args:
            direction: 1 for the next chart, -1 for the previous one.

        Returns:
            True if the move stayed inside the path list and was applied,
            False otherwise (state is unchanged in that case).
        """
        target = self.current_index + direction
        if not 0 <= target < len(self.all_paths):
            return False

        self.current_index = target
        entry = self.all_paths[target]
        self.current_source = entry['source']
        self.current_chart_type = entry['chart_type']
        self.current_chart_id = entry['chart_id']
        self.current_model = entry['model']
        return True
| |
|
# Module-level AppState instance that the handler functions in this file read and update.
# NOTE(review): as a module global it is shared by everything running in this
# process — confirm that single-reviewer use is intended.
state = AppState()
| |
|
| |
|
| | |
| |
|
def get_dataset_choices() -> Tuple[List[str], List[str], List[str], List[str]]:
    """Return the selectable values for the four dataset dropdowns.

    The chart-type list is only filled when a source is selected in ``state``;
    the chart and model lists are only filled when a chart type is selected too.

    Returns:
        A tuple ``(sources, chart_types, charts, models)``.
    """
    # Use .get() for every level so a structure without a 'sources' key yields
    # empty lists instead of raising KeyError once a source is selected.
    all_sources = data_manager.get_dataset_structure().get('sources', {})
    sources = list(all_sources)

    chart_types: List[str] = []
    charts: List[str] = []
    models: List[str] = []

    if state.current_source:
        type_map = all_sources.get(state.current_source, {}).get('chart_types', {})
        chart_types = list(type_map)

        if state.current_chart_type:
            charts = data_manager.get_chart_list(state.current_source, state.current_chart_type)
            models = type_map.get(state.current_chart_type, {}).get('models', [])

    return sources, chart_types, charts, models
| |
|
| |
|
def update_chart_type_dropdown(source: str):
    """Record the chosen source and rebuild the chart-type dropdown for it.

    Returns:
        A Dropdown update listing the source's chart types, with the first one
        selected (or no selection when the source has none).
    """
    state.current_source = source

    sources = data_manager.get_dataset_structure().get('sources', {})
    type_names = list(sources.get(source, {}).get('chart_types', {}).keys())
    initial = type_names[0] if type_names else None

    return gr.Dropdown(choices=type_names, value=initial)
| |
|
| |
|
def update_chart_dropdown(source: str, chart_type: str):
    """Record the chosen source/chart type and rebuild the chart and model dropdowns.

    Returns:
        A pair of Dropdown updates (charts, models), each with its first entry
        selected, or no selection when the list is empty.
    """
    state.current_source = source
    state.current_chart_type = chart_type

    chart_ids = data_manager.get_chart_list(source, chart_type)
    sources = data_manager.get_dataset_structure().get('sources', {})
    type_entry = sources.get(source, {}).get('chart_types', {}).get(chart_type, {})
    model_names = type_entry.get('models', [])

    first_chart = chart_ids[0] if chart_ids else None
    first_model = model_names[0] if model_names else None

    chart_update = gr.Dropdown(choices=chart_ids, value=first_chart)
    model_update = gr.Dropdown(choices=model_names, value=first_model)
    return (chart_update, model_update)
| |
|
| |
|
def create_embedded_html(html_content: str, chart_id: str = "") -> str:
    """Build the HTML snippet that displays a chart.

    Non-empty content is base64-encoded and embedded in an iframe through a
    ``data:`` URI. Empty content produces a placeholder panel that shows the
    chart ID.

    Args:
        html_content: Full HTML document of the chart; may be empty.
        chart_id: Chart identifier shown in the placeholder panel.

    Returns:
        An HTML string: either the placeholder panel or the iframe.
    """
    # Imported locally so this function does not depend on a file-level import.
    from html import escape

    if not html_content:
        # Escape the ID so characters such as '<' or '&' cannot break or
        # inject markup into the placeholder.
        shown_id = escape(chart_id or '未知')
        return f"""
    <div style="display:flex;flex-direction:column;align-items:center;justify-content:center;
    min-height:400px;color:#999;border:2px dashed #ddd;border-radius:12px;background:#fafafa;">
    <div style="font-size:48px;margin-bottom:16px;">📭</div>
    <div style="font-size:18px;font-weight:500;">暂无图表内容</div>
    <div style="font-size:14px;margin-top:8px;">图表 ID: {shown_id}</div>
    <div style="font-size:12px;margin-top:16px;color:#888;">请检查数据集目录中是否存在该图表的 HTML 文件</div>
    </div>
    """

    # Base64 keeps arbitrary HTML (quotes, non-ASCII text) intact inside the src attribute.
    html_base64 = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')

    return f"""
    <iframe
    src="data:text/html;base64,{html_base64}"
    style="width:100%;height:500px;border:1px solid #e0e0e0;border-radius:8px;background:#fff;"
    sandbox="allow-scripts allow-same-origin"
    loading="lazy"
    ></iframe>
    """
| |
|
| |
|
def load_chart_data(source: str, chart_type: str, chart_id: str, model: str):
    """Load everything needed to display one chart and return the UI updates.

    Returns:
        A list of eight values, in this order: chart HTML, label markdown,
        QA list as a JSON string, review statistics text, progress text,
        existing reviews as a JSON string, a Radio update for the QA selector,
        and a debug line. Placeholder values are returned when any of the four
        arguments is empty.
    """
    selection_complete = all([source, chart_type, chart_id, model])
    if not selection_complete:
        return [
            create_embedded_html(""),
            "### 请在左侧选择图表",
            "[]",
            "等待加载数据...",
            "请在左侧选择图表",
            "{}",
            gr.Radio(choices=[], value=None),
            ""
        ]

    # Keep the shared state in step with what the dropdowns show.
    state.set_position(source, chart_type, chart_id, model)

    chart_record = data_manager.get_chart_data(source, chart_type, chart_id)
    html_content = chart_record.get('html_content', '')
    label_info = chart_record.get('label_info', {})

    chart_html = create_embedded_html(html_content, chart_id)
    debug_line = f"📁 {source}/{chart_type}/{chart_id} | HTML: {len(html_content)} 字符"

    if not label_info:
        label_markdown = "### ⚠️ 暂无标签信息"
    else:
        label_markdown = f"""
### 图表信息

| 属性 | 值 |
|------|-----|
| **编号** | {label_info.get('Number', '-')} |
| **类型** | {label_info.get('Type', '-')} |
| **来源** | {label_info.get('Source', '-')} |
| **主题** | {label_info.get('Topic', '-')} |
| **描述** | {label_info.get('Describe', '-')} |
| **链接** | [查看原图]({label_info.get('Weblink', '#')}) |
"""

    qa_list = data_manager.get_qa_list(source, chart_type, model, chart_id)

    # Index the stored reviews by QA id so the selector callback can look them up.
    reviews_by_qa = {
        record['qa_id']: record
        for record in data_manager.get_reviews_by_chart(chart_id, model)
    }

    progress_line = f"当前: {state.current_index + 1} / {len(state.all_paths)} 个图表"

    stats = data_manager.get_review_stats()
    stats_line = f"已审核: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"

    if qa_list:
        qa_choices = [
            f"Q{number}: {qa.question[:50]}..."
            for number, qa in enumerate(qa_list, start=1)
        ]
    else:
        qa_choices = []
    first_choice = qa_choices[0] if qa_choices else None

    qa_payload = [
        {"id": qa.id, "question": qa.question, "answer": qa.answer}
        for qa in qa_list
    ]

    return [
        chart_html,
        label_markdown,
        json.dumps(qa_payload),
        stats_line,
        progress_line,
        json.dumps(reviews_by_qa),
        gr.Radio(choices=qa_choices, value=first_choice),
        debug_line
    ]
| |
|
| |
|
def _navigation_updates(direction: int):
    """Move the shared state by ``direction`` and build the four dropdown updates.

    Each update carries the choice list that belongs to the new position as
    well as the value, so a move into another source or chart type does not
    leave a dropdown holding a value that is missing from its choices.

    Args:
        direction: 1 for the next chart, -1 for the previous one.

    Returns:
        Updates for the source, chart type, chart ID and model dropdowns.
        When the move is not possible, four empty updates are returned.
    """
    path = state.get_current_path() if state.navigate(direction) else None
    if not path:
        return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()]

    source = path['source']
    chart_type = path['chart_type']

    sources = data_manager.get_dataset_structure().get('sources', {})
    type_map = sources.get(source, {}).get('chart_types', {})
    charts = data_manager.get_chart_list(source, chart_type)
    models = type_map.get(chart_type, {}).get('models', [])

    return (
        gr.Dropdown(choices=list(sources), value=source),
        gr.Dropdown(choices=list(type_map), value=chart_type),
        gr.Dropdown(choices=charts, value=path['chart_id']),
        gr.Dropdown(choices=models, value=path['model'])
    )


def navigate_prev():
    """Navigate to the previous chart and return the dropdown updates."""
    return _navigation_updates(-1)


def navigate_next():
    """Navigate to the next chart and return the dropdown updates."""
    return _navigation_updates(1)
| |
|
| |
|
def save_review_handler(
    qa_id: str,
    chart_id: str,
    source: str,
    chart_type: str,
    model: str,
    original_question: str,
    original_answer: str,
    status: str,
    modified_question: str,
    modified_answer: str,
    issue_type: str,
    comment: str,
    reviewer: str
) -> str:
    """Persist one review record and return a status line for the UI.

    Returns:
        An error message when no QA pair is selected (empty ``qa_id``);
        otherwise a confirmation line that includes the current review totals.
    """
    if not qa_id:
        return "❌ 请先选择一个问答对"

    record = dict(
        qa_id=qa_id,
        chart_id=chart_id,
        source=source,
        chart_type=chart_type,
        model=model,
        original_question=original_question,
        original_answer=original_answer,
        status=status,
        modified_question=modified_question,
        modified_answer=modified_answer,
        issue_type=issue_type,
        comment=comment,
        reviewer=reviewer,
    )
    data_manager.save_review(record)

    # Read the totals after saving so the message includes this record.
    totals = data_manager.get_review_stats()
    return f"✅ 已保存! 总计: {totals['total']} | ✅正确: {totals['correct']} | ❌错误: {totals['incorrect']} | ✏️需修改: {totals['needs_modification']}"
| |
|
| |
|
def export_reviews_handler():
    """Export the review records to ./reviews_export.json and report the path returned by the data manager."""
    exported_to = data_manager.export_reviews("./reviews_export.json")
    message = f"✅ 审核记录已导出至: {exported_to}"
    return message
| |
|
| |
|
| | |
| |
|
def create_ui():
    """Build the Gradio interface and wire up all event handlers.

    Layout: a header, a status/progress row, then three columns — dataset
    selection and navigation on the left, the chart in the middle, and the
    label information plus the review form on the right.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    custom_css = """
    .chart-container {
        min-height: 520px;
    }

    .control-panel {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 8px;
        margin-bottom: 10px;
    }

    .debug-panel {
        font-size: 12px;
        color: #666;
        padding: 8px;
        background: #f5f5f5;
        border-radius: 4px;
        margin-top: 10px;
    }
    """

    with gr.Blocks(
        title="图表问答数据集审核系统",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as app:

        # Per-session JSON strings: the QA list of the loaded chart and the
        # reviews already stored for it.
        qa_data_json = gr.State(value="[]")
        current_reviews_json = gr.State(value="{}")

        gr.Markdown("""
        # 📊 图表问答数据集审核系统

        审核每个图表对应的问题和答案是否合理正确。使用 ← → 按钮切换图表。
        """)

        # Status bar: review statistics and position in the chart list.
        with gr.Row():
            status_text = gr.Textbox(
                label="审核统计",
                value="等待加载数据...",
                interactive=False,
                show_label=False,
                scale=2
            )
            progress_text = gr.Textbox(
                label="进度",
                value="请在左侧选择图表",
                interactive=False,
                show_label=False,
                scale=1
            )

        with gr.Row():
            # Left column: dataset selection, navigation, export, reviewer name.
            with gr.Column(scale=1, min_width=250):
                gr.Markdown("### 📁 数据选择")

                source_dropdown = gr.Dropdown(
                    label="数据来源 (Source)",
                    choices=[],
                    interactive=True
                )

                chart_type_dropdown = gr.Dropdown(
                    label="图表类型 (Chart Type)",
                    choices=[],
                    interactive=True
                )

                chart_dropdown = gr.Dropdown(
                    label="图表 ID",
                    choices=[],
                    interactive=True
                )

                model_dropdown = gr.Dropdown(
                    label="模型 (Model)",
                    choices=[],
                    interactive=True
                )

                gr.Markdown("---")

                with gr.Row():
                    prev_btn = gr.Button("⬅️ 上一个")
                    next_btn = gr.Button("➡️ 下一个")

                export_btn = gr.Button("📥 导出审核记录", variant="secondary")
                export_result = gr.Textbox(label="", visible=False)

                reviewer_input = gr.Textbox(
                    label="审核人",
                    value="default",
                    interactive=True
                )

                debug_info = gr.Textbox(
                    label="调试信息",
                    value="",
                    interactive=False,
                    show_label=False,
                    elem_classes=["debug-panel"]
                )

            # Middle column: the chart itself.
            with gr.Column(scale=2, min_width=400):
                gr.Markdown("### 📈 图表展示")

                html_display = gr.HTML(
                    value="<div style='text-align:center;padding:50px;color:#999;'>请选择图表</div>",
                    elem_classes=["chart-container"]
                )

            # Right column: label information and the review form.
            with gr.Column(scale=2, min_width=400):

                gr.Markdown("### 📝 图表标签")
                label_display = gr.Markdown(
                    value="暂无信息",
                    elem_classes=["control-panel"]
                )

                gr.Markdown("### ❓ 问答审核")

                # Hidden field holding the id of the QA pair under review.
                current_qa_id = gr.Textbox(visible=False, value="")

                qa_question_display = gr.Textbox(
                    label="问题",
                    interactive=False,
                    lines=2,
                    value=""
                )
                qa_answer_display = gr.Textbox(
                    label="答案",
                    interactive=False,
                    lines=1,
                    value=""
                )

                qa_selector = gr.Radio(
                    label="选择要审核的问答对",
                    choices=[],
                    interactive=True
                )

                gr.Markdown("---")
                gr.Markdown("#### 审核操作")

                status_radio = gr.Radio(
                    label="审核状态",
                    choices=[
                        ("✅ 正确", "correct"),
                        ("❌ 错误", "incorrect"),
                        ("✏️ 需修改", "needs_modification"),
                        ("⏳ 待定", "pending")
                    ],
                    value="pending",
                    interactive=True
                )

                # The empty string is used as the "no issue type" value (both
                # as the initial value and by on_qa_selected below), so it has
                # to be one of the choices; otherwise the dropdown holds a
                # value outside its own choice list.
                issue_type_dropdown = gr.Dropdown(
                    label="问题类型",
                    choices=[
                        ("未选择", ""),
                        "问题歧义",
                        "答案错误",
                        "图表不清晰",
                        "问题不合理",
                        "答案格式错误",
                        "其他"
                    ],
                    interactive=True,
                    value=""
                )

                modified_question = gr.Textbox(
                    label="修改后的问题",
                    placeholder="如需修改问题,请在此输入...",
                    lines=2,
                    interactive=True,
                    value=""
                )

                modified_answer = gr.Textbox(
                    label="修改后的答案",
                    placeholder="如需修改答案,请在此输入...",
                    lines=1,
                    interactive=True,
                    value=""
                )

                comment_textbox = gr.Textbox(
                    label="评论/备注",
                    placeholder="请输入审核意见或备注...",
                    lines=2,
                    interactive=True,
                    value=""
                )

                save_btn = gr.Button("💾 保存审核结果", variant="primary")
                save_result = gr.Textbox(label="", visible=False)

        # ---- Event wiring ----

        def init_dataset():
            """Fill the source dropdown on page load and select the first source."""
            structure = data_manager.get_dataset_structure()
            sources = list(structure.get('sources', {}).keys())
            return gr.Dropdown(choices=sources, value=sources[0] if sources else None)

        app.load(
            fn=init_dataset,
            outputs=[source_dropdown]
        )

        # Cascade: source -> chart types -> charts and models.
        source_dropdown.change(
            fn=update_chart_type_dropdown,
            inputs=[source_dropdown],
            outputs=[chart_type_dropdown]
        )

        chart_type_dropdown.change(
            fn=update_chart_dropdown,
            inputs=[source_dropdown, chart_type_dropdown],
            outputs=[chart_dropdown, model_dropdown]
        )

        # A change of either the model or the chart reloads the chart data.
        selection_inputs = [source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        chart_outputs = [
            html_display, label_display, qa_data_json, status_text, progress_text,
            current_reviews_json, qa_selector, debug_info
        ]

        model_dropdown.change(
            fn=load_chart_data,
            inputs=selection_inputs,
            outputs=chart_outputs
        )

        chart_dropdown.change(
            fn=load_chart_data,
            inputs=selection_inputs,
            outputs=chart_outputs
        )

        def on_qa_selected(qa_index_str, qa_json, reviews_json):
            """Fill the review form for the QA pair chosen in the selector.

            The selector label has the form "Q<n>: ..."; <n> is parsed back
            into a zero-based index into the QA list. Any failure is logged
            and resets the form to its empty state.
            """
            empty_form = ["", "", "", gr.Radio(value="pending"), "", "", "", ""]
            if not qa_index_str or not qa_json:
                return empty_form

            try:
                qa_list = json.loads(qa_json)
                reviews = json.loads(reviews_json)

                index = int(qa_index_str.split(":")[0].replace("Q", "")) - 1
                qa = qa_list[index]

                # Stored review for this QA pair, if there is one.
                review = reviews.get(qa['id'], {})

                return [
                    qa['id'],
                    qa['question'],
                    qa['answer'],
                    gr.Radio(value=review.get('status', 'pending')),
                    review.get('issue_type', ''),
                    review.get('modified_question', ''),
                    review.get('modified_answer', ''),
                    review.get('comment', '')
                ]
            except Exception as e:
                # UI callback boundary: log and fall back to an empty form.
                print(f"Error in on_qa_selected: {e}")
                return empty_form

        qa_selector.change(
            fn=on_qa_selected,
            inputs=[qa_selector, qa_data_json, current_reviews_json],
            outputs=[
                current_qa_id, qa_question_display, qa_answer_display,
                status_radio, issue_type_dropdown, modified_question, modified_answer, comment_textbox
            ]
        )

        prev_btn.click(
            fn=navigate_prev,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )

        next_btn.click(
            fn=navigate_next,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )

        # Save, then reveal the (initially hidden) result textbox.
        save_btn.click(
            fn=save_review_handler,
            inputs=[
                current_qa_id,
                chart_dropdown,
                source_dropdown,
                chart_type_dropdown,
                model_dropdown,
                qa_question_display,
                qa_answer_display,
                status_radio,
                modified_question,
                modified_answer,
                issue_type_dropdown,
                comment_textbox,
                reviewer_input
            ],
            outputs=[save_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[save_result]
        )

        # Export, then reveal the (initially hidden) result textbox.
        export_btn.click(
            fn=export_reviews_handler,
            outputs=[export_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[export_result]
        )

    return app
| |
|
| |
|
| | |
| |
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = create_ui()
    app.launch(
        server_name="0.0.0.0",  # bind to all network interfaces
        server_port=7860,
        share=True  # NOTE(review): requests a public Gradio share link — confirm this exposure is intended
    )