Chart / test.py
adddrett's picture
2
1c91312
"""
图表问答数据集审核系统 - Gradio 5.x 应用
用于人工审核每个图表对应的问题和答案是否合理正确
"""
import gradio as gr
from data_manager import DataManager, data_manager
from typing import Dict, List, Optional, Tuple, Any
import json
import os
import base64
# ============== 全局状态 ==============
class AppState:
    """Navigation state for the review UI.

    Tracks which chart (source / chart type / chart id / model) is currently
    selected and where that selection sits inside the flat list of all chart
    paths, so that "previous" / "next" navigation can step through the list.

    NOTE(review): this file creates one module-level instance
    (``state = AppState()``), so the state appears to be shared by every
    browser session served by the process — confirm that is intended.
    """

    def __init__(self):
        # Identifiers of the currently selected chart; empty until a selection is made.
        self.current_source: str = ""
        self.current_chart_type: str = ""
        self.current_chart_id: str = ""
        self.current_model: str = ""
        # Flat list of path dicts; each is read with the keys
        # 'source', 'chart_type', 'chart_id' and 'model'.
        self.all_paths: List[Dict] = []
        # Position of the current selection in ``all_paths``; -1 means "none".
        self.current_index: int = -1
        # Fetch every chart path once at construction time.
        self.refresh_paths()

    def refresh_paths(self):
        """Reload ``all_paths`` from ``data_manager.get_all_chart_paths()``."""
        self.all_paths = data_manager.get_all_chart_paths()

    def get_current_path(self) -> Optional[Dict]:
        """Return the path dict at ``current_index``, or None if the index is out of range."""
        if 0 <= self.current_index < len(self.all_paths):
            return self.all_paths[self.current_index]
        return None

    def set_position(self, source: str, chart_type: str, chart_id: str, model: str):
        """Record the given selection and re-sync ``current_index`` with it.

        Args:
            source: Data source name.
            chart_type: Chart type name.
            chart_id: Chart identifier.
            model: Model name.

        If no entry of ``all_paths`` matches all four values, ``current_index``
        is reset to -1 rather than being left at the previous position, so a
        stale index never describes a selection it does not belong to.
        """
        self.current_source = source
        self.current_chart_type = chart_type
        self.current_chart_id = chart_id
        self.current_model = model
        # First matching entry wins; -1 when the selection is not in the list.
        self.current_index = next(
            (
                i
                for i, path in enumerate(self.all_paths)
                if path['source'] == source
                and path['chart_type'] == chart_type
                and path['chart_id'] == chart_id
                and path['model'] == model
            ),
            -1,
        )

    def navigate(self, direction: int) -> bool:
        """Move to the next or previous chart in ``all_paths``.

        Args:
            direction: 1 for the next chart, -1 for the previous one.

        Returns:
            True if the move stayed inside the list and the state was updated,
            False (state untouched) otherwise.
        """
        new_index = self.current_index + direction
        if not 0 <= new_index < len(self.all_paths):
            return False
        path = self.all_paths[new_index]
        self.current_index = new_index
        self.current_source = path['source']
        self.current_chart_type = path['chart_type']
        self.current_chart_id = path['chart_id']
        self.current_model = path['model']
        return True
# Single module-level AppState instance; the handler functions in this file
# read and mutate it directly.
state = AppState()
# ============== UI 更新函数 ==============
def get_dataset_choices() -> Tuple[List[str], List[str], List[str], List[str]]:
    """Return the dropdown options that match the current selection in ``state``.

    Returns:
        A ``(sources, chart_types, charts, models)`` tuple. ``chart_types`` is
        only filled once a source is selected; ``charts`` and ``models`` only
        once a chart type is selected as well. Unfilled entries are empty lists.
    """
    structure = data_manager.get_dataset_structure()
    # Look up 'sources' once with a default so a structure without that key
    # yields empty choices instead of raising KeyError further down.
    all_sources = structure.get('sources', {})
    sources = list(all_sources.keys())
    chart_types = []
    charts = []
    models = []
    if state.current_source:
        type_map = all_sources.get(state.current_source, {}).get('chart_types', {})
        chart_types = list(type_map.keys())
        if state.current_chart_type:
            charts = data_manager.get_chart_list(state.current_source, state.current_chart_type)
            models = type_map.get(state.current_chart_type, {}).get('models', [])
    return sources, chart_types, charts, models
def update_chart_type_dropdown(source: str):
    """Refresh the chart-type dropdown after the data source changed.

    Stores ``source`` in the shared state, then returns a ``gr.Dropdown``
    update listing that source's chart types with the first one preselected
    (or no value when the source has none).
    """
    state.current_source = source
    known_sources = data_manager.get_dataset_structure().get('sources', {})
    type_map = known_sources.get(source, {}).get('chart_types', {})
    chart_types = list(type_map.keys())
    preselected = chart_types[0] if chart_types else None
    return gr.Dropdown(choices=chart_types, value=preselected)
def update_chart_dropdown(source: str, chart_type: str):
    """Refresh the chart and model dropdowns after the chart type changed.

    Stores the source / chart type in the shared state and returns two
    ``gr.Dropdown`` updates — (charts, models) — each with its first entry
    preselected, or no value when the list is empty.
    """
    state.current_source = source
    state.current_chart_type = chart_type
    charts = data_manager.get_chart_list(source, chart_type)
    known_sources = data_manager.get_dataset_structure().get('sources', {})
    type_entry = known_sources.get(source, {}).get('chart_types', {}).get(chart_type, {})
    models = type_entry.get('models', [])
    first_chart = charts[0] if charts else None
    first_model = models[0] if models else None
    return (
        gr.Dropdown(choices=charts, value=first_chart),
        gr.Dropdown(choices=models, value=first_model)
    )
def create_embedded_html(html_content: str, chart_id: str = "") -> str:
    """Build the HTML snippet that displays one chart.

    Args:
        html_content: Full HTML document of the chart. When empty, a
            "no chart content" placeholder is returned instead of an iframe.
        chart_id: Chart identifier shown inside the placeholder.

    Returns:
        Either the placeholder ``<div>`` or an ``<iframe>`` whose ``src`` is a
        base64 ``data:`` URI carrying ``html_content``.
    """
    # Local import so this function stays self-contained.
    from html import escape

    if not html_content:
        # chart_id ends up inside markup, so escape it; fall back to the
        # "unknown" label when it is empty.
        shown_id = escape(str(chart_id)) if chart_id else '未知'
        return f"""
<div style="display:flex;flex-direction:column;align-items:center;justify-content:center;
min-height:400px;color:#999;border:2px dashed #ddd;border-radius:12px;background:#fafafa;">
<div style="font-size:48px;margin-bottom:16px;">📭</div>
<div style="font-size:18px;font-weight:500;">暂无图表内容</div>
<div style="font-size:14px;margin-top:8px;">图表 ID: {shown_id}</div>
<div style="font-size:12px;margin-top:16px;color:#888;">请检查数据集目录中是否存在该图表的 HTML 文件</div>
</div>
"""
    # Base64-encode the document so quotes in it need no escaping inside the attribute.
    html_base64 = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
    # NOTE(review): "allow-scripts allow-same-origin" is the sandbox combination
    # browsers' documentation warns about; confirm the chart HTML is trusted.
    iframe_html = f"""
<iframe
src="data:text/html;base64,{html_base64}"
style="width:100%;height:500px;border:1px solid #e0e0e0;border-radius:8px;background:#fff;"
sandbox="allow-scripts allow-same-origin"
loading="lazy"
></iframe>
"""
    return iframe_html
def load_chart_data(source: str, chart_type: str, chart_id: str, model: str):
    """Load one chart and return the values for every dependent UI component.

    Args:
        source: Selected data source.
        chart_type: Selected chart type.
        chart_id: Selected chart id.
        model: Selected model.

    Returns:
        A list of eight items, in the order the event bindings expect:
        chart HTML, label markdown, QA list as a JSON string, review statistics
        text, progress text, existing reviews (keyed by ``qa_id``) as a JSON
        string, a ``gr.Radio`` update for the QA selector, and a debug line.
        If any of the four arguments is empty, placeholder values are returned
        and nothing is loaded.
    """
    if not all([source, chart_type, chart_id, model]):
        return [
            create_embedded_html(""),  # html_display
            "### 请在左侧选择图表",  # label_info
            "[]",  # qa_data (JSON string)
            "等待加载数据...",  # status_text
            "请在左侧选择图表",  # progress_text
            "{}",  # current_qa_reviews (JSON string)
            gr.Radio(choices=[], value=None),  # qa_selector
            ""  # debug_info
        ]
    # Remember the selection (this also re-syncs the navigation index).
    state.set_position(source, chart_type, chart_id, model)
    # Fetch the chart's HTML and label metadata.
    chart_data = data_manager.get_chart_data(source, chart_type, chart_id)
    html_content = chart_data.get('html_content', '')
    label_info = chart_data.get('label_info', {})
    embedded_html = create_embedded_html(html_content, chart_id)
    debug_info = f"📁 {source}/{chart_type}/{chart_id} | HTML: {len(html_content)} 字符"
    # Render the label metadata as a markdown table. The literal must stay at
    # column 0: indented lines would be rendered as a code block.
    if label_info:
        label_text = f"""
### 图表信息
| 属性 | 值 |
|------|-----|
| **编号** | {label_info.get('Number', '-')} |
| **类型** | {label_info.get('Type', '-')} |
| **来源** | {label_info.get('Source', '-')} |
| **主题** | {label_info.get('Topic', '-')} |
| **描述** | {label_info.get('Describe', '-')} |
| **链接** | [查看原图]({label_info.get('Weblink', '#')}) |
"""
    else:
        label_text = "### ⚠️ 暂无标签信息"
    # QA pairs for this chart/model; treat a None result like an empty list.
    qa_list = data_manager.get_qa_list(source, chart_type, model, chart_id) or []
    # Reviews already stored for this chart/model, keyed by QA id.
    # Assumes each review is a JSON-serializable dict — TODO confirm.
    existing_reviews = {
        review['qa_id']: review
        for review in data_manager.get_reviews_by_chart(chart_id, model)
    }
    progress_text = f"当前: {state.current_index + 1} / {len(state.all_paths)} 个图表"
    stats = data_manager.get_review_stats()
    status_text = f"已审核: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"
    # Selector labels: "Q<n>: <first 50 chars>", with an ellipsis only when the
    # question was actually cut. The "Q<n>:" prefix is what the selection
    # handler parses, so it must stay first.
    qa_choices = [
        f"Q{i + 1}: {qa.question[:50]}{'...' if len(qa.question) > 50 else ''}"
        for i, qa in enumerate(qa_list)
    ]
    qa_payload = [{"id": qa.id, "question": qa.question, "answer": qa.answer} for qa in qa_list]
    return [
        embedded_html,  # html_display
        label_text,  # label_info
        json.dumps(qa_payload),  # qa_data (JSON string)
        status_text,  # status_text
        progress_text,  # progress_text
        json.dumps(existing_reviews),  # current_qa_reviews (JSON string)
        gr.Radio(choices=qa_choices, value=qa_choices[0] if qa_choices else None),  # qa_selector
        debug_info  # debug_info
    ]
def navigate_prev():
    """Step to the previous chart and return updates for the four selector dropdowns.

    Returns a tuple of ``gr.Dropdown`` updates (source, chart type, chart id,
    model) carrying the new values when the step succeeded, otherwise a list of
    four empty ``gr.Dropdown()`` updates.

    NOTE(review): only ``value`` is set here, not ``choices``; whether the
    target value is already among each dropdown's choices depends on the
    change-event cascade — confirm when crossing sources or chart types.
    """
    target = state.get_current_path() if state.navigate(-1) else None
    if not target:
        return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()]
    return (
        gr.Dropdown(value=target['source']),
        gr.Dropdown(value=target['chart_type']),
        gr.Dropdown(value=target['chart_id']),
        gr.Dropdown(value=target['model'])
    )
def navigate_next():
    """Step to the next chart and return updates for the four selector dropdowns.

    Returns a tuple of ``gr.Dropdown`` updates (source, chart type, chart id,
    model) carrying the new values when the step succeeded, otherwise a list of
    four empty ``gr.Dropdown()`` updates.

    NOTE(review): only ``value`` is set here, not ``choices``; whether the
    target value is already among each dropdown's choices depends on the
    change-event cascade — confirm when crossing sources or chart types.
    """
    moved = state.navigate(1)
    target = state.get_current_path() if moved else None
    if target:
        return (
            gr.Dropdown(value=target['source']),
            gr.Dropdown(value=target['chart_type']),
            gr.Dropdown(value=target['chart_id']),
            gr.Dropdown(value=target['model'])
        )
    # Nothing to move to: leave all four dropdowns as they are.
    return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()]
def save_review_handler(
    qa_id: str,
    chart_id: str,
    source: str,
    chart_type: str,
    model: str,
    original_question: str,
    original_answer: str,
    status: str,
    modified_question: str,
    modified_answer: str,
    issue_type: str,
    comment: str,
    reviewer: str
) -> str:
    """Persist one review record and return a status message.

    Args:
        qa_id: Id of the QA pair being reviewed; when empty nothing is saved.
        chart_id: Chart the QA pair belongs to.
        source: Data source of the chart.
        chart_type: Chart type of the chart.
        model: Model that produced the QA pair.
        original_question: Question text as shown to the reviewer.
        original_answer: Answer text as shown to the reviewer.
        status: Review verdict chosen in the UI.
        modified_question: Reviewer's corrected question, if any.
        modified_answer: Reviewer's corrected answer, if any.
        issue_type: Issue category chosen in the UI.
        comment: Free-form reviewer note.
        reviewer: Reviewer name.

    Returns:
        An error message when no QA pair is selected, otherwise a confirmation
        that includes the refreshed review statistics.
    """
    if not qa_id:
        return "❌ 请先选择一个问答对"
    review_data = {
        "qa_id": qa_id,
        "chart_id": chart_id,
        "source": source,
        "chart_type": chart_type,
        "model": model,
        "original_question": original_question,
        "original_answer": original_answer,
        "status": status,
        "modified_question": modified_question,
        "modified_answer": modified_answer,
        "issue_type": issue_type,
        "comment": comment,
        "reviewer": reviewer
    }
    # The return value of save_review was never used, so it is not bound.
    data_manager.save_review(review_data)
    # Report the statistics as they stand after the save.
    stats = data_manager.get_review_stats()
    return f"✅ 已保存! 总计: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"
def export_reviews_handler():
    """Export all reviews to ``./reviews_export.json`` and return a confirmation message.

    The path shown in the message is whatever ``data_manager.export_reviews`` returns.
    """
    exported_to = data_manager.export_reviews("./reviews_export.json")
    message = f"✅ 审核记录已导出至: {exported_to}"
    return message
# ============== 创建 Gradio 界面 ==============
def create_ui():
    """Build the Gradio Blocks app for the review tool and wire up its events.

    Layout: a title, a status row, then three columns — dataset selection and
    navigation (left), the chart display (middle), and label info plus the QA
    review form (right).

    Returns:
        The constructed ``gr.Blocks`` app (not launched here).
    """
    # Custom CSS
    custom_css = """
.chart-container {
min-height: 520px;
}
.control-panel {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin-bottom: 10px;
}
.debug-panel {
font-size: 12px;
color: #666;
padding: 8px;
background: #f5f5f5;
border-radius: 4px;
margin-top: 10px;
}
"""
    with gr.Blocks(
        title="图表问答数据集审核系统",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as app:
        # Hidden state holders (values are JSON strings)
        qa_data_json = gr.State(value="[]")
        current_reviews_json = gr.State(value="{}")
        # ==================== Title bar ====================
        gr.Markdown("""
# 📊 图表问答数据集审核系统
审核每个图表对应的问题和答案是否合理正确。使用 ← → 按钮切换图表。
""")
        # ==================== Top status row ====================
        with gr.Row():
            status_text = gr.Textbox(
                label="审核统计",
                value="等待加载数据...",
                interactive=False,
                show_label=False,
                scale=2
            )
            progress_text = gr.Textbox(
                label="进度",
                value="请在左侧选择图表",
                interactive=False,
                show_label=False,
                scale=1
            )
        # ==================== Main content ====================
        with gr.Row():
            # ===== Left: navigation controls =====
            with gr.Column(scale=1, min_width=250):
                gr.Markdown("### 📁 数据选择")
                source_dropdown = gr.Dropdown(
                    label="数据来源 (Source)",
                    choices=[],
                    interactive=True
                )
                chart_type_dropdown = gr.Dropdown(
                    label="图表类型 (Chart Type)",
                    choices=[],
                    interactive=True
                )
                chart_dropdown = gr.Dropdown(
                    label="图表 ID",
                    choices=[],
                    interactive=True
                )
                model_dropdown = gr.Dropdown(
                    label="模型 (Model)",
                    choices=[],
                    interactive=True
                )
                gr.Markdown("---")
                # Navigation buttons
                with gr.Row():
                    prev_btn = gr.Button("⬅️ 上一个")
                    next_btn = gr.Button("➡️ 下一个")
                # Export button; the result box stays hidden until an export has run
                export_btn = gr.Button("📥 导出审核记录", variant="secondary")
                export_result = gr.Textbox(label="", visible=False)
                # Reviewer name
                reviewer_input = gr.Textbox(
                    label="审核人",
                    value="default",
                    interactive=True
                )
                # Debug info
                debug_info = gr.Textbox(
                    label="调试信息",
                    value="",
                    interactive=False,
                    show_label=False,
                    elem_classes=["debug-panel"]
                )
            # ===== Middle: chart display =====
            with gr.Column(scale=2, min_width=400):
                gr.Markdown("### 📈 图表展示")
                # Chart HTML (filled with an iframe by load_chart_data)
                html_display = gr.HTML(
                    value="<div style='text-align:center;padding:50px;color:#999;'>请选择图表</div>",
                    elem_classes=["chart-container"]
                )
            # ===== Right: label info and QA review =====
            with gr.Column(scale=2, min_width=400):
                # Label info
                gr.Markdown("### 📝 图表标签")
                label_display = gr.Markdown(
                    value="暂无信息",
                    elem_classes=["control-panel"]
                )
                # QA review area
                gr.Markdown("### ❓ 问答审核")
                # Id of the currently selected QA pair (hidden)
                current_qa_id = gr.Textbox(visible=False, value="")
                # QA display
                qa_question_display = gr.Textbox(
                    label="问题",
                    interactive=False,
                    lines=2,
                    value=""
                )
                qa_answer_display = gr.Textbox(
                    label="答案",
                    interactive=False,
                    lines=1,
                    value=""
                )
                # QA selector
                qa_selector = gr.Radio(
                    label="选择要审核的问答对",
                    choices=[],
                    interactive=True
                )
                gr.Markdown("---")
                gr.Markdown("#### 审核操作")
                # Review status: (display label, stored value) pairs
                status_radio = gr.Radio(
                    label="审核状态",
                    choices=[
                        ("✅ 正确", "correct"),
                        ("❌ 错误", "incorrect"),
                        ("✏️ 需修改", "needs_modification"),
                        ("⏳ 待定", "pending")
                    ],
                    value="pending",
                    interactive=True
                )
                # Issue type
                # NOTE(review): the initial value "" is not one of the choices — confirm
                # the installed Gradio version accepts this without a warning or error.
                issue_type_dropdown = gr.Dropdown(
                    label="问题类型",
                    choices=[
                        "问题歧义",
                        "答案错误",
                        "图表不清晰",
                        "问题不合理",
                        "答案格式错误",
                        "其他"
                    ],
                    interactive=True,
                    value=""
                )
                # Corrected question and answer
                modified_question = gr.Textbox(
                    label="修改后的问题",
                    placeholder="如需修改问题,请在此输入...",
                    lines=2,
                    interactive=True,
                    value=""
                )
                modified_answer = gr.Textbox(
                    label="修改后的答案",
                    placeholder="如需修改答案,请在此输入...",
                    lines=1,
                    interactive=True,
                    value=""
                )
                # Comment
                comment_textbox = gr.Textbox(
                    label="评论/备注",
                    placeholder="请输入审核意见或备注...",
                    lines=2,
                    interactive=True,
                    value=""
                )
                # Save button; the result box stays hidden until a save has run
                save_btn = gr.Button("💾 保存审核结果", variant="primary")
                save_result = gr.Textbox(label="", visible=False)
        # ==================== Event bindings ====================
        # Populate the source dropdown on page load
        def init_dataset():
            """Return a source-dropdown update with all sources, first one preselected."""
            structure = data_manager.get_dataset_structure()
            sources = list(structure.get('sources', {}).keys())
            return gr.Dropdown(choices=sources, value=sources[0] if sources else None)
        app.load(
            fn=init_dataset,
            outputs=[source_dropdown]
        )
        # Source changed -> refresh chart types
        source_dropdown.change(
            fn=update_chart_type_dropdown,
            inputs=[source_dropdown],
            outputs=[chart_type_dropdown]
        )
        # Chart type changed -> refresh charts and models
        chart_type_dropdown.change(
            fn=update_chart_dropdown,
            inputs=[source_dropdown, chart_type_dropdown],
            outputs=[chart_dropdown, model_dropdown]
        )
        # Model or chart changed -> load the chart data (same handler, same outputs)
        model_dropdown.change(
            fn=load_chart_data,
            inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
            outputs=[
                html_display, label_display, qa_data_json, status_text, progress_text,
                current_reviews_json, qa_selector, debug_info
            ]
        )
        chart_dropdown.change(
            fn=load_chart_data,
            inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
            outputs=[
                html_display, label_display, qa_data_json, status_text, progress_text,
                current_reviews_json, qa_selector, debug_info
            ]
        )
        # QA selector changed -> fill the review panel
        def on_qa_selected(qa_index_str, qa_json, reviews_json):
            """Return the eight review-panel values for the selected QA label.

            ``qa_index_str`` is a selector label of the form "Q<n>: ..."; ``qa_json``
            and ``reviews_json`` are the JSON strings held in the hidden state.
            On an empty selection or any error, blank/default values are returned.
            """
            if not qa_index_str or not qa_json:
                return ["", "", "", gr.Radio(value="pending"), "", "", "", ""]
            try:
                qa_list = json.loads(qa_json)
                reviews = json.loads(reviews_json)
                # Parse the 1-based number out of the "Q<n>:" prefix
                index = int(qa_index_str.split(":")[0].replace("Q", "")) - 1
                qa = qa_list[index]
                # Use the stored review for this QA id, if there is one
                review = reviews.get(qa['id'], {})
                return [
                    qa['id'],  # current_qa_id
                    qa['question'],  # qa_question_display
                    qa['answer'],  # qa_answer_display
                    gr.Radio(value=review.get('status', 'pending')),  # status_radio
                    review.get('issue_type', ''),  # issue_type_dropdown
                    review.get('modified_question', ''),  # modified_question
                    review.get('modified_answer', ''),  # modified_answer
                    review.get('comment', '')  # comment_textbox
                ]
            except Exception as e:
                # Best effort: log to stdout and reset the panel rather than fail the event
                print(f"Error in on_qa_selected: {e}")
                return ["", "", "", gr.Radio(value="pending"), "", "", "", ""]
        qa_selector.change(
            fn=on_qa_selected,
            inputs=[qa_selector, qa_data_json, current_reviews_json],
            outputs=[
                current_qa_id, qa_question_display, qa_answer_display,
                status_radio, issue_type_dropdown, modified_question, modified_answer, comment_textbox
            ]
        )
        # Navigation buttons
        prev_btn.click(
            fn=navigate_prev,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )
        next_btn.click(
            fn=navigate_next,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )
        # Save the review, then reveal the result box
        # NOTE(review): current_reviews_json is only written by load_chart_data, so after
        # a save the panel keeps showing the pre-save review for that QA until the chart
        # is loaded again.
        save_btn.click(
            fn=save_review_handler,
            inputs=[
                current_qa_id,
                chart_dropdown,
                source_dropdown,
                chart_type_dropdown,
                model_dropdown,
                qa_question_display,
                qa_answer_display,
                status_radio,
                modified_question,
                modified_answer,
                issue_type_dropdown,
                comment_textbox,
                reviewer_input
            ],
            outputs=[save_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[save_result]
        )
        # Export the reviews, then reveal the result box
        export_btn.click(
            fn=export_reviews_handler,
            outputs=[export_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[export_result]
        )
    return app
# ============== 主入口 ==============
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 with a public share link requested.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app = create_ui()
    app.launch(**launch_options)