"""Gradio app for the ATLAS leaderboard: citation display and result submission."""

import datetime
import html
import json
import os
import time
import traceback
from email.utils import parseaddr

import gradio as gr
import numpy as np
import pandas as pd
import requests

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)

# ATLAS specific imports - use populate module to avoid transformers dependency
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df
    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    if SAGE_MODULES_AVAILABLE:
        print("✅ ATLAS modules loaded successfully")
    else:
        print("❌ ATLAS modules not available")
except ImportError as e:
    print(f"Warning: ATLAS modules not available: {e}")
    SAGE_MODULES_AVAILABLE = False

# Configuration
TOKEN = os.environ.get("HF_TOKEN", None)
OWNER = "opencompass"

# OSS submission tracking paths
SUBMISSION_TRACKING_PATH = "atlas_eval/submissions/user_tracking/"
SUBMISSION_HISTORY_FILE = "submission_history.json"

# Upper bound (seconds) for the Hugging Face users API call so a stalled
# connection cannot block a submission indefinitely.
HF_API_TIMEOUT_SECONDS = 10


# NOTE(review): the message templates below contain no HTML tags in the
# visible source even though their output is shown in gr.HTML components.
# If the upstream file wraps these in styled markup, it was lost before
# review -- confirm against the repository.
def format_error(msg):
    """Return `msg` wrapped in the error message template."""
    return f"\n\n{msg}\n\n"


def format_warning(msg):
    """Return `msg` wrapped in the warning message template."""
    return f"\n\n{msg}\n\n"


def format_log(msg):
    """Return `msg` wrapped in the log message template."""
    return f"\n\n{msg}\n\n"


def model_hyperlink(link, model_name):
    """Return the display text for a model.

    NOTE(review): as visible here, both branches yield the bare model name
    and `link` is never embedded in the result -- confirm whether an anchor
    tag is expected.
    """
    if link and link.startswith("http"):
        return f'{model_name}'
    return model_name


def _fetch_submission_history():
    """Download and decode the submission history, raising on any failure.

    Returns an empty dict when the download yields no content.
    """
    from src.oss.oss_file_manager import OSSFileManager

    oss_manager = OSSFileManager()
    history_content = oss_manager.download_file_content(
        SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
    )
    if history_content:
        return json.loads(history_content)
    print("📝 Creating new submission history")
    return {}


def load_submission_history():
    """Load user submission history from OSS.

    Returns:
        The decoded history, or an empty dict when the file has no content
        or loading fails (the failure is printed, not raised).
    """
    try:
        return _fetch_submission_history()
    except Exception as e:
        print(f"⚠️ Failed to load submission history: {e}")
        return {}


def save_submission_history(history):
    """Save user submission history to OSS.

    Args:
        history: JSON-serializable history object to upload.

    Returns:
        The result of the upload call, or False when serialization or the
        upload raises.
    """
    try:
        from src.oss.oss_file_manager import OSSFileManager

        oss_manager = OSSFileManager()
        history_json = json.dumps(history, indent=2, ensure_ascii=False)
        return oss_manager.upload_file_content(
            content=history_json,
            object_key=SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
        )
    except Exception as e:
        print(f"❌ Failed to save submission history: {e}")
        return False


def _parse_hf_timestamp(value):
    """Parse a `...Z` timestamp string into a UTC-aware datetime.

    Accepts the timestamp with or without fractional seconds.

    Raises:
        ValueError: if `value` matches neither accepted format.
    """
    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            parsed = datetime.datetime.strptime(value, fmt)
        except ValueError:
            continue
        return parsed.replace(tzinfo=datetime.timezone.utc)
    raise ValueError(f"Unrecognized timestamp format: {value!r}")


def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str):
    """Check user submission eligibility.

    Args:
        profile: OAuth profile of the submitting user; only `username` is read.
        org_name: Currently unused by the checks below.

    Returns:
        A `(eligible, message)` tuple. Any unexpected error is reported as
        not eligible with the error text in the message.
    """
    try:
        # 1. Check account age limit (60 days)
        user_data = requests.get(
            f"https://huggingface.co/api/users/{profile.username}/overview",
            timeout=HF_API_TIMEOUT_SECONDS,
        )
        if user_data.status_code != 200:
            return False, "Unable to verify account information. Please try again later."

        creation_date = json.loads(user_data.content)["createdAt"]
        # The trailing "Z" marks the timestamp as UTC, so compare against an
        # aware UTC "now" rather than naive local time.
        account_age = datetime.datetime.now(datetime.timezone.utc) - _parse_hf_timestamp(creation_date)
        if account_age < datetime.timedelta(days=60):
            return False, "This account does not meet the submission requirement. Account age must exceed 60 days."

        # 2. Check daily submission limit
        submission_history = load_submission_history()
        user_submissions = submission_history.get(profile.username, [])
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        submissions_today = sum(1 for s in user_submissions if s.get("date", "") == today)
        if submissions_today >= 2:
            return False, "You have already submitted twice today. Please try again tomorrow."

        return True, "Eligibility check passed"
    except Exception as e:
        print(f"❌ User eligibility check failed: {e}")
        return False, f"System check error, please try again later: {str(e)}"


def record_user_submission(profile: gr.OAuthProfile, model_name: str, org_name: str, email: str):
    """Record user submission.

    Appends one record for `profile.username` to the stored history and
    uploads the result.

    Returns:
        The result of the save, or False when the history cannot be loaded
        or updated.
    """
    try:
        # Use the raising loader: if the existing history cannot be read we
        # must not upload a fresh dict over it and drop earlier records.
        submission_history = _fetch_submission_history()

        # One clock read so the date and time fields cannot straddle midnight.
        now = datetime.datetime.now()
        submission_record = {
            "date": now.strftime('%Y-%m-%d'),
            "time": now.strftime('%H:%M:%S'),
            "model": model_name,
            "organization": org_name,
            "email": email,
            "username": profile.username
        }
        submission_history.setdefault(profile.username, []).append(submission_record)

        return save_submission_history(submission_history)
    except Exception as e:
        print(f"❌ Failed to record submission history: {e}")
        return False


def get_leaderboard_dataframe():
    """Generate leaderboard dataframe from ATLAS results.

    Returns:
        The dataframe from `get_sage_leaderboard_df()`, or an empty
        DataFrame when the ATLAS modules are unavailable, no results exist,
        or loading raises.
    """
    print("🔄 Loading ATLAS leaderboard data...")

    if not SAGE_MODULES_AVAILABLE:
        print("❌ ATLAS modules not available")
        return pd.DataFrame()

    try:
        df = get_sage_leaderboard_df()
        if df.empty:
            print("❌ No ATLAS results found")
            return pd.DataFrame()

        print(f"✅ Generated dataframe with {len(df)} rows")
        return df
    except Exception as e:
        print(f"❌ Error generating leaderboard dataframe: {e}")
        traceback.print_exc()
        return pd.DataFrame()


def refresh_leaderboard():
    """Refresh the leaderboard data."""
    print("🔄 Refreshing leaderboard data...")
    return get_leaderboard_dataframe()


def show_progress(step, message, total_steps=4):
    """Show progress information.

    Args:
        step: Current step number.
        message: Description of the current step.
        total_steps: Total number of steps used to compute the percentage.

    Returns:
        The progress text for the progress display.
    """
    progress_percentage = int((step / total_steps) * 100)
    # The inline percentage label is only emitted above 20%.
    bar_label = f'{progress_percentage}%' if progress_percentage > 20 else ''
    if step >= total_steps:
        footer = '✨ Almost done, please wait...'
    else:
        footer = '📤 Please wait, processing your submission...'
    return (
        "\n\n⏳ Processing submission...\n\n"
        f"{progress_percentage}%\n\n"
        f"Step {step}/{total_steps}: {message}\n\n"
        f"{bar_label}\n\n"
        f"{footer}\n\n"
    )


def _validate_submission_fields(file_upload, model_name, org_name, email):
    """Return a formatted message for the first invalid form field, else None."""
    if not file_upload:
        return format_error("Please select a file to upload")

    required_text_fields = (
        (model_name, "Please enter model name"),
        (org_name, "Please enter organization name"),
        (email, "Please enter email address"),
    )
    for value, missing_message in required_text_fields:
        if not value or not value.strip():
            return format_error(missing_message)

    # Validate email format
    _, parsed_email = parseaddr(email)
    if "@" not in parsed_email:
        return format_warning("Please provide a valid email address")
    return None


def handle_submission(file_upload, model_name, org_name, email, user_profile: gr.OAuthProfile):
    """Validate and process one submission, streaming progress updates.

    `user_profile` is not listed in the click handler's `inputs`; see the
    Gradio OAuth documentation for how `gr.OAuthProfile`-annotated
    parameters are supplied.

    Yields:
        `(progress_html, result_html)` pairs for the progress and result
        components. Exactly one of the two is non-empty in each pair.
    """
    try:
        # Step 1: basic validation
        yield show_progress(1, "Validating submission info"), ""

        # Verify login
        if user_profile is None or getattr(user_profile, "name", None) is None:
            yield "", format_error("Please log in with Hugging Face before submitting")
            return

        print(f"user_profile: {user_profile}")
        print(f"user_profile.name: {user_profile.name}")

        validation_message = _validate_submission_fields(file_upload, model_name, org_name, email)
        if validation_message is not None:
            yield "", validation_message
            return

        # Step 2: file validation and reading
        yield show_progress(2, "Validating file format and content"), ""
        time.sleep(0.5)  # allow users to see progress update

        # User eligibility check (account age / frequency / duplicate submissions)
        eligible, msg = check_user_submission_eligibility(user_profile, org_name)
        if not eligible:
            # msg may carry exception text; escape it before it is rendered.
            yield "", format_error(html.escape(msg))
            return

        # Step 3: upload to OSS
        yield show_progress(3, "Uploading file to OSS storage"), ""

        # Process the file submission
        from src.submission.submit import process_sage_submission_simple
        result = process_sage_submission_simple(file_upload, model_name, org_name, email)

        # Step 4: done
        yield show_progress(4, "Submission completed, preparing evaluation"), ""
        time.sleep(0.5)  # allow users to see completion state

        # Record submission history. Best effort: a failure here must not
        # turn a completed submission into an error, but it is logged.
        if not record_user_submission(user_profile, model_name, org_name, email):
            print("⚠️ Submission processed but it could not be recorded in the submission history")

        # Build the success message. Form values are user-controlled and the
        # result is rendered in a gr.HTML component, so escape them.
        submitted_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        success_info = (
            "\n\n🎉 Submission successful!\n\n"
            f"Model: {html.escape(model_name)}\n\n"
            f"Organization: {html.escape(org_name)}\n\n"
            f"Email: {html.escape(email)}\n\n"
            f"Submitted at: {submitted_at}\n\n"
            "Your results have been submitted via OSS. LLM evaluation will complete "
            "in 5-10 minutes and the leaderboard will be updated.\n\n"
        )

        # Clear the progress bar and show the final result
        yield "", success_info + result

    except ImportError as e:
        yield "", format_error(f"Submission system modules unavailable: {html.escape(str(e))}")
    except Exception as e:
        traceback.print_exc()
        yield "", format_error(f"An error occurred during submission: {html.escape(str(e))}")


# Initialize data
print("🚀 Initializing ATLAS leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
print(f"📈 Leaderboard initialized with {len(leaderboard_df)} rows")

# Define column types for the dataframe
# (Model, Organization, Accuracy, mG-Pass@2, mG-Pass@4, Submission Date)
COLUMN_TYPES = ["markdown", "str", "number", "number", "number", "str"]

# Create Gradio interface
demo = gr.Blocks(css="""
.markdown-text {
    font-size: 16px !important;
}
#citation-button {
    font-family: monospace;
}
""")

# NOTE(review): the nesting of widgets below was reconstructed from a
# whitespace-collapsed source; confirm the layout against the repository.
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Citation section - directly visible
    gr.Markdown("## 📙 Citation", elem_classes="markdown-text")
    citation_button = gr.Textbox(
        value=CITATION_BUTTON_TEXT,
        label=CITATION_BUTTON_LABEL,
        elem_id="citation-button",
        lines=6,
        max_lines=10,
        interactive=False
    )

    # Main leaderboard table - COMMENTED OUT
    # gr.Markdown("## 🏆 ATLAS Benchmark Results", elem_classes="markdown-text")

    # # Debug information - dynamic component
    # results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")

    # leaderboard_table = gr.Dataframe(
    #     value=leaderboard_df,
    #     datatype=COLUMN_TYPES,
    #     interactive=False,
    #     wrap=True,
    #     column_widths=["30%", "20%", "12%", "12%", "12%", "14%"]
    # )

    # # Refresh button
    # refresh_button = gr.Button("🔄 Refresh Leaderboard")

    # def refresh_leaderboard_with_count():
    #     """Refresh leaderboard and update count display"""
    #     df = refresh_leaderboard()
    #     count_text = f"📊 **Showing {len(df)} results**"
    #     return df, count_text

    # refresh_button.click(
    #     refresh_leaderboard_with_count,
    #     inputs=[],
    #     outputs=[leaderboard_table, results_count]
    # )

    # Submission section
    with gr.Accordion("🎯 Submit Your ATLAS Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

        gr.Markdown("""
        ### 📋 Submission Requirements
        - File format: Upload a JSON file in the ATLAS format
        - Organization: Provide the exact organization name (shown on the leaderboard)
        - Contact email: Provide a valid email for notifications
        - Auto evaluation: After submission, the system will run LLM-based evaluation and update the leaderboard
        """, elem_classes="markdown-text")

        with gr.Row():
            with gr.Column():
                model_textbox = gr.Textbox(
                    label="Model Name - will be shown on the leaderboard",
                    placeholder="Your Model Name (e.g., GPT-4, Llama-2-70B)"
                )
                org_textbox = gr.Textbox(
                    label="Organization Name - will be shown on the leaderboard",
                    placeholder="Your Organization"
                )
                email_textbox = gr.Textbox(
                    label="Contact Email - used for contact, not publicly visible",
                    placeholder="contact@example.com"
                )
            with gr.Column():
                file_upload = gr.File(
                    label="Upload ATLAS Results (JSON)",
                    file_types=[".json"],
                    type="filepath"
                )

        # Submit button (the explicit login handler is temporarily commented out)
        with gr.Row():
            login_button = gr.LoginButton("🔐 Login with HuggingFace", size="lg")
            submit_button = gr.Button("Submit Results", variant="primary", size="lg")

        # Login status and user info
        profile_state = gr.State()
        login_status = gr.Markdown(visible=True)

        # def on_login(profile: gr.OAuthProfile):
        #     try:
        #         if profile and getattr(profile, "name", None):
        #             name = profile.name
        #             text = f"✅ Logged in as: **{name}**"
        #         else:
        #             text = "❌ Login failed, please try again"
        #         return profile, text
        #     except Exception:
        #         return None, "❌ Login failed, please try again"
        # login_button.click(on_login, inputs=None, outputs=[profile_state, login_status])

        # Progress and result display areas
        progress_info = gr.HTML()
        submission_result = gr.HTML()

        submit_button.click(
            handle_submission,
            # profile_state is intentionally not passed as an input
            inputs=[file_upload, model_textbox, org_textbox, email_textbox],
            outputs=[progress_info, submission_result]
        )

# Launch the app
if __name__ == "__main__":
    # Disable SSR mode for better OAuth compatibility
    # Note: OAuth is handled internally via gr.LoginButton, not at launch level
    demo.launch(ssr_mode=False)