import os
import json
import datetime
import requests
from email.utils import parseaddr

import gradio as gr
import pandas as pd
import numpy as np

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)

# ATLAS-specific imports - use the populate module to avoid a transformers dependency
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df
    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    if SAGE_MODULES_AVAILABLE:
        print("✅ ATLAS modules loaded successfully")
    else:
        print("❌ ATLAS modules not available")
except ImportError as e:
    print(f"Warning: ATLAS modules not available: {e}")
    SAGE_MODULES_AVAILABLE = False

# Configuration
TOKEN = os.environ.get("HF_TOKEN", None)
OWNER = "opencompass"

# OSS submission tracking paths
SUBMISSION_TRACKING_PATH = "atlas_eval/submissions/user_tracking/"
SUBMISSION_HISTORY_FILE = "submission_history.json"


# NOTE: the inline markup in the three helpers below is reconstructed; the
# original HTML tags were lost, so the exact styles are assumptions.
def format_error(msg):
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"


def format_warning(msg):
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"


def format_log(msg):
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"
" def model_hyperlink(link, model_name): if link and link.startswith("http"): return f'{model_name}' return model_name def load_submission_history(): """Load user submission history from OSS""" try: from src.oss.oss_file_manager import OSSFileManager oss_manager = OSSFileManager() # Try to download submission history file history_content = oss_manager.download_file_content( SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE ) if history_content: return json.loads(history_content) else: print("📝 Creating new submission history") return {} except Exception as e: print(f"⚠️ Failed to load submission history: {e}") return {} def save_submission_history(history): """Save user submission history to OSS""" try: from src.oss.oss_file_manager import OSSFileManager oss_manager = OSSFileManager() # Upload submission history history_json = json.dumps(history, indent=2, ensure_ascii=False) success = oss_manager.upload_file_content( content=history_json, object_key=SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE ) return success except Exception as e: print(f"❌ Failed to save submission history: {e}") return False def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str): """Check user submission eligibility""" try: # 1. Check account age limit (60 days) user_data = requests.get(f"https://huggingface.co/api/users/{profile.username}/overview") if user_data.status_code == 200: creation_date = json.loads(user_data.content)["createdAt"] account_age = datetime.datetime.now() - datetime.datetime.strptime(creation_date, '%Y-%m-%dT%H:%M:%S.%fZ') if account_age < datetime.timedelta(days=60): return False, "This account does not meet the submission requirement. Account age must exceed 60 days." else: return False, "Unable to verify account information. Please try again later." # 2. Check daily submission limit submission_history = load_submission_history() user_submissions = submission_history.get(profile.username, []) today = datetime.datetime.today().strftime('%Y-%m-%d') today_submissions = [s for s in user_submissions if s.get("date", "") == today] if len(today_submissions) >= 2: return False, "You have already submitted twice today. Please try again tomorrow." 
        return True, "Eligibility check passed"
    except Exception as e:
        print(f"❌ User eligibility check failed: {e}")
        return False, f"System check error, please try again later: {str(e)}"


def record_user_submission(profile: gr.OAuthProfile, model_name: str, org_name: str, email: str):
    """Record a user submission."""
    try:
        submission_history = load_submission_history()
        if profile.username not in submission_history:
            submission_history[profile.username] = []

        # Record this submission
        submission_record = {
            "date": datetime.datetime.today().strftime('%Y-%m-%d'),
            "time": datetime.datetime.now().strftime('%H:%M:%S'),
            "model": model_name,
            "organization": org_name,
            "email": email,
            "username": profile.username,
        }
        submission_history[profile.username].append(submission_record)

        # Save the submission history
        return save_submission_history(submission_history)
    except Exception as e:
        print(f"❌ Failed to record submission history: {e}")
        return False


def get_leaderboard_dataframe():
    """Generate the leaderboard dataframe from ATLAS results."""
    print("🔄 Loading ATLAS leaderboard data...")

    if not SAGE_MODULES_AVAILABLE:
        print("❌ ATLAS modules not available")
        return pd.DataFrame()

    try:
        # Use the updated get_sage_leaderboard_df function
        df = get_sage_leaderboard_df()
        if df.empty:
            print("❌ No ATLAS results found")
            return pd.DataFrame()
        print(f"✅ Generated dataframe with {len(df)} rows")
        return df
    except Exception as e:
        print(f"❌ Error generating leaderboard dataframe: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()


def refresh_leaderboard():
    """Refresh the leaderboard data."""
    print("🔄 Refreshing leaderboard data...")
    return get_leaderboard_dataframe()


# Initialize data
print("🚀 Initializing ATLAS leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
print(f"📈 Leaderboard initialized with {len(leaderboard_df)} rows")

# Define column types for the dataframe (Model, Organization, Accuracy, mG-Pass@2, mG-Pass@4, Submission Date)
COLUMN_TYPES = ["markdown", "str", "number", "number", "number", "str"]

# Create the Gradio interface
demo = gr.Blocks(css="""
.markdown-text { font-size: 16px !important; }
#citation-button { font-family: monospace; }
""")

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Citation section - directly visible
    gr.Markdown("## 📙 Citation", elem_classes="markdown-text")
    citation_button = gr.Textbox(
        value=CITATION_BUTTON_TEXT,
        label=CITATION_BUTTON_LABEL,
        elem_id="citation-button",
        lines=6,
        max_lines=10,
        interactive=False,
    )

    # Main leaderboard table - COMMENTED OUT
    # gr.Markdown("## 🏆 ATLAS Benchmark Results", elem_classes="markdown-text")
    # # Debug information - dynamic component
    # results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")
    # leaderboard_table = gr.Dataframe(
    #     value=leaderboard_df,
    #     datatype=COLUMN_TYPES,
    #     interactive=False,
    #     wrap=True,
    #     column_widths=["30%", "20%", "12%", "12%", "12%", "14%"]
    # )
    # # Refresh button
    # refresh_button = gr.Button("🔄 Refresh Leaderboard")
    # def refresh_leaderboard_with_count():
    #     """Refresh the leaderboard and update the count display"""
    #     df = refresh_leaderboard()
    #     count_text = f"📊 **Showing {len(df)} results**"
    #     return df, count_text
    # refresh_button.click(
    #     refresh_leaderboard_with_count,
    #     inputs=[],
    #     outputs=[leaderboard_table, results_count]
    # )

    # Submission section
    with gr.Accordion("🎯 Submit Your ATLAS Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
        gr.Markdown("""
### 📋 Submission Requirements
- File format: Upload a JSON file in the ATLAS format
- Organization: Provide the exact organization name (shown on the leaderboard)
- Contact email: Provide a valid email address for notifications
- Auto evaluation: After submission, the system runs an LLM-based evaluation and updates the leaderboard
        """, elem_classes="markdown-text")

        with gr.Row():
            with gr.Column():
                model_textbox = gr.Textbox(
                    label="Model Name - will be shown on the leaderboard",
                    placeholder="Your Model Name (e.g., GPT-4, Llama-2-70B)",
                )
                org_textbox = gr.Textbox(
                    label="Organization Name - will be shown on the leaderboard",
                    placeholder="Your Organization",
                )
                email_textbox = gr.Textbox(
                    label="Contact Email - used for contact, not publicly visible",
                    placeholder="contact@example.com",
                )
            with gr.Column():
                file_upload = gr.File(
                    label="Upload ATLAS Results (JSON)",
                    file_types=[".json"],
                    type="filepath",
                )

        # Submit buttons (the login flow is temporarily commented out)
        with gr.Row():
            login_button = gr.LoginButton("🔐 Login with HuggingFace", size="lg")
            submit_button = gr.Button("Submit Results", variant="primary", size="lg")

        # Login state and user info
        profile_state = gr.State()
        login_status = gr.Markdown(visible=True)

        # def on_login(profile: gr.OAuthProfile):
        #     try:
        #         if profile and getattr(profile, "name", None):
        #             name = profile.name
        #             text = f"✅ Logged in as: **{name}**"
        #         else:
        #             text = "❌ Login failed, please try again"
        #         return profile, text
        #     except Exception:
        #         return None, "❌ Login failed, please try again"
        # login_button.click(on_login, inputs=None, outputs=[profile_state, login_status])

        # Progress and result display areas
        progress_info = gr.HTML()
        submission_result = gr.HTML()

        def show_progress(step, message, total_steps=4):
            """Show progress information."""
            progress_percentage = int((step / total_steps) * 100)
            # NOTE: the wrapper markup below is reconstructed; the original tags were lost.
            progress_html = f"""
            <div style="padding: 12px; border: 1px solid #ddd; border-radius: 8px;">
              <p><strong>Step {step}/{total_steps}:</strong> {message}</p>
              <div style="background: #eee; height: 8px; border-radius: 4px;">
                <div style="width: {progress_percentage}%; background: #4caf50; height: 8px; border-radius: 4px;"></div>
              </div>
              <p>{'✨ Almost done, please wait...' if step >= total_steps else '📤 Please wait, processing your submission...'}</p>
            </div>
            """
            return progress_html
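
        # Usage sketch (an assumption, not part of the truncated original): Gradio
        # streams each value yielded by a generator handler into its output
        # component, so a submit flow can push successive show_progress() frames
        # into `progress_info`. The step labels below are illustrative only.
        def _progress_preview():
            steps = [
                "Validating file",
                "Uploading to OSS",
                "Running LLM evaluation",
                "Updating leaderboard",
            ]
            for i, msg in enumerate(steps, start=1):
                yield show_progress(i, msg)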

        def build_success_html(model_name, org_name, email):
            """Build the confirmation card shown after a successful submission.

            NOTE: the helper name and wrapper markup are reconstructions; the
            surrounding code was lost along with the original HTML tags.
            """
            return f"""
            <div style="padding: 12px; border: 1px solid #4caf50; border-radius: 8px;">
              <p><strong>Model:</strong> {model_name}</p>
              <p><strong>Organization:</strong> {org_name}</p>
              <p><strong>Email:</strong> {email}</p>
              <p><strong>Submitted at:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
              <p>Your results have been submitted via OSS. LLM evaluation will
              complete in 5-10 minutes and the leaderboard will be updated.</p>
            </div>
            """
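
        # Sketch of the submit wiring (the original file is truncated above this
        # point). `handle_submission` and its body are assumptions that reuse the
        # helpers defined earlier in this file; the real handler may differ.
        def handle_submission(model_name, org_name, email, file_path, profile):
            if not profile:
                return format_error("Please log in with HuggingFace before submitting."), ""
            eligible, msg = check_user_submission_eligibility(profile, org_name)
            if not eligible:
                return format_error(msg), ""
            if not file_path:
                return format_error("Please upload your ATLAS results as a JSON file."), ""
            record_user_submission(profile, model_name, org_name, email)
            return (
                show_progress(4, "Submission received"),
                build_success_html(model_name, org_name, email),
            )

        submit_button.click(
            handle_submission,
            inputs=[model_textbox, org_textbox, email_textbox, file_upload, profile_state],
            outputs=[progress_info, submission_result],
        )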