Spaces:

govtech
/

lionguard-demo

Running

App Files Community

gabrielchua committed on Jun 30

Commit

0989743

1 Parent(s): d78aca9

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -39

app.py CHANGED Viewed

@@ -5,19 +5,19 @@ import torch
 import sys
 import uuid
 from datetime import datetime
 import json
-import gspread
-from google.oauth2 import service_account
 from safetensors.torch import load_file
 from lionguard2 import LionGuard2, CATEGORIES
 from utils import get_embeddings
-# -- OpenAI Setup --
 client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-# -- Model Loading --
 def load_lionguard2():
     model = LionGuard2()
     model.eval()
@@ -27,22 +27,26 @@ def load_lionguard2():
 model = load_lionguard2()
-# -- Google Sheets Config --
 GOOGLE_SHEET_URL = os.environ.get("GOOGLE_SHEET_URL")
 GOOGLE_CREDENTIALS = os.environ.get("GCP_SERVICE_ACCOUNT")
 RESULTS_SHEET_NAME = "results"
 VOTES_SHEET_NAME = "votes"
 def save_results_data(row):
     try:
-        credentials = service_account.Credentials.from_service_account_info(
-            json.loads(GOOGLE_CREDENTIALS),
-            scopes=[
-                "https://www.googleapis.com/auth/spreadsheets",
-                "https://www.googleapis.com/auth/drive",
-            ],
-        )
-        gc = gspread.authorize(credentials)
         sheet = gc.open_by_url(GOOGLE_SHEET_URL)
         ws = sheet.worksheet(RESULTS_SHEET_NAME)
         ws.append_row(list(row.values()))
@@ -51,14 +55,7 @@ def save_results_data(row):
 def save_vote_data(text_id, agree):
     try:
-        credentials = service_account.Credentials.from_service_account_info(
-            json.loads(GOOGLE_CREDENTIALS),
-            scopes=[
-                "https://www.googleapis.com/auth/spreadsheets",
-                "https://www.googleapis.com/auth/drive",
-            ],
-        )
-        gc = gspread.authorize(credentials)
         sheet = gc.open_by_url(GOOGLE_SHEET_URL)
         ws = sheet.worksheet(VOTES_SHEET_NAME)
         vote_row = {
@@ -70,6 +67,21 @@ def save_vote_data(text_id, agree):
     except Exception as e:
         print(f"Error saving vote data: {e}")
 # --- Classifier logic ---
 def format_score_with_style(score_str):
@@ -90,11 +102,11 @@ def format_score_with_style(score_str):
 def format_binary_score(score):
     percentage = int(score * 100)
     if score < 0.4:
-        return f'<div style="color: #34d399; font-weight:700;">✅ Pass ({percentage}/100)</div>'
     elif 0.4 <= score < 0.7:
-        return f'<div style="color: #fbbf24; font-weight:700;">⚠️ Warning ({percentage}/100)</div>'
     else:
-        return f'<div style="color: #fca5a5; font-weight:700;">🚨 Fail ({percentage}/100)</div>'
 def analyze_text(text):
     if not text.strip():
@@ -108,6 +120,7 @@ def analyze_text(text):
         main_categories = ['hateful', 'insults', 'sexual', 'physical_violence', 'self_harm', 'all_other_misconduct']
         categories_html = []
         for category in main_categories:
             subcategories = CATEGORIES[category]
             category_name = category.replace('_', ' ').title()
@@ -122,6 +135,7 @@ def analyze_text(text):
             category_display = f"{category_emojis.get(category_name, '📝')} {category_name}"
             level_scores = [results.get(subcategory_key, [0.0])[0] for subcategory_key in subcategories]
             max_score = max(level_scores) if level_scores else 0.0
             categories_html.append(f'''
             <tr>
                 <td>{category_display}</td>
@@ -147,12 +161,12 @@ def analyze_text(text):
                 "text_id": text_id,
                 "text": text,
                 "binary_score": binary_score,
-                # Add all category scores as before...
             }
             save_results_data(results_row)
         voting_html = '<div>Help improve LionGuard2! Rate the analysis below.</div>'
         return format_binary_score(binary_score), html_table, text_id, voting_html
     except Exception as e:
@@ -163,15 +177,15 @@ def vote_thumbs_up(text_id):
     if text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
         save_vote_data(text_id, True)
         return '<div style="color: #34d399; font-weight:700;">🎉 Thank you!</div>'
-    return '<div>Voting not available</div>'
 def vote_thumbs_down(text_id):
     if text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
         save_vote_data(text_id, False)
         return '<div style="color: #fca5a5; font-weight:700;">📝 Thanks for the feedback!</div>'
-    return '<div>Voting not available</div>'
-# --- Chatbot guardrail logic ---
 def get_openai_response(message, system_prompt="You are a helpful assistant."):
     try:
         response = client.chat.completions.create(
@@ -201,10 +215,10 @@ def lionguard_2(message, threshold=0.5):
         embeddings = get_embeddings([message])
         results = model.predict(embeddings)
         binary_prob = results['binary'][0]
-        return binary_prob > threshold
     except Exception as e:
         print(f"Error in LionGuard 2: {e}")
-        return False
 def process_message(message, history_no_mod, history_openai, history_lg):
     if not message.strip():
@@ -222,7 +236,7 @@ def process_message(message, history_no_mod, history_openai, history_lg):
         openai_response = get_openai_response(message)
         history_openai.append({"role": "assistant", "content": openai_response})
-    lg_flagged = lionguard_2(message)
     history_lg.append({"role": "user", "content": message})
     if lg_flagged:
         lg_response = "🚫 This message has been flagged by LionGuard 2"
@@ -231,6 +245,41 @@ def process_message(message, history_no_mod, history_openai, history_lg):
         lg_response = get_openai_response(message)
         history_lg.append({"role": "assistant", "content": lg_response})
     return history_no_mod, history_openai, history_lg, ""
 def clear_all_chats():
@@ -240,7 +289,7 @@ def clear_all_chats():
 DISCLAIMER = """
 <div style='background: #fbbf24; color: #1e293b; border-radius: 8px; padding: 14px; margin-bottom: 12px; font-size: 15px; font-weight:500;'>
-⚠️ LionGuard 2 is an experimental ML model and may make mistakes. All entries are logged (anonymised) to improve the model.
 </div>
 """
@@ -262,7 +311,7 @@ with gr.Blocks(title="LionGuard 2 Demo", theme=gr.themes.Soft()) as demo:
                     analyze_btn = gr.Button("Analyze", variant="primary")
                 with gr.Column(scale=1, min_width=400):
                     binary_output = gr.HTML(
-                        value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Enter text to analyze</div>'
                     )
                     category_table = gr.HTML(
                         value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Category scores will appear here after analysis</div>'
@@ -292,18 +341,18 @@ with gr.Blocks(title="LionGuard 2 Demo", theme=gr.themes.Soft()) as demo:
             thumbs_up_btn.click(vote_thumbs_up, inputs=[current_text_id], outputs=[voting_feedback])
             thumbs_down_btn.click(vote_thumbs_down, inputs=[current_text_id], outputs=[voting_feedback])
-        with gr.Tab("Chatbot Guardrail"):
             gr.HTML(DISCLAIMER)
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🔵 No Moderation")
-                    chatbot_no_mod = gr.Chatbot(height=400, label="No Moderation", show_label=False, bubble_full_width=False, type='messages')
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🟠 OpenAI Moderation")
-                    chatbot_openai = gr.Chatbot(height=400, label="OpenAI Moderation", show_label=False, bubble_full_width=False, type='messages')
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🛡️ LionGuard 2")
-                    chatbot_lg = gr.Chatbot(height=400, label="LionGuard 2", show_label=False, bubble_full_width=False, type='messages')
             gr.Markdown("##### 💬 Send Message to All Models")
             with gr.Row():
                 message_input = gr.Textbox(

 import sys
 import uuid
 from datetime import datetime
 import json
 from safetensors.torch import load_file
 from lionguard2 import LionGuard2, CATEGORIES
 from utils import get_embeddings
+import gspread
+from google.oauth2 import service_account
+# --- OpenAI Setup ---
 client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+# --- Model Loading ---
 def load_lionguard2():
     model = LionGuard2()
     model.eval()
 model = load_lionguard2()
+# --- Google Sheets Config ---
 GOOGLE_SHEET_URL = os.environ.get("GOOGLE_SHEET_URL")
 GOOGLE_CREDENTIALS = os.environ.get("GCP_SERVICE_ACCOUNT")
 RESULTS_SHEET_NAME = "results"
 VOTES_SHEET_NAME = "votes"
+CHATBOT_SHEET_NAME = "chatbot"
+def get_gspread_client():
     # Build and return an authorized gspread client for the logging sheet.
     # GOOGLE_CREDENTIALS is the service-account JSON string read from the
     # GCP_SERVICE_ACCOUNT environment variable; it is parsed on every call.
     # If that variable is unset the value is None and json.loads raises
     # TypeError, so callers are expected to guard or catch.
     # NOTE(review): each save/log helper calls this again, so credentials are
     # rebuilt and re-authorized per write; consider caching if that matters.
+    credentials = service_account.Credentials.from_service_account_info(
+        json.loads(GOOGLE_CREDENTIALS),
         # Scopes requested: Google Sheets and Google Drive.
+        scopes=[
+            "https://www.googleapis.com/auth/spreadsheets",
+            "https://www.googleapis.com/auth/drive",
+        ],
+    )
+    return gspread.authorize(credentials)
 def save_results_data(row):
     try:
+        gc = get_gspread_client()
         sheet = gc.open_by_url(GOOGLE_SHEET_URL)
         ws = sheet.worksheet(RESULTS_SHEET_NAME)
         ws.append_row(list(row.values()))
 def save_vote_data(text_id, agree):
     try:
+        gc = get_gspread_client()
         sheet = gc.open_by_url(GOOGLE_SHEET_URL)
         ws = sheet.worksheet(VOTES_SHEET_NAME)
         vote_row = {
     except Exception as e:
         print(f"Error saving vote data: {e}")
+def log_chatbot_data(row):
     # Append one chatbot interaction to the CHATBOT_SHEET_NAME worksheet of
     # the spreadsheet at GOOGLE_SHEET_URL.
     # `row` is a dict; the values are written in the fixed column order
     # listed below, so every one of those keys must be present.
     # Best-effort: any failure (auth, network, missing key) is caught by the
     # broad except, printed, and not re-raised. Returns None either way.
+    try:
+        gc = get_gspread_client()
+        sheet = gc.open_by_url(GOOGLE_SHEET_URL)
+        ws = sheet.worksheet(CHATBOT_SHEET_NAME)
         # Column order of the appended row is defined by this list.
+        ws.append_row([
+            row["datetime"], row["text_id"], row["text"], row["binary_score"],
+            row["hateful_l1_score"], row["hateful_l2_score"], row["insults_score"],
+            row["sexual_l1_score"], row["sexual_l2_score"], row["physical_violence_score"],
+            row["self_harm_l1_score"], row["self_harm_l2_score"], row["aom_l1_score"],
+            row["aom_l2_score"], row["openai_score"]
+        ])
+    except Exception as e:
+        print(f"Error saving chatbot data: {e}")
 # --- Classifier logic ---
 def format_score_with_style(score_str):
 def format_binary_score(score):
     # Render the overall (binary) score as a styled HTML banner string.
     # score < 0.4 -> green "Pass"; 0.4 <= score < 0.7 -> amber "Warning";
     # anything else -> red "Fail". The number shown is score * 100 truncated
     # by int(), not rounded.
     percentage = int(score * 100)
     if score < 0.4:
+        return f'<div style="background:linear-gradient(135deg, #065f46 0%, #047857 100%); color:#34d399; padding:48px 0; border-radius:20px; text-align:center; font-weight:900; border:3px solid #10b981; font-size:48px; margin:24px 0; box-shadow:0 4px 24px rgba(0,0,0,0.3);">✅ Pass ({percentage}/100)</div>'
     # The lower bound is already implied by the branch above; kept for clarity.
     elif 0.4 <= score < 0.7:
+        return f'<div style="background:linear-gradient(135deg, #92400e 0%, #b45309 100%); color:#fbbf24; padding:48px 0; border-radius:20px; text-align:center; font-weight:900; border:3px solid #f59e0b; font-size:48px; margin:24px 0; box-shadow:0 4px 24px rgba(0,0,0,0.3);">⚠️ Warning ({percentage}/100)</div>'
     else:
+        return f'<div style="background:linear-gradient(135deg, #991b1b 0%, #b91c1c 100%); color:#fca5a5; padding:48px 0; border-radius:20px; text-align:center; font-weight:900; border:3px solid #ef4444; font-size:48px; margin:24px 0; box-shadow:0 4px 24px rgba(0,0,0,0.3);">🚨 Fail ({percentage}/100)</div>'
 def analyze_text(text):
     if not text.strip():
         main_categories = ['hateful', 'insults', 'sexual', 'physical_violence', 'self_harm', 'all_other_misconduct']
         categories_html = []
+        max_scores = {}
         for category in main_categories:
             subcategories = CATEGORIES[category]
             category_name = category.replace('_', ' ').title()
             category_display = f"{category_emojis.get(category_name, '📝')} {category_name}"
             level_scores = [results.get(subcategory_key, [0.0])[0] for subcategory_key in subcategories]
             max_score = max(level_scores) if level_scores else 0.0
+            max_scores[category] = max_score
             categories_html.append(f'''
             <tr>
                 <td>{category_display}</td>
                 "text_id": text_id,
                 "text": text,
                 "binary_score": binary_score,
             }
+            for category in main_categories:
+                results_row[f"{category}_max"] = max_scores[category]
             save_results_data(results_row)
         voting_html = '<div>Help improve LionGuard2! Rate the analysis below.</div>'
         return format_binary_score(binary_score), html_table, text_id, voting_html
     except Exception as e:
     if text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
         save_vote_data(text_id, True)
         return '<div style="color: #34d399; font-weight:700;">🎉 Thank you!</div>'
+    return '<div>Voting not available or analysis not yet run.</div>'
 def vote_thumbs_down(text_id):
     # Record a "disagree" vote for the analysis identified by text_id and
     # return the HTML snippet shown as voting feedback.
     # The vote is only attempted when text_id is truthy and both
     # GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS are configured; otherwise the
     # fallback message on the last line is returned.
     # NOTE(review): save_vote_data appears to print and swallow its own
     # errors, so the thank-you text may be shown even when the write
     # failed; confirm.
     if text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
         save_vote_data(text_id, False)
         return '<div style="color: #fca5a5; font-weight:700;">📝 Thanks for the feedback!</div>'
+    return '<div>Voting not available or analysis not yet run.</div>'
+# --- Guardrail Comparison logic ---
 def get_openai_response(message, system_prompt="You are a helpful assistant."):
     try:
         response = client.chat.completions.create(
         embeddings = get_embeddings([message])
         results = model.predict(embeddings)
         binary_prob = results['binary'][0]
+        return binary_prob > threshold, binary_prob
     except Exception as e:
         print(f"Error in LionGuard 2: {e}")
+        return False, 0.0
 def process_message(message, history_no_mod, history_openai, history_lg):
     if not message.strip():
         openai_response = get_openai_response(message)
         history_openai.append({"role": "assistant", "content": openai_response})
+    lg_flagged, lg_score = lionguard_2(message)
     history_lg.append({"role": "user", "content": message})
     if lg_flagged:
         lg_response = "🚫 This message has been flagged by LionGuard 2"
         lg_response = get_openai_response(message)
         history_lg.append({"role": "assistant", "content": lg_response})
+    # --- Logging for chatbot worksheet ---
     # Runs only when both sheet settings are configured. Everything below is
     # best-effort: the outer except prints the error and the chat reply is
     # still returned.
+    if GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
+        try:
             # NOTE(review): lionguard_2(message), called earlier in this
             # function, already runs get_embeddings + model.predict on the same
             # message; this repeats that work to obtain per-category scores.
+            embeddings = get_embeddings([message])
+            results = model.predict(embeddings)
             # Naive timestamp (no timezone passed to datetime.now()).
+            now = datetime.now().isoformat()
+            text_id = str(uuid.uuid4())
             # Each score takes element [0] of the entry in `results`; a missing
             # key falls back to [None], so that cell is logged as None.
             # NOTE(review): indexing CATEGORIES[...][1] assumes those categories
             # list at least two subcategory keys; verify against lionguard2.
+            row = {
+                "datetime": now,
+                "text_id": text_id,
+                "text": message,
+                "binary_score": results.get("binary", [None])[0],
+                "hateful_l1_score": results.get(CATEGORIES['hateful'][0], [None])[0],
+                "hateful_l2_score": results.get(CATEGORIES['hateful'][1], [None])[0],
+                "insults_score": results.get(CATEGORIES['insults'][0], [None])[0],
+                "sexual_l1_score": results.get(CATEGORIES['sexual'][0], [None])[0],
+                "sexual_l2_score": results.get(CATEGORIES['sexual'][1], [None])[0],
+                "physical_violence_score": results.get(CATEGORIES['physical_violence'][0], [None])[0],
+                "self_harm_l1_score": results.get(CATEGORIES['self_harm'][0], [None])[0],
+                "self_harm_l2_score": results.get(CATEGORIES['self_harm'][1], [None])[0],
+                "aom_l1_score": results.get(CATEGORIES['all_other_misconduct'][0], [None])[0],
+                "aom_l2_score": results.get(CATEGORIES['all_other_misconduct'][1], [None])[0],
+                "openai_score": None
+            }
             # A failed moderation lookup leaves openai_score as None and does
             # not abort the logging.
+            try:
+                openai_result = client.moderations.create(input=message)
+                # Using the "hate" category score as a demonstration. You may customize as needed.
                 # NOTE(review): in the openai>=1.0 client category_scores looks
                 # like a typed model object, not a dict. If so, .get() raises
                 # AttributeError, the except below swallows it, and openai_score
                 # is always None. Attribute access (.hate) is probably what is
                 # intended; confirm against the installed client.
+                row["openai_score"] = float(openai_result.results[0].category_scores.get("hate", 0.0))
+            except Exception:
+                row["openai_score"] = None
+            log_chatbot_data(row)
+        except Exception as e:
+            print(f"Chatbot logging failed: {e}")
     return history_no_mod, history_openai, history_lg, ""
 def clear_all_chats():
 DISCLAIMER = """
 <div style='background: #fbbf24; color: #1e293b; border-radius: 8px; padding: 14px; margin-bottom: 12px; font-size: 15px; font-weight:500;'>
+⚠️ LionGuard 2 may make mistakes. All entries are logged (anonymised) to improve the model.
 </div>
 """
                     analyze_btn = gr.Button("Analyze", variant="primary")
                 with gr.Column(scale=1, min_width=400):
                     binary_output = gr.HTML(
+                        value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic; font-size:36px;">Enter text to analyze</div>'
                     )
                     category_table = gr.HTML(
                         value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Category scores will appear here after analysis</div>'
             thumbs_up_btn.click(vote_thumbs_up, inputs=[current_text_id], outputs=[voting_feedback])
             thumbs_down_btn.click(vote_thumbs_down, inputs=[current_text_id], outputs=[voting_feedback])
+        with gr.Tab("Guardrail Comparison"):
             gr.HTML(DISCLAIMER)
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🔵 No Moderation")
+                    chatbot_no_mod = gr.Chatbot(height=650, label="No Moderation", show_label=False, bubble_full_width=False, type='messages')
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🟠 OpenAI Moderation")
+                    chatbot_openai = gr.Chatbot(height=650, label="OpenAI Moderation", show_label=False, bubble_full_width=False, type='messages')
                 with gr.Column(scale=1):
                     gr.Markdown("#### 🛡️ LionGuard 2")
+                    chatbot_lg = gr.Chatbot(height=650, label="LionGuard 2", show_label=False, bubble_full_width=False, type='messages')
             gr.Markdown("##### 💬 Send Message to All Models")
             with gr.Row():
                 message_input = gr.Textbox(