Spaces:

Bindumiryala
/

smart-hate-speech-classifier

Sleeping

App Files Files Community

Bindumiryala committed on Sep 29

Commit

6ead904

verified ·

1 Parent(s): d58e48e

Upload app.py

Browse files

Files changed (1) hide show

app.py +207 -0

app.py ADDED Viewed

	@@ -0,0 +1,207 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import whisper
+import tempfile
+import os
+import torch
+import sqlite3
+import bcrypt
+import imageio_ffmpeg
+import subprocess
+from moviepy.editor import VideoFileClip
+# ------------------------------- DB Setup -------------------------------
# Schema for the account table: one row per user, keyed by username, with the
# password hash stored alongside it.
_USERS_TABLE_DDL = '''CREATE TABLE IF NOT EXISTS users (
    username TEXT PRIMARY KEY,
    password TEXT NOT NULL
)'''

# Open (or create) the local SQLite file and make sure the table exists.
# `conn` and `cursor` are module-level because the auth helpers below use them.
conn = sqlite3.connect('users.db', check_same_thread=False)
cursor = conn.cursor()
cursor.execute(_USERS_TABLE_DDL)
conn.commit()
+# ------------------------------- Auth Helpers -------------------------------
def hash_password(password):
    """Return a salted bcrypt hash of the plain-text *password*.

    The password string is encoded to bytes first and hashed with a freshly
    generated salt; the value returned is whatever ``bcrypt.hashpw`` produces.
    """
    secret = password.encode()
    salt = bcrypt.gensalt()
    return bcrypt.hashpw(secret, salt)
def verify_password(password, hashed):
    """Check the plain-text *password* against a stored bcrypt hash.

    Returns the result of ``bcrypt.checkpw`` for the encoded password and
    *hashed*.
    """
    candidate = password.encode()
    return bcrypt.checkpw(candidate, hashed)
def add_user(username, password):
    """Insert a new account and report whether it was created.

    Args:
        username: Login name; it is the table's primary key.
        password: Plain-text password; only its bcrypt hash is stored.

    Returns:
        True when the row was inserted and committed, False when the insert
        violated a table constraint (in practice: the username is taken).

    Raises:
        sqlite3.Error: Any database failure other than a constraint
            violation (e.g. a locked or unreadable file) is propagated so it
            is not misreported to the user as "username already exists".
    """
    hashed_pwd = hash_password(password)
    try:
        cursor.execute(
            "INSERT INTO users (username, password) VALUES (?, ?)",
            (username, hashed_pwd),
        )
    except sqlite3.IntegrityError:
        # Close the transaction the failed INSERT opened so it cannot leak
        # into later statements on this connection.
        conn.rollback()
        return False
    conn.commit()
    return True
def authenticate_user(username, password):
    """Return True when *username* exists and *password* matches its hash.

    Looks up the stored hash for the username; an unknown username or a
    password that fails verification both yield False.
    """
    cursor.execute("SELECT password FROM users WHERE username = ?", (username,))
    row = cursor.fetchone()
    if not row:
        # No such account.
        return False
    stored_hash = row[0]
    if verify_password(password, stored_hash):
        return True
    return False
+# ------------------------------- Login / Signup UI -------------------------------
def login_signup_page():
    """Render the login / registration screen.

    The view shown is chosen by ``st.session_state.page`` ('login' or
    'register', defaulting to 'login'). A successful login stores
    ``logged_in``, ``username`` and ``page = "overview"`` in the session state
    and reruns the script. Every button that changes ``page`` also triggers a
    rerun, so the new view is drawn immediately rather than on the next
    interaction.
    """
    st.set_page_config(page_title="Login | Hate Speech Classifier", layout="centered")
    if 'page' not in st.session_state:
        st.session_state.page = 'login'
    st.markdown('<h1 style="text-align:center; color:#005f73;">🗣️ Smart Hate Speech Classifier</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align:center; color:#0a9396;">Please log in or create a new account to continue.</p>', unsafe_allow_html=True)
    if st.session_state.page == 'login':
        username = st.text_input("👤 Username")
        password = st.text_input("🔑 Password", type="password")
        if st.button("Login"):
            if authenticate_user(username, password):
                st.session_state.logged_in = True
                st.session_state.username = username
                st.session_state.page = "overview"
                st.success("Login successful!")
                st.rerun()
            else:
                st.error("Invalid credentials")
        if st.button("New user? Create an account"):
            st.session_state.page = 'register'
            # The login widgets above were already drawn in this run; rerun
            # so the registration form replaces them right away.
            st.rerun()
    elif st.session_state.page == 'register':
        new_user = st.text_input("👤 New Username")
        new_pass = st.text_input("🔑 New Password", type="password")
        if st.button("Create Account"):
            if not new_user.strip() or not new_pass:
                # Refuse blank credentials instead of creating an account
                # with an empty username or password.
                st.warning("Username and password cannot be empty.")
            elif add_user(new_user, new_pass):
                st.success("Account created! You can now log in.")
            else:
                st.error("Username already exists!")
        if st.button("Already have an account? Login"):
            st.session_state.page = 'login'
            # Same reason as above: redraw immediately with the login form.
            st.rerun()
+# ------------------------------- Access Control -------------------------------
# Gate the rest of the script: until the session is marked as logged in, show
# only the login/registration screen and halt this run.
if not st.session_state.get("logged_in", False):
    # Ensures the flag exists in the session state (False on a fresh session).
    st.session_state.logged_in = False
    login_signup_page()
    st.stop()
+# ------------------------------- FFmpeg Fix -------------------------------
# Path of the ffmpeg executable reported by imageio-ffmpeg.
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()


def custom_run(cmd, *args, **kwargs):
    """Run *cmd* via ``subprocess.run``, swapping a bare "ffmpeg" for ``ffmpeg_path``.

    Args:
        cmd: The command to execute. When it is a non-empty list or tuple
            whose first element is exactly "ffmpeg", that element is replaced
            by ``ffmpeg_path``; any other command is passed through untouched.
        *args, **kwargs: Forwarded unchanged to ``subprocess.run``.

    Returns:
        Whatever ``subprocess.run`` returns.
    """
    if isinstance(cmd, (list, tuple)) and cmd and cmd[0] == "ffmpeg":
        # Build a new list instead of assigning into `cmd`, so the caller's
        # sequence is never mutated (and tuples work too).
        cmd = [ffmpeg_path, *cmd[1:]]
    return subprocess.run(cmd, *args, **kwargs)


# Replace the `run` name that whisper.audio uses so its ffmpeg invocation goes
# through custom_run.
import whisper.audio
whisper.audio.run = custom_run
+# ------------------------------- Load Models -------------------------------
@st.cache_resource
def load_whisper_model():
    """Load and return the "tiny" Whisper speech-to-text model."""
    speech_model = whisper.load_model("tiny")
    return speech_model


@st.cache_resource
def load_bert_model():
    """Load the "unitary/toxic-bert" tokenizer and classifier.

    Returns:
        A ``(tokenizer, model)`` tuple; ``eval()`` has been called on the
        model before it is returned.
    """
    checkpoint = "unitary/toxic-bert"
    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    bert_classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    bert_classifier.eval()
    return bert_tokenizer, bert_classifier


# Module-level handles used by the classifier and the detector page.
whisper_model = load_whisper_model()
tokenizer, classifier_model = load_bert_model()
+# ------------------------------- Classifier -------------------------------
# Words/phrases that short-circuit the model: any text containing one of them
# is reported as hate speech with full confidence.
HATE_KEYWORDS = ["ugly", "stupid", "idiot", "hate", "kill", "trash","fuck you","bitch"]


def _mentions_hate_keyword(text):
    """Return True if a word in *text* starts with one of HATE_KEYWORDS.

    Matching is case-insensitive and anchored at the start of a word, so
    inflections such as "killing" or "idiots" still match while unrelated
    words that merely contain a keyword ("skill", "whatever") do not.
    """
    import re  # local import: `re` is not among the file's top-level imports

    alternatives = "|".join(re.escape(keyword) for keyword in HATE_KEYWORDS)
    return re.search(rf"\b(?:{alternatives})", text.lower()) is not None


def classify_text(text, threshold=0.5):
    """Classify *text* as "Hate Speech" or "Not Hate Speech".

    A keyword hit returns immediately with confidence 1.0. Otherwise the text
    is scored by the toxic-BERT classifier. Per its model card that model is
    multi-label (toxic, severe_toxic, obscene, threat, insult, identity_hate),
    so each logit goes through an independent sigmoid and the highest
    per-label probability is used as the toxicity score.

    Args:
        text: The message to classify.
        threshold: Minimum toxicity score (0-1) for a "Hate Speech" verdict.

    Returns:
        ``(label, confidence)`` where confidence is the toxicity score for a
        "Hate Speech" verdict and ``1 - score`` for "Not Hate Speech".
    """
    if _mentions_hate_keyword(text):
        return "Hate Speech", 1.0
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = classifier_model(**inputs).logits
    # Sigmoid, not softmax: the labels are not mutually exclusive, and the
    # verdict must not hinge on one particular label index being the argmax.
    toxicity = torch.sigmoid(logits)[0].max().item()
    if toxicity >= threshold:
        return "Hate Speech", toxicity
    return "Not Hate Speech", 1.0 - toxicity
def show_result(label, score):
    """Display a prediction under a "Prediction Result" heading.

    The message shows *label* and *score* as a percentage with two decimals.
    A "Hate Speech" label is rendered with ``st.error``; any other label with
    ``st.success``.
    """
    st.markdown("### 🔍 Prediction Result:")
    message = f"{label} ({score * 100:.2f}% confident)"
    render = st.error if label == "Hate Speech" else st.success
    render(message)
+# ------------------------------- Sidebar -------------------------------
# Sidebar buttons in display order, each paired with the page it selects.
_NAV_TARGETS = (
    ("📘 Project Overview", "overview"),
    ("🎙️ Hate Speech Detector", "detector"),
)

with st.sidebar:
    st.title("📋 Navigation")
    for _nav_label, _nav_page in _NAV_TARGETS:
        if st.button(_nav_label):
            st.session_state.page = _nav_page

# Fall back to the overview when no page has been chosen yet.
if 'page' not in st.session_state:
    st.session_state.page = "overview"
+# ------------------------------- Main Page -------------------------------
def _save_upload_to_temp(uploaded_file, default_suffix):
    """Write an uploaded file to a named temporary file and return its path.

    The upload's own extension is kept when it has one, otherwise
    *default_suffix* is used. The caller is responsible for deleting the file.
    """
    suffix = os.path.splitext(uploaded_file.name)[1] or default_suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a rerun never writes an empty file.
        handle.write(uploaded_file.getvalue())
        return handle.name


def _remove_quietly(path):
    """Delete *path* if it is set; failure to delete is deliberately ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass  # best-effort cleanup of a temp file


def _transcribe_and_report(audio_path):
    """Transcribe *audio_path* with Whisper, show the text, then classify it."""
    result = whisper_model.transcribe(audio_path)
    transcribed = result["text"]
    if not transcribed.strip():
        # Nothing was recognised; classifying an empty string is meaningless.
        st.warning("⚠️ No speech was detected in this file.")
        return
    st.success("📝 Transcribed Text:")
    st.info(transcribed)
    label, score = classify_text(transcribed)
    show_result(label, score)


page = st.session_state.get("page", "overview")
if page == "overview":
    st.title("🗣️ Smart Hate Speech Classifier Using BERT & Whisper")
    st.markdown("""
This AI-based project detects hate speech in:
- ✍️ Text Input
- 🔊 Audio Files
- 🎥 Video Uploads or URLs
### 🔧 Models Used:
- `OpenAI Whisper` for Speech-to-Text
- `Toxic-BERT` for Hate Speech Classification
    """)
elif page == "detector":
    st.title("🎙️ Hate Speech Detection")
    input_mode = st.radio("Choose Input Type", ["Text", "Audio", "Video Upload"])
    if input_mode == "Text":
        text_input = st.text_area("📝 Enter your message:")
        if st.button("Classify Text"):
            if text_input.strip():
                label, score = classify_text(text_input)
                show_result(label, score)
            else:
                st.warning("⚠️ Please enter some text.")
    elif input_mode == "Audio":
        audio_file = st.file_uploader("📤 Upload Audio File:", type=["wav", "mp3"])
        if audio_file:
            audio_path = _save_upload_to_temp(audio_file, ".wav")
            try:
                st.audio(audio_path)
                _transcribe_and_report(audio_path)
            finally:
                # The temp file was created with delete=False; remove it here
                # so uploads do not pile up on disk.
                _remove_quietly(audio_path)
    elif input_mode == "Video Upload":
        video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
        if video_file:
            video_path = _save_upload_to_temp(video_file, ".mp4")
            audio_path = None
            try:
                st.video(video_path)
                clip = VideoFileClip(video_path)
                try:
                    if clip.audio is None:
                        st.warning("⚠️ This video has no audio track to analyse.")
                    else:
                        # A unique temp path per request: a fixed filename in
                        # the working directory would be overwritten by
                        # concurrent sessions.
                        fd, audio_path = tempfile.mkstemp(suffix=".wav")
                        os.close(fd)
                        clip.audio.write_audiofile(audio_path)
                finally:
                    # Release the clip's readers before the files are removed.
                    clip.close()
                if audio_path:
                    _transcribe_and_report(audio_path)
            finally:
                _remove_quietly(audio_path)
                _remove_quietly(video_path)
st.markdown("---")
st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")