Bindumiryala committed on
Commit
6ead904
·
verified ·
1 Parent(s): d58e48e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import whisper
5
+ import tempfile
6
+ import os
7
+ import torch
8
+ import sqlite3
9
+ import bcrypt
10
+ import imageio_ffmpeg
11
+ import subprocess
12
+ from moviepy.editor import VideoFileClip
13
+
14
+ # ------------------------------- DB Setup -------------------------------
15
# Open (or create) the SQLite file that stores user accounts.
# check_same_thread=False allows the connection to be used from threads other
# than the one that created it.
conn = sqlite3.connect('users.db', check_same_thread=False)
cursor = conn.cursor()

# One row per account: the username is the primary key, the password column
# holds whatever hash the auth helpers store.
cursor.execute(
    '''CREATE TABLE IF NOT EXISTS users (
    username TEXT PRIMARY KEY,
    password TEXT NOT NULL
)'''
)
conn.commit()
22
+
23
+ # ------------------------------- Auth Helpers -------------------------------
24
def hash_password(password):
    """Return a bcrypt hash of the plaintext ``password``.

    The password is encoded to bytes and hashed with a freshly generated salt;
    the value returned is whatever ``bcrypt.hashpw`` produces.
    """
    plaintext = password.encode()
    salt = bcrypt.gensalt()
    return bcrypt.hashpw(plaintext, salt)
26
+
27
def verify_password(password, hashed):
    """Check a plaintext password against a stored bcrypt hash.

    Args:
        password: Plaintext password as ``str``.
        hashed: Stored bcrypt hash, as ``bytes`` or ``str``.

    Returns:
        The result of ``bcrypt.checkpw`` for the encoded password and hash.
    """
    if isinstance(hashed, str):
        # bcrypt.checkpw only accepts bytes; the users.password column is
        # declared TEXT, so a hash read back from it may arrive as str.
        hashed = hashed.encode()
    return bcrypt.checkpw(password.encode(), hashed)
29
+
30
def add_user(username, password):
    """Insert a new account into the ``users`` table.

    Args:
        username: Account name; it is the table's primary key.
        password: Plaintext password; only its hash is stored.

    Returns:
        True when the row was inserted and committed, False when the insert
        violated a table constraint (in practice: the username is taken).

    Raises:
        sqlite3.Error: For any database failure other than a constraint
            violation, so real errors are not misreported as a duplicate
            username.
    """
    hashed_pwd = hash_password(password)
    try:
        cursor.execute(
            "INSERT INTO users (username, password) VALUES (?, ?)",
            (username, hashed_pwd),
        )
    except sqlite3.IntegrityError:
        # Discard the failed statement's implicit transaction.
        conn.rollback()
        return False
    conn.commit()
    return True
38
+
39
def authenticate_user(username, password):
    """Return True when ``username`` exists and ``password`` matches its hash.

    Looks up the stored hash for ``username`` and delegates the comparison to
    ``verify_password``; an unknown username yields False.
    """
    cursor.execute("SELECT password FROM users WHERE username = ?", (username,))
    row = cursor.fetchone()
    if not row:
        return False
    stored_hash = row[0]
    return True if verify_password(password, stored_hash) else False
45
+
46
+ # ------------------------------- Login / Signup UI -------------------------------
47
def login_signup_page():
    """Render the login / registration screen.

    ``st.session_state.page`` selects which form is shown ('login' or
    'register'). A successful login sets ``logged_in``, ``username`` and
    ``page = "overview"`` in the session state and reruns the script.
    """
    st.set_page_config(page_title="Login | Hate Speech Classifier", layout="centered")
    if 'page' not in st.session_state:
        st.session_state.page = 'login'

    st.markdown('<h1 style="text-align:center; color:#005f73;">πŸ—£οΈ Smart Hate Speech Classifier</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align:center; color:#0a9396;">Please log in or create a new account to continue.</p>', unsafe_allow_html=True)

    if st.session_state.page == 'login':
        username = st.text_input("πŸ‘€ Username")
        password = st.text_input("πŸ”‘ Password", type="password")
        if st.button("Login"):
            if authenticate_user(username, password):
                st.session_state.logged_in = True
                st.session_state.username = username
                st.session_state.page = "overview"
                st.success("Login successful!")
                st.rerun()
            else:
                st.error("Invalid credentials")
        if st.button("New user? Create an account"):
            st.session_state.page = 'register'
            # The login form above has already been drawn in this run; rerun
            # so the registration form appears on this click, not the next.
            st.rerun()

    elif st.session_state.page == 'register':
        new_user = st.text_input("πŸ‘€ New Username")
        new_pass = st.text_input("πŸ”‘ New Password", type="password")
        if st.button("Create Account"):
            if not new_user.strip() or not new_pass:
                # Do not create an account with a blank name or password.
                st.error("Username and password are required.")
            elif add_user(new_user, new_pass):
                st.success("Account created! You can now log in.")
            else:
                st.error("Username already exists!")
        if st.button("Already have an account? Login"):
            st.session_state.page = 'login'
            # Same reasoning as above: redraw with the login form right away.
            st.rerun()
80
+
81
+ # ------------------------------- Access Control -------------------------------
82
# Gate: nothing below this point runs until the session is logged in.
if "logged_in" not in st.session_state:
    st.session_state["logged_in"] = False

if not st.session_state["logged_in"]:
    # Show the login / signup screen and halt this script run.
    login_signup_page()
    st.stop()
87
+
88
+ # ------------------------------- FFmpeg Fix -------------------------------
89
# Path of the ffmpeg executable reported by imageio_ffmpeg.
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()


def custom_run(cmd, *args, **kwargs):
    """Run ``cmd`` through ``subprocess.run``, redirecting bare "ffmpeg".

    When the first element of ``cmd`` is the literal ``"ffmpeg"``, it is
    replaced by ``ffmpeg_path``. The substitution is made on a new list, so
    the caller's ``cmd`` is never modified. Every other command (including an
    empty one) is passed through unchanged.

    Args:
        cmd: Command sequence, as accepted by ``subprocess.run``.
        *args: Forwarded to ``subprocess.run``.
        **kwargs: Forwarded to ``subprocess.run``.

    Returns:
        Whatever ``subprocess.run`` returns.
    """
    if cmd and cmd[0] == "ffmpeg":
        cmd = [ffmpeg_path, *cmd[1:]]
    return subprocess.run(cmd, *args, **kwargs)


import whisper.audio

# Replace the `run` name in whisper.audio with the wrapper above
# (presumably the function whisper uses to launch ffmpeg — confirm against
# the installed whisper version).
whisper.audio.run = custom_run
96
+
97
+ # ------------------------------- Load Models -------------------------------
98
@st.cache_resource
def load_whisper_model():
    """Load the Whisper "tiny" model (wrapped in ``st.cache_resource``)."""
    model = whisper.load_model("tiny")
    return model
101
+
102
@st.cache_resource
def load_bert_model():
    """Load the ``unitary/toxic-bert`` tokenizer and classifier.

    Returns:
        A ``(tokenizer, model)`` tuple; ``eval()`` has been called on the
        model before it is returned.
    """
    checkpoint = "unitary/toxic-bert"
    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    bert_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    bert_model.eval()
    return bert_tokenizer, bert_model


# Module-level handles used by the transcription and classification code.
whisper_model = load_whisper_model()
tokenizer, classifier_model = load_bert_model()
111
+
112
+ # ------------------------------- Classifier -------------------------------
113
import re

HATE_KEYWORDS = ["ugly", "stupid", "idiot", "hate", "kill", "trash","fuck you","bitch"]

# Whole-word / whole-phrase matcher built once from HATE_KEYWORDS. Word
# boundaries keep "skill" or "smugly" from matching "kill" / "ugly"; the
# words of a multi-word keyword may be separated by any run of whitespace.
_HATE_KEYWORD_RE = re.compile(
    r"\b(?:"
    + "|".join(
        r"\s+".join(re.escape(part) for part in keyword.split())
        for keyword in HATE_KEYWORDS
    )
    + r")\b",
    re.IGNORECASE,
)


def classify_text(text, threshold=0.5):
    """Classify ``text`` as "Hate Speech" or "Not Hate Speech".

    A keyword hit short-circuits the model and is reported with confidence
    1.0. Otherwise the text is scored by the classifier. ``unitary/toxic-bert``
    is a multi-label model (one independent sigmoid output per label), so the
    text is flagged when the highest per-label probability reaches
    ``threshold``.

    Args:
        text: The message to classify.
        threshold: Minimum per-label probability needed to flag the text.

    Returns:
        ``(label, confidence)``. ``confidence`` is the probability of the
        reported label: the top label probability for "Hate Speech", and its
        complement for "Not Hate Speech".
    """
    if _HATE_KEYWORD_RE.search(text):
        return "Hate Speech", 1.0

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = classifier_model(**inputs).logits

    # Softmax + argmax would force the labels to compete with each other;
    # a multi-label head needs an independent sigmoid per label.
    toxicity = torch.sigmoid(logits)[0].max().item()
    if toxicity >= threshold:
        return "Hate Speech", toxicity
    return "Not Hate Speech", 1.0 - toxicity
125
+
126
def show_result(label, score):
    """Display the prediction: an error box for hate speech, success otherwise.

    Args:
        label: Predicted label; "Hate Speech" selects the error styling.
        score: Confidence in [0, 1], shown as a percentage with two decimals.
    """
    st.markdown("### πŸ” Prediction Result:")
    message = f"{label} ({score * 100:.2f}% confident)"
    render = st.error if label == "Hate Speech" else st.success
    render(message)
133
+
134
+ # ------------------------------- Sidebar -------------------------------
135
# Sidebar navigation: each button stores its target page in the session state.
with st.sidebar:
    st.title("πŸ“‹ Navigation")
    for button_label, target_page in (
        ("πŸ“˜ Project Overview", "overview"),
        ("πŸŽ™οΈ Hate Speech Detector", "detector"),
    ):
        if st.button(button_label):
            st.session_state["page"] = target_page

# Fall back to the overview when no page has been chosen yet.
if 'page' not in st.session_state:
    st.session_state["page"] = "overview"
144
+
145
+ # ------------------------------- Main Page -------------------------------
146
def _transcribe_and_classify(audio_path):
    """Transcribe ``audio_path`` with Whisper, show the text, and classify it."""
    transcribed = whisper_model.transcribe(audio_path)["text"]
    st.success("πŸ“ Transcribed Text:")
    st.info(transcribed)
    label, score = classify_text(transcribed)
    show_result(label, score)


def _remove_quietly(path):
    """Best-effort removal of a temporary file; a failed delete is ignored."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the result already shown to the user.
        pass


page = st.session_state.get("page", "overview")

if page == "overview":
    st.title("πŸ—£οΈ Smart Hate Speech Classifier Using BERT & Whisper")
    st.markdown("""
This AI-based project detects hate speech in:
- ✍️ Text Input
- πŸ”Š Audio Files
- πŸŽ₯ Video Uploads or URLs

### πŸ”§ Models Used:
- `OpenAI Whisper` for Speech-to-Text
- `Toxic-BERT` for Hate Speech Classification
""")

elif page == "detector":
    st.title("πŸŽ™οΈ Hate Speech Detection")
    input_mode = st.radio("Choose Input Type", ["Text", "Audio", "Video Upload"])

    if input_mode == "Text":
        text_input = st.text_area("πŸ“ Enter your message:")
        if st.button("Classify Text"):
            if text_input.strip():
                label, score = classify_text(text_input)
                show_result(label, score)
            else:
                st.warning("⚠️ Please enter some text.")

    elif input_mode == "Audio":
        audio_file = st.file_uploader("πŸ“€ Upload Audio File:", type=["wav", "mp3"])
        if audio_file:
            # Whisper is given a path, so the upload is spooled to disk.
            # delete=False keeps the file after the handle closes; it is
            # removed explicitly in the finally block below.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_audio.write(audio_file.read())
                audio_path = temp_audio.name
            try:
                st.audio(audio_path)
                _transcribe_and_classify(audio_path)
            finally:
                _remove_quietly(audio_path)

    elif input_mode == "Video Upload":
        video_file = st.file_uploader("πŸ“€ Upload Video File:", type=["mp4", "mov", "avi"])
        if video_file:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
                temp_video.write(video_file.read())
                video_path = temp_video.name
            # Derive the audio path from the unique temp name so concurrent
            # sessions cannot overwrite each other's extracted audio.
            audio_path = video_path + ".wav"
            try:
                st.video(video_path)
                clip = VideoFileClip(video_path)
                try:
                    if clip.audio is None:
                        st.warning("This video has no audio track to transcribe.")
                    else:
                        clip.audio.write_audiofile(audio_path)
                        _transcribe_and_classify(audio_path)
                finally:
                    # Close the clip before deleting the file it was opened on.
                    clip.close()
            finally:
                _remove_quietly(video_path)
                _remove_quietly(audio_path)


st.markdown("---")
st.caption("Built with ❀️ using Streamlit, Whisper, and BERT.")