Spaces:

frankai98
/

Tokentesting

Sleeping

App Files Files Community

frankai98 committed on Mar 28

Commit

ac1df46

verified ·

1 Parent(s): c7d97a5

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -77

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 import nest_asyncio
 nest_asyncio.apply()
 import streamlit as st
@@ -46,11 +45,11 @@ def timer():
     </script>
     """
-st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="📝")
-st.header("Review Scorer & Report Generator")
 # Concise introduction
-st.write("This model will score your reviews in your CSV file and generate a report based on your query and those results.")
 def print_gpu_status(label):
     if torch.cuda.is_available():
@@ -62,15 +61,14 @@ def print_gpu_status(label):
 @st.cache_resource
 def get_sentiment_model():
     return pipeline("text-classification",
-                    model="nlptown/bert-base-multilingual-uncased-sentiment",
                     device=0 if torch.cuda.is_available() else -1)
 @st.cache_resource
-def get_llama_model():
     return pipeline("text-generation",
-                   model="meta-llama/Llama-3.2-1B-Instruct",
-                   device=0,
-                   torch_dtype=torch.bfloat16)
 # Function to clear GPU memory
 def clear_gpu_memory():
@@ -80,21 +78,21 @@ def clear_gpu_memory():
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()
-# Let the user specify the column name for review text (defaulting to "reviewText")
-review_column = st.text_input("Enter the column name for review text:", value="reviewText")
-# Input: Query text for scoring and CSV file upload for candidate reviews
 query_input = st.text_area("Enter your query question for analysis (this does not need to be part of the CSV):")
-uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'reviewText' column)", type=["csv"])
 candidate_docs = []
 if uploaded_file is not None:
     try:
         df = pd.read_csv(uploaded_file)
-        if review_column not in df.columns:
-            st.error(f"CSV must contain a '{review_column}' column.")
         else:
-            candidate_docs = df[review_column].dropna().astype(str).tolist()
     except Exception as e:
         st.error(f"Error reading CSV file: {e}")
@@ -104,12 +102,12 @@ if st.button("Generate Report"):
     st.session_state.timer_frozen = False
     if uploaded_file is None:
         st.error("Please upload a CSV file.")
-    elif not review_column.strip():
         st.error("Please enter your column name")
     elif not candidate_docs:
-        st.error(f"CSV must contain a '{review_column}' column.")
     elif not query_input.strip():
-        st.error("Please enter a query text!")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
@@ -133,7 +131,7 @@ if st.button("Generate Report"):
         # If we have documents to summarize, load Llama model first
         if docs_to_summarize:
             status_text.markdown("**📝 Loading summarization model...**")
-            llama_pipe = get_llama_model()
             status_text.markdown("**📝 Summarizing long documents...**")
@@ -142,17 +140,17 @@ if st.button("Generate Report"):
                 progress = int((idx / len(docs_to_summarize)) * 25)  # First quarter of progress
                 progress_bar.progress(progress)
-                summary_prompt = [
-                    {"role": "user", "content": f"Summarize the following text into a shorter version that preserves the sentiment and key points: {doc[:2000]}..."}
-                ]
                 try:
-                    summary_result = llama_pipe(
-                        summary_prompt,
-                        max_new_tokens=30,
-                        do_sample=True,
-                        temperature=0.3,
-                        return_full_text=False
                     )
                     # Store the summary in place of the original text
@@ -162,7 +160,7 @@ if st.button("Generate Report"):
                     st.warning(f"Error summarizing document {i}: {str(e)}")
             # Clear Llama model from memory
-            del llama_pipe
             import gc
             gc.collect()
             torch.cuda.empty_cache()
@@ -194,7 +192,7 @@ if st.button("Generate Report"):
             except Exception as e:
                 st.warning(f"Error scoring document {i}: {str(e)}")
                 processed_docs.append("Error processing this document")
-                scored_results.append({"label": "NEUTRAL", "score": 0.5})
             # Display occasional status updates
             if i % max(1, len(candidate_docs) // 10) == 0:
@@ -227,25 +225,25 @@ if st.button("Generate Report"):
         progress_bar.progress(75)
         import random
-        max_reviews = 50
-        if len(scored_docs) > max_reviews:
-            sampled_docs = random.sample(scored_docs, max_reviews)
-            st.info(f"Sampling {max_reviews} out of {len(scored_docs)} reviews for report generation")
         else:
             sampled_docs = scored_docs
         # Build prompt
         messages = [
             {"role": "user", "content": f"""
-Generate a well-structured report based on Reviews with sentiment that answers Query Question and meets Requirements.
 **Requirements:**
-- Include an introduction, key insights, and a conclusion, all within the word limit.
 - Ensure the analysis is concise and does not cut off abruptly.
 - Summarize major findings without repeating verbatim.
 - Cover both positive and negative aspects, highlighting trends in user sentiment.
 **Query Question:**
 "{query_input}"
-**Reviews with sentiment:**
 {sampled_docs}
 Please ensure the report is complete and reaches approximately 1000 words.
         """}
@@ -285,45 +283,8 @@ Please ensure the report is complete and reaches approximately 1000 words.
         raw_result, error = process_with_gemma(messages)
         if error:
-            # If Gemma processing failed, try fallback to Llama
-            st.warning(f"Gemma processing failed: {error}")
-            status_text.markdown("**📝 Trying fallback model...**")
-            try:
-                # Use Llama instead since it worked earlier
-                llama_pipe = get_llama_model()
-                # Simplify prompt for fallback
-                fallback_prompt = [
-                    {"role": "user", "content": f"""
-Generate a well-structured, approximately 1000-word report based on Reviews with sentiment that answers Query Question and meets Requirements.
-**Requirements:**
-- Include an introduction, key insights, and a conclusion, all within the word limit.
-- Ensure the analysis is concise and does not cut off abruptly.
-- Summarize major findings without repeating verbatim.
-- Cover both positive and negative aspects, highlighting trends in user sentiment.
-**Query Question:**
-"{query_input}"
-**Reviews with sentiment:**
-{sampled_docs}
-                    """}
-                ]
-                raw_result = llama_pipe(
-                    fallback_prompt,
-                    max_new_tokens=200,
-                    repetition_penalty=1.2,
-                    do_sample=True,
-                    temperature=0.7,
-                    return_full_text=False
-                )
-                # Format Llama result to match expected structure
-                report = raw_result[0]['generated_text']
-            except Exception as e:
-                st.error(f"Fallback also failed: {str(e)}")
-                report = "Error generating report. Please try again with fewer reviews."
         else:
             # Extract content from successful Gemma result
             report = raw_result[0]['generated_text']

 import os
 import nest_asyncio
 nest_asyncio.apply()
 import streamlit as st
     </script>
     """
+st.set_page_config(page_title="Twitter/X Tweets Scorer & Report Generator", page_icon="📝")
+st.header("Twitter/X Tweets Scorer & Report Generator")
 # Concise introduction
+st.write("This model will score your tweets in your CSV file based on their sentiment and generate a report answering your query question based on those results.")
 def print_gpu_status(label):
     if torch.cuda.is_available():
 @st.cache_resource
 def get_sentiment_model():
     return pipeline("text-classification",
+                    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                     device=0 if torch.cuda.is_available() else -1)
 @st.cache_resource
+def get_summary_model():
     return pipeline("text-generation",
+                   model="frankai98/T5FinetunedCommentSummary",
+                   device=0 if torch.cuda.is_available() else -1)
 # Function to clear GPU memory
 def clear_gpu_memory():
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()
+# Let the user specify the column name for tweets text (defaulting to "content")
+tweets_column = st.text_input("Enter the column name for Tweets:", value="content")
+# Input: Query question for scoring and CSV file upload for candidate tweets
 query_input = st.text_area("Enter your query question for analysis (this does not need to be part of the CSV):")
+uploaded_file = st.file_uploader(f"Upload Tweets CSV File (must contain a '{tweets_column}' column)", type=["csv"])
 candidate_docs = []
 if uploaded_file is not None:
     try:
         df = pd.read_csv(uploaded_file)
+        if tweets_column not in df.columns:
+            st.error(f"CSV must contain a '{tweets_column}' column.")
         else:
+            candidate_docs = df[tweets_column].dropna().astype(str).tolist()
     except Exception as e:
         st.error(f"Error reading CSV file: {e}")
     st.session_state.timer_frozen = False
     if uploaded_file is None:
         st.error("Please upload a CSV file.")
+    elif not tweets_column.strip():
         st.error("Please enter your column name")
     elif not candidate_docs:
+        st.error(f"CSV must contain a '{tweets_column}' column.")
     elif not query_input.strip():
+        st.error("Please enter a query question!")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
         # If we have documents to summarize, load the summarization model first
         if docs_to_summarize:
             status_text.markdown("**📝 Loading summarization model...**")
+            t5_pipe = get_summary_model()
             status_text.markdown("**📝 Summarizing long documents...**")
                 progress = int((idx / len(docs_to_summarize)) * 25)  # First quarter of progress
                 progress_bar.progress(progress)
+                input_text = "summarize: " + text
                 try:
+                    summary_result = t5_pipe(
+                        input_text,
+                        max_length=128,
+                        min_length=10,
+                        no_repeat_ngram_size=2,
+                        num_beams=4,
+                        early_stopping=True,
+                        truncation=True
                     )
                     # Store the summary in place of the original text
                     st.warning(f"Error summarizing document {i}: {str(e)}")
             # Clear the summarization model from memory
+            del t5_pipe
             import gc
             gc.collect()
             torch.cuda.empty_cache()
             except Exception as e:
                 st.warning(f"Error scoring document {i}: {str(e)}")
                 processed_docs.append("Error processing this document")
+                scored_results.append({"label": "NEUTRAL", "score": 1})
             # Display occasional status updates
             if i % max(1, len(candidate_docs) // 10) == 0:
         progress_bar.progress(75)
         import random
+        max_tweets = 50
+        if len(scored_docs) > max_tweets:
+            sampled_docs = random.sample(scored_docs, max_tweets)
+            st.info(f"Sampling {max_tweets} out of {len(scored_docs)} tweets for report generation")
         else:
             sampled_docs = scored_docs
         # Build prompt
         messages = [
             {"role": "user", "content": f"""
+Generate a well-structured business report based on tweets from twitter/X with sentiment score (0: negative, 1: neutral, 2: positive) that answers Query Question and meets following Requirements.
 **Requirements:**
+- Include an introduction, key insights, and a conclusion.
 - Ensure the analysis is concise and does not cut off abruptly.
 - Summarize major findings without repeating verbatim.
 - Cover both positive and negative aspects, highlighting trends in user sentiment.
 **Query Question:**
 "{query_input}"
+**Tweets with sentiment score:**
 {sampled_docs}
 Please ensure the report is complete and reaches approximately 1000 words.
         """}
         raw_result, error = process_with_gemma(messages)
         if error:
+                st.error(f"Gemma processing failed: {str(e)}")
+                report = "Error generating report. Please try again with fewer tweets."
         else:
             # Extract content from successful Gemma result
             report = raw_result[0]['generated_text']