frankai98 committed
Commit 076376a · verified · 1 Parent(s): daa50b0

Update app.py

Files changed (1):
  1. app.py (+62, -22)
app.py CHANGED
@@ -11,13 +11,13 @@ import random
 import time
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

-# Retrieve the token from environment variables
+# Retrieve the token from environment variables for huggingface login
 hf_token = os.environ.get("HF_TOKEN")
 if not hf_token:
     st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
     st.stop()

-# Login with the token
+# Huggingface login with the token
 login(token=hf_token)

 # Timer component using HTML and JavaScript
@@ -52,6 +52,7 @@ st.header("𝕏/Twitter Tweets Sentiment Report Generator")
 # Concise introduction
 st.write("This model🎰 will score your tweets in your CSV file🗄️ based on their sentiment😀 and generate a report🗟 answering your query question❔ based on those results.")

+# Display VRAM status for debug
 def print_gpu_status(label):
     if torch.cuda.is_available():
         allocated = torch.cuda.memory_allocated() / 1024**3
@@ -78,6 +79,58 @@ def clear_gpu_memory():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()
+
+# Function to build appropriate prompt for text generation model
+def build_messages(query_input, sampled_docs):
+    docs_text = ""
+    for idx, doc in enumerate(sampled_docs):
+        docs_text += f"Tweet {idx+1} (Sentiment: {doc['sentiment']}): {doc['comment']}\n"
+
+    system_message = (
+        "You are an intelligent assistant. Your task is to generate a comprehensive business report "
+        "analyzing the provided tweets with sentiment scores. The report must include an introduction, "
+        "key insights, and a conclusion, and should be approximately 800 words long. "
+        "IMPORTANT: Do not include any introductory greetings, summary statements, or closing questions. "
+        "Output only the final report content."
+    )
+
+    user_content = (
+        f"**Tweets**:\n{docs_text}\n\n"
+        f"**Query Question**: \"{query_input}\"\n\n"
+        "Now, produce only the final report as instructed, without any extra commentary."
+    )
+
+    messages = [
+        {"role": "system", "content": system_message},
+        {"role": "user", "content": user_content}
+    ]
+
+    return messages
+
+# A helper to extract the assistant's response
+def extract_assistant_response(output):
+    """
+    Extract only the content from the assistant's response.
+    Handles nested structure from the pipeline output.
+    """
+    try:
+        # The output is expected to be a list containing a dict with 'generated_text'
+        if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
+            messages = output[0]['generated_text']
+            if isinstance(messages, list):
+                for message in messages:
+                    if isinstance(message, dict) and message.get('role') == 'assistant':
+                        return message.get('content', '')
+        # Fallback: try to directly find 'assistant' role in output
+        if isinstance(output, list):
+            for item in output:
+                if isinstance(item, dict) and item.get('role') == 'assistant':
+                    return item.get('content', '')
+        print(f"DEBUG: Could not find assistant response in: {str(output)[:200]}...")
+        return ''
+    except Exception as e:
+        print(f"Error extracting assistant response: {e}")
+        return ''

 # Main Function Part:
 def main():
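For reference, the new helper is written for the nested chat format that a transformers text-generation pipeline returns when it is called with a list of role/content messages. The mocked output below is illustrative only and assumes the extract_assistant_response helper added in this commit is in scope:

    # Mocked, illustrative pipeline output in the nested chat format the helper unwraps.
    mock_result = [{
        "generated_text": [
            {"role": "system", "content": "You are an intelligent assistant. ..."},
            {"role": "user", "content": "**Tweets**: ..."},
            {"role": "assistant", "content": "Introduction: Overall sentiment is mixed ..."},
        ]
    }]

    print(extract_assistant_response(mock_result))
    # -> "Introduction: Overall sentiment is mixed ..."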
@@ -137,7 +190,7 @@ def main():
             docs_to_summarize.append(doc)
             docs_indices.append(i)

-    # If we have documents to summarize, load Llama model first
+    # If we have documents to summarize, load finetuned summarization model first
     if docs_to_summarize:
         status_text.markdown("**📝 Loading summarization model...**")
         t5_pipe = get_summary_model()
@@ -168,7 +221,7 @@ def main():
            except Exception as e:
                st.warning(f"Error summarizing document {i}: {str(e)}")

-        # Clear Llama model from memory
+        # Clear summarization model from memory
         del t5_pipe
         import gc
         gc.collect()
@@ -208,7 +261,9 @@ def main():
        status_text.markdown(f"**🔍 Scoring documents... ({i}/{len(candidate_docs)})**")

    # Pair documents with scores
-    scored_docs = list(zip(processed_docs, [result.get("score", 0.5) for result in scored_results]))
+    scored_docs = [{"comment": doc, "sentiment": result.get("score", 1)}
+                   for doc, result in zip(processed_docs, scored_results)]
+

    # Clear sentiment model from memory
    del score_pipe
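With this change each scored entry keeps the tweet text and its sentiment label together in one dict, which is the shape the new build_messages helper expects. A quick illustration with made-up data, assuming the scorer puts a 0/1/2 class in the "score" field as the prompt describes:

    processed_docs = ["Love the new update!", "The app keeps crashing."]
    scored_results = [{"score": 2}, {"score": 0}]  # made-up example results

    scored_docs = [{"comment": doc, "sentiment": result.get("score", 1)}
                   for doc, result in zip(processed_docs, scored_results)]
    # -> [{'comment': 'Love the new update!', 'sentiment': 2},
    #     {'comment': 'The app keeps crashing.', 'sentiment': 0}]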
@@ -242,21 +297,7 @@ def main():
     sampled_docs = scored_docs

     # Build prompt
-    messages = [
-        {"role": "user", "content": f"""
-Generate a well-structured business report based on tweets from twitter/X with sentiment score (0: negative, 1: neutral, 2: positive) that answers Query Question and meets following Requirements.
-**Requirements:**
-- Include an introduction, key insights, and a conclusion.
-- Ensure the analysis is concise and does not cut off abruptly.
-- Summarize major findings without repeating verbatim.
-- Cover both positive and negative aspects, highlighting trends in user sentiment.
-**Query Question:**
-"{query_input}"
-**Tweets with sentiment score:**
-{sampled_docs}
-Please ensure the report is complete and reaches approximately 800 words.
-"""}
-    ]
+    messages = build_messages(query_input, sampled_docs)

     # Create a process function to avoid the Triton registration issue
     def process_with_gemma(messages):
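The inline f-string prompt is replaced by a call to the build_messages helper defined earlier in this diff. For a sense of what it produces, with the same made-up data as above it returns a two-message chat:

    messages = build_messages("How do users feel about the latest release?",
                              [{"comment": "Love the new update!", "sentiment": 2},
                               {"comment": "The app keeps crashing.", "sentiment": 0}])
    # messages[0] is the system instruction; messages[1]["content"] contains:
    #   **Tweets**:
    #   Tweet 1 (Sentiment: 2): Love the new update!
    #   Tweet 2 (Sentiment: 0): The app keeps crashing.
    #
    #   **Query Question**: "How do users feel about the latest release?"
    #
    #   Now, produce only the final report as instructed, without any extra commentary.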
@@ -296,8 +337,7 @@ Please ensure the report is complete and reaches approximately 800 words.
            report = "Error generating report. Please try again with fewer tweets."
        else:
            # Extract content from successful Gemma result
-           report = raw_result[0]['generated_text']
-           #extract_assistant_content(raw_result)
+           report = extract_assistant_response(raw_result)

        progress_bar.progress(100)
        status_text.success("**✅ Generation complete!**")
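Taken together, the new helpers bracket the Gemma call: build_messages produces the chat prompt and extract_assistant_response pulls the report text back out of the pipeline result. A minimal end-to-end sketch, assuming both helpers are imported from app.py and using an illustrative model id and generation settings (the app loads its own Gemma pipeline):

    from transformers import pipeline

    generator = pipeline("text-generation", model="google/gemma-2-2b-it")  # illustrative model id

    sampled_docs = [
        {"comment": "Love the new update!", "sentiment": 2},
        {"comment": "The app keeps crashing.", "sentiment": 0},
    ]
    messages = build_messages("How do users feel about the latest release?", sampled_docs)

    raw_result = generator(messages, max_new_tokens=1200)  # illustrative token budget
    report = extract_assistant_response(raw_result)
    print(report)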
 