Update app.py
app.py CHANGED

@@ -81,10 +81,10 @@ def clear_gpu_memory():
     torch.cuda.ipc_collect()
 
 # Function to build appropriate prompt for text generation model
-def build_messages(query_input, sampled_docs):
+def build_prompt(query_input, sampled_docs):
     docs_text = ""
-    for idx,
-        docs_text += f"Tweet {idx+1} (Sentiment: {sentiment}): {comment}\n"
+    for idx, doc in enumerate(sampled_docs):
+        docs_text += f"Tweet {idx+1} (Sentiment: {doc['sentiment']}): {doc['comment']}\n"
 
     system_message = (
         "You are an intelligent assistant. Your task is to generate a comprehensive business report "
@@ -100,12 +100,9 @@ def build_messages(query_input, sampled_docs):
         "Now, produce only the final report as instructed, without any extra commentary."
     )
 
-
-
-
-    ]
-
-    return messages
+    prompt = system_message + "\n\n" + user_content
+    return prompt
+
 
 # A helper to extract the assistant's response
 def extract_assistant_response(output):
@@ -294,18 +291,13 @@ def main():
         sampled_docs = scored_docs
 
         # Build prompt
-
+        prompt = build_prompt(query_input, sampled_docs)
 
         # Create a process function to avoid the Triton registration issue
-        def process_with_gemma(
-            # We'll define the pipeline here rather than using the cached version
-            # This ensures a clean library registration context
+        def process_with_gemma(prompt):
            from transformers import pipeline, AutoTokenizer
            import torch
 
-            # Set dtype explicitly
-            # torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-
            try:
                tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-3-1b-it")
                pipe = pipeline(
@@ -313,21 +305,19 @@ def main():
                    model="unsloth/gemma-3-1b-it",
                    tokenizer=tokenizer,
                    device=0 if torch.cuda.is_available() else -1,
-                    # torch_dtype=torch_dtype
-
                )
 
-                result = pipe(
+                result = pipe(prompt, max_new_tokens=1024, repetition_penalty=1.2, do_sample=True, temperature=0.7, return_full_text=False)
                return result, None
 
            except Exception as e:
                return None, str(e)
-
+
        # Try to process with Gemma
        status_text.markdown("**📝 Generating report with Gemma...**")
        progress_bar.progress(80)
 
-        raw_result, error = process_with_gemma(
+        raw_result, error = process_with_gemma(prompt)
 
        if error:
            st.error(f"Gemma processing failed: {str(error)}")